diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/URIishTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/URIishTest.java index d797dd435..b5a753bbb 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/URIishTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/transport/URIishTest.java @@ -88,8 +88,10 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertFalse(u.isRemote()); + assertEquals(str, u.getRawPath()); assertEquals(str, u.getPath()); assertEquals(str, u.toString()); + assertEquals(str, u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -99,8 +101,10 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertFalse(u.isRemote()); + assertEquals(str, u.getRawPath()); assertEquals(str, u.getPath()); assertEquals(str, u.toString()); + assertEquals(str, u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -110,8 +114,10 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertFalse(u.isRemote()); + assertEquals("D:\\m y", u.getRawPath()); assertEquals("D:\\m y", u.getPath()); assertEquals("D:\\m y", u.toString()); + assertEquals("D:\\m y", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -121,8 +127,10 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertFalse(u.isRemote()); + assertEquals(str, u.getRawPath()); assertEquals(str, u.getPath()); assertEquals(str, u.toString()); + assertEquals(str, u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -132,8 +140,10 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertFalse(u.isRemote()); + assertEquals("\\\\some\\place", u.getRawPath()); assertEquals("\\\\some\\place", u.getPath()); assertEquals("\\\\some\\place", u.toString()); + assertEquals("\\\\some\\place", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -143,8 +153,49 @@ public class URIishTest { 
URIish u = new URIish(str); assertEquals("file", u.getScheme()); assertFalse(u.isRemote()); + assertEquals("/home/m y", u.getRawPath()); assertEquals("/home/m y", u.getPath()); - assertEquals(str, u.toString()); + assertEquals("file:///home/m y", u.toString()); + assertEquals("file:///home/m%20y", u.toASCIIString()); + assertEquals(u, new URIish(str)); + } + + @Test + public void testURIEncode_00() throws Exception { + final String str = "file:///home/m%00y"; + URIish u = new URIish(str); + assertEquals("file", u.getScheme()); + assertFalse(u.isRemote()); + assertEquals("/home/m%00y", u.getRawPath()); + assertEquals("/home/m\u0000y", u.getPath()); + assertEquals("file:///home/m%00y", u.toString()); + assertEquals("file:///home/m%00y", u.toASCIIString()); + assertEquals(u, new URIish(str)); + } + + @Test + public void testURIEncode_0a() throws Exception { + final String str = "file:///home/m%0ay"; + URIish u = new URIish(str); + assertEquals("file", u.getScheme()); + assertFalse(u.isRemote()); + assertEquals("/home/m%0ay", u.getRawPath()); + assertEquals("/home/m\ny", u.getPath()); + assertEquals("file:///home/m%0ay", u.toString()); + assertEquals("file:///home/m%0ay", u.toASCIIString()); + assertEquals(u, new URIish(str)); + } + + @Test + public void testURIEncode_unicode() throws Exception { + final String str = "file:///home/m%c3%a5y"; + URIish u = new URIish(str); + assertEquals("file", u.getScheme()); + assertFalse(u.isRemote()); + assertEquals("/home/m%c3%a5y", u.getRawPath()); + assertEquals("/home/m\u00e5y", u.getPath()); + assertEquals("file:///home/m%c3%a5y", u.toString()); + assertEquals("file:///home/m%c3%a5y", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -154,8 +205,10 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("file", u.getScheme()); assertFalse(u.isRemote()); + assertEquals("D:/m y", u.getRawPath()); assertEquals("D:/m y", u.getPath()); - assertEquals(str, u.toString()); + assertEquals("file:///D:/m y", 
u.toString()); + assertEquals("file:///D:/m%20y", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -166,8 +219,10 @@ public class URIishTest { assertEquals("git", u.getScheme()); assertTrue(u.isRemote()); assertEquals("example.com", u.getHost()); + assertEquals("/home/m y", u.getRawPath()); assertEquals("/home/m y", u.getPath()); - assertEquals(str, u.toString()); + assertEquals("git://example.com/home/m y", u.toString()); + assertEquals("git://example.com/home/m%20y", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -178,9 +233,11 @@ public class URIishTest { assertEquals("git", u.getScheme()); assertTrue(u.isRemote()); assertEquals("example.com", u.getHost()); + assertEquals("/home/m y", u.getRawPath()); assertEquals("/home/m y", u.getPath()); assertEquals(333, u.getPort()); - assertEquals(str, u.toString()); + assertEquals("git://example.com:333/home/m y", u.toString()); + assertEquals("git://example.com:333/home/m%20y", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -190,10 +247,12 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("git", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("D:/m y", u.getRawPath()); assertEquals("D:/m y", u.getPath()); assertEquals(338, u.getPort()); assertEquals("example.com", u.getHost()); - assertEquals(str, u.toString()); + assertEquals("git://example.com:338/D:/m y", u.toString()); + assertEquals("git://example.com:338/D:/m%20y", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -203,10 +262,27 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("git", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("D:/m y", u.getRawPath()); assertEquals("D:/m y", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals(-1, u.getPort()); + assertEquals("git://example.com/D:/m y", u.toString()); + assertEquals("git://example.com/D:/m%20y", u.toASCIIString()); + assertEquals(u, new URIish(str)); + } + + @Test + public void 
testScpStyleNoURIDecoding() throws Exception { + final String str = "example.com:some/p%20ath"; + URIish u = new URIish(str); + assertNull(u.getScheme()); + assertTrue(u.isRemote()); + assertEquals("some/p%20ath", u.getRawPath()); + assertEquals("some/p%20ath", u.getPath()); + assertEquals("example.com", u.getHost()); + assertEquals(-1, u.getPort()); assertEquals(str, u.toString()); + assertEquals(str, u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -216,10 +292,12 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertTrue(u.isRemote()); + assertEquals("some/p ath", u.getRawPath()); assertEquals("some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals(-1, u.getPort()); assertEquals(str, u.toString()); + assertEquals(str, u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -229,10 +307,12 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); assertEquals("/some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals(-1, u.getPort()); assertEquals(str, u.toString()); + assertEquals(str, u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -242,11 +322,13 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertTrue(u.isRemote()); + assertEquals("some/p ath", u.getRawPath()); assertEquals("some/p ath", u.getPath()); assertEquals("user", u.getUser()); assertEquals("example.com", u.getHost()); assertEquals(-1, u.getPort()); assertEquals(str, u.toString()); + assertEquals(str, u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -256,10 +338,12 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("git+ssh", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); assertEquals("/some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals(-1, u.getPort()); - 
assertEquals(str, u.toString()); + assertEquals("git+ssh://example.com/some/p ath", u.toString()); + assertEquals("git+ssh://example.com/some/p%20ath", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -269,10 +353,12 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("ssh+git", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); assertEquals("/some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals(-1, u.getPort()); - assertEquals(str, u.toString()); + assertEquals("ssh+git://example.com/some/p ath", u.toString()); + assertEquals("ssh+git://example.com/some/p%20ath", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -282,10 +368,12 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("ssh", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); assertEquals("/some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals(-1, u.getPort()); - assertEquals(str, u.toString()); + assertEquals("ssh://example.com/some/p ath", u.toString()); + assertEquals("ssh://example.com/some/p%20ath", u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -295,12 +383,15 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("ssh", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); assertEquals("/some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals("user", u.getUser()); assertNull(u.getPass()); assertEquals(33, u.getPort()); - assertEquals(str, u.toString()); + assertEquals("ssh://user@example.com:33/some/p ath", u.toString()); + assertEquals("ssh://user@example.com:33/some/p%20ath", + u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -310,13 +401,18 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("ssh", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); 
assertEquals("/some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals("user", u.getUser()); assertEquals("pass", u.getPass()); assertEquals(33, u.getPort()); - assertEquals(str, u.toPrivateString()); + assertEquals("ssh://user:pass@example.com:33/some/p ath", + u.toPrivateString()); + assertEquals("ssh://user:pass@example.com:33/some/p%20ath", + u.toPrivateASCIIString()); assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -326,13 +422,62 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("ssh", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); assertEquals("/some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertEquals("DOMAIN\\user", u.getUser()); assertEquals("pass", u.getPass()); assertEquals(33, u.getPort()); - assertEquals(str, u.toPrivateString()); + assertEquals("ssh://DOMAIN\\user:pass@example.com:33/some/p ath", + u.toPrivateString()); + assertEquals("ssh://DOMAIN\\user:pass@example.com:33/some/p%20ath", + u.toPrivateASCIIString()); + assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); + assertEquals(u, new URIish(str)); + } + + @Test + public void testSshProtoWithEscapedADUserPassAndPort() throws Exception { + final String str = "ssh://DOMAIN%5c\u00fcser:pass@example.com:33/some/p ath"; + URIish u = new URIish(str); + assertEquals("ssh", u.getScheme()); + assertTrue(u.isRemote()); + assertEquals("/some/p ath", u.getRawPath()); + assertEquals("/some/p ath", u.getPath()); + assertEquals("example.com", u.getHost()); + assertEquals("DOMAIN\\\u00fcser", u.getUser()); + assertEquals("pass", u.getPass()); + assertEquals(33, u.getPort()); + assertEquals("ssh://DOMAIN\\\u00fcser:pass@example.com:33/some/p ath", + u.toPrivateString()); + assertEquals( + 
"ssh://DOMAIN\\%c3%bcser:pass@example.com:33/some/p%20ath", + u.toPrivateASCIIString()); + assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); + assertEquals(u, new URIish(str)); + } + + @Test + public void testURIEncodeDecode() throws Exception { + final String str = "ssh://%3ax%25:%40%41x@example.com:33/some%c3%a5/p%20a th"; + URIish u = new URIish(str); + assertEquals("ssh", u.getScheme()); + assertTrue(u.isRemote()); + assertEquals("/some%c3%a5/p%20a th", u.getRawPath()); + assertEquals("/some\u00e5/p a th", u.getPath()); + assertEquals("example.com", u.getHost()); + assertEquals(":x%", u.getUser()); + assertEquals("@Ax", u.getPass()); + assertEquals(33, u.getPort()); + assertEquals("ssh://%3ax%25:%40Ax@example.com:33/some%c3%a5/p%20a th", + u.toPrivateString()); + assertEquals( + "ssh://%3ax%25:%40Ax@example.com:33/some%c3%a5/p%20a%20th", + u.toPrivateASCIIString()); assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -342,13 +487,17 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("git", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("~some/p ath", u.getRawPath()); assertEquals("~some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertNull(u.getUser()); assertNull(u.getPass()); assertEquals(-1, u.getPort()); - assertEquals(str, u.toPrivateString()); + assertEquals("git://example.com/~some/p ath", u.toPrivateString()); + assertEquals("git://example.com/~some/p%20ath", + u.toPrivateASCIIString()); assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -359,13 +508,16 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("git", u.getScheme()); assertTrue(u.isRemote()); + 
assertEquals("~some/p ath", u.getRawPath()); assertEquals("~some/p ath", u.getPath()); assertEquals("example.com", u.getHost()); assertNull(u.getUser()); assertNull(u.getPass()); assertEquals(-1, u.getPort()); assertEquals(str, u.toPrivateString()); + assertEquals(str, u.toPrivateASCIIString()); assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -375,13 +527,16 @@ public class URIishTest { URIish u = new URIish(str); assertNull(u.getScheme()); assertFalse(u.isRemote()); + assertEquals("/~some/p ath", u.getRawPath()); assertEquals("/~some/p ath", u.getPath()); assertNull(u.getHost()); assertNull(u.getUser()); assertNull(u.getPass()); assertEquals(-1, u.getPort()); assertEquals(str, u.toPrivateString()); + assertEquals(str, u.toPrivateASCIIString()); assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); assertEquals(u, new URIish(str)); } @@ -415,6 +570,33 @@ public class URIishTest { } } + @Test + public void testGetSet() throws Exception { + final String str = "ssh://DOMAIN\\user:pass@example.com:33/some/p ath%20"; + URIish u = new URIish(str); + u = u.setHost(u.getHost()); + u = u.setPass(u.getPass()); + u = u.setPort(u.getPort()); + assertEquals("ssh", u.getScheme()); + assertTrue(u.isRemote()); + u = u.setRawPath(u.getRawPath()); + assertEquals("/some/p ath%20", u.getRawPath()); + u = u.setPath(u.getPath()); + assertEquals("/some/p ath ", u.getRawPath()); + assertEquals("/some/p ath ", u.getPath()); + assertEquals("example.com", u.getHost()); + assertEquals("DOMAIN\\user", u.getUser()); + assertEquals("pass", u.getPass()); + assertEquals(33, u.getPort()); + assertEquals("ssh://DOMAIN\\user:pass@example.com:33/some/p ath ", + u.toPrivateString()); + assertEquals("ssh://DOMAIN\\user:pass@example.com:33/some/p%20ath%20", + u.toPrivateASCIIString()); + 
assertEquals(u.setPass(null).toPrivateString(), u.toString()); + assertEquals(u.setPass(null).toPrivateASCIIString(), u.toASCIIString()); + assertEquals(u, new URIish(str)); + } + @Test public void testGetValidWithEmptySlashDotGitHumanishName() throws IllegalArgumentException, URISyntaxException { @@ -537,6 +719,7 @@ public class URIishTest { URIish u = new URIish(str); assertEquals("http", u.getScheme()); assertTrue(u.isRemote()); + assertEquals("/some/path", u.getRawPath()); assertEquals("/some/path", u.getPath()); assertEquals("host.xy", u.getHost()); assertEquals(80, u.getPort()); @@ -556,6 +739,27 @@ public class URIishTest { assertEquals(u, new URIish(str)); } + /** + * Exemplify what happens with the special case of encoding '/' as %2F. Web + * services in general parse path components before decoding the characters. + * + * @throws URISyntaxException + */ + @Test + public void testPathSeparator() throws URISyntaxException { + String str = "http://user:secret@host.xy:80/some%2Fpath"; + URIish u = new URIish(str); + assertEquals("http", u.getScheme()); + assertTrue(u.isRemote()); + assertEquals("/some%2Fpath", u.getRawPath()); + assertEquals("/some/path", u.getPath()); + assertEquals("host.xy", u.getHost()); + assertEquals(80, u.getPort()); + assertEquals("user", u.getUser()); + assertEquals("secret", u.getPass()); + assertEquals(u, new URIish(str)); + } + @Test public void testFileProtocol() throws IllegalArgumentException, URISyntaxException, IOException { @@ -565,6 +769,7 @@ public class URIishTest { assertFalse(u.isRemote()); assertNull(u.getHost()); assertNull(u.getPass()); + assertEquals("/a/b.txt", u.getRawPath()); assertEquals("/a/b.txt", u.getPath()); assertEquals(-1, u.getPort()); assertNull(u.getUser()); @@ -586,6 +791,7 @@ public class URIishTest { assertFalse(u.isRemote()); assertNull(u.getHost()); assertNull(u.getPass()); + assertEquals("/a/b.txt", u.getRawPath()); assertEquals("/a/b.txt", u.getPath()); assertEquals(-1, u.getPort()); 
assertNull(u.getUser()); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/URIish.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/URIish.java index 8254c1f20..de18ece35 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/URIish.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/URIish.java @@ -46,22 +46,26 @@ package org.eclipse.jgit.transport; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.Serializable; +import java.io.UnsupportedEncodingException; import java.net.URISyntaxException; import java.net.URL; +import java.util.BitSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.eclipse.jgit.JGitText; import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.util.RawParseUtils; import org.eclipse.jgit.util.StringUtils; /** * This URI like construct used for referencing Git archives over the net, as - * well as locally stored archives. The most important difference compared to - * RFC 2396 URI's is that no URI encoding/decoding ever takes place. A space or - * any special character is written as-is. + * well as locally stored archives. It is similar to RFC 2396 URIs, but also + * supports SCP and the malformed file:// syntax (as opposed to the correct + * file: syntax).
*/ public class URIish implements Serializable { /** @@ -179,6 +183,8 @@ public class URIish implements Serializable { private String path; + private String rawPath; + private String user; private String pass; @@ -201,19 +207,21 @@ public class URIish implements Serializable { Matcher matcher = SINGLE_SLASH_FILE_URI.matcher(s); if (matcher.matches()) { scheme = matcher.group(1); - path = cleanLeadingSlashes(matcher.group(2), scheme); + rawPath = cleanLeadingSlashes(matcher.group(2), scheme); + path = unescape(rawPath); return; } matcher = FULL_URI.matcher(s); if (matcher.matches()) { scheme = matcher.group(1); - user = matcher.group(2); - pass = matcher.group(3); - host = matcher.group(4); + user = unescape(matcher.group(2)); + pass = unescape(matcher.group(3)); + host = unescape(matcher.group(4)); if (matcher.group(5) != null) port = Integer.parseInt(matcher.group(5)); - path = cleanLeadingSlashes( + rawPath = cleanLeadingSlashes( n2e(matcher.group(6)) + n2e(matcher.group(7)), scheme); + path = unescape(rawPath); return; } matcher = RELATIVE_SCP_URI.matcher(s); @@ -221,7 +229,8 @@ public class URIish implements Serializable { user = matcher.group(1); pass = matcher.group(2); host = matcher.group(3); - path = matcher.group(4); + rawPath = matcher.group(4); + path = rawPath; return; } matcher = ABSOLUTE_SCP_URI.matcher(s); @@ -229,17 +238,95 @@ public class URIish implements Serializable { user = matcher.group(1); pass = matcher.group(2); host = matcher.group(3); - path = matcher.group(4); + rawPath = matcher.group(4); + path = rawPath; return; } matcher = LOCAL_FILE.matcher(s); if (matcher.matches()) { - path = matcher.group(1); + rawPath = matcher.group(1); + path = rawPath; return; } throw new URISyntaxException(s, JGitText.get().cannotParseGitURIish); } + private static String unescape(String s) throws URISyntaxException { + if (s == null) + return null; + if (s.indexOf('%') < 0) + return s; + + byte[] bytes; + try { + bytes = 
s.getBytes(Constants.CHARACTER_ENCODING); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // can't happen + } + + byte[] os = new byte[bytes.length]; + int j = 0; + for (int i = 0; i < bytes.length; ++i) { + byte c = bytes[i]; + if (c == '%') { + if (i + 2 >= bytes.length) + throw new URISyntaxException(s, JGitText.get().cannotParseGitURIish); + int val = (RawParseUtils.parseHexInt4(bytes[i + 1]) << 4) + | RawParseUtils.parseHexInt4(bytes[i + 2]); + os[j++] = (byte) val; + i += 2; + } else + os[j++] = c; + } + return RawParseUtils.decode(os, 0, j); + } + + private static final BitSet reservedChars = new BitSet(127); + + static { + for (byte b : Constants.encodeASCII("!*'();:@&=+$,/?#[]")) + reservedChars.set(b); + } + + /** + * Escape unprintable characters and, optionally, URI-reserved characters + * + * @param s + * The Java String to encode (may contain any character) + * @param escapeReservedChars + * true to escape URI reserved characters + * @param encodeNonAscii + * true to encode any non-ASCII characters + * @return a URI-encoded string + */ + private static String escape(String s, boolean escapeReservedChars, + boolean encodeNonAscii) { + if (s == null) + return null; + ByteArrayOutputStream os = new ByteArrayOutputStream(s.length()); + byte[] bytes; + try { + bytes = s.getBytes(Constants.CHARACTER_ENCODING); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); // cannot happen + } + for (int i = 0; i < bytes.length; ++i) { + int b = bytes[i] & 0xFF; + if (b <= 32 || (encodeNonAscii && b > 127) || b == '%' + || (escapeReservedChars && reservedChars.get(b))) { + os.write('%'); + byte[] tmp = Constants.encodeASCII(String.format("%02x", + Integer.valueOf(b))); + os.write(tmp[0]); + os.write(tmp[1]); + } else { + os.write(b); + } + } + byte[] buf = os.toByteArray(); + return RawParseUtils.decode(buf, 0, buf.length); + } + private String n2e(String s) { if (s == null) return ""; @@ -272,6 +359,11 @@ public class URIish
implements Serializable { public URIish(final URL u) { scheme = u.getProtocol(); path = u.getPath(); + try { + rawPath = u.toURI().getRawPath(); + } catch (URISyntaxException e) { + throw new RuntimeException(e); // Impossible + } final String ui = u.getUserInfo(); if (ui != null) { @@ -291,6 +383,7 @@ public class URIish implements Serializable { private URIish(final URIish u) { this.scheme = u.scheme; + this.rawPath = u.rawPath; this.path = u.path; this.user = u.user; this.pass = u.pass; @@ -352,6 +445,13 @@ public class URIish implements Serializable { return path; } + /** + * @return raw path name component, i.e. before any percent-decoding is applied + */ + public String getRawPath() { + return rawPath; + } + /** * Return a new URI matching this one, but with a different path. * @@ -362,6 +462,22 @@ public class URIish implements Serializable { public URIish setPath(final String n) { final URIish r = new URIish(this); r.path = n; + r.rawPath = n; + return r; + } + + /** + * Return a new URI matching this one, but with a different (raw) path. + * + * @param n + * the new raw path; it is percent-decoded to derive the path. + * @return a new URI with the updated value. + * @throws URISyntaxException if n ends in an incomplete percent escape + */ + public URIish setRawPath(final String n) throws URISyntaxException { + final URIish r = new URIish(this); + r.path = unescape(n); + r.rawPath = n; return r; } @@ -475,14 +591,15 @@ public class URIish implements Serializable { * @return the URI, including its password field, if any.
*/ public String toPrivateString() { - return format(true); + return format(true, false, false); } public String toString() { - return format(false); + return format(false, false, false); } - private String format(final boolean includePassword) { + private String format(final boolean includePassword, boolean escape, + boolean escapeNonAscii) { final StringBuilder r = new StringBuilder(); if (getScheme() != null) { r.append(getScheme()); @@ -490,17 +607,17 @@ public class URIish implements Serializable { } if (getUser() != null) { - r.append(getUser()); + r.append(escape(getUser(), true, escapeNonAscii)); if (includePassword && getPass() != null) { r.append(':'); - r.append(getPass()); + r.append(escape(getPass(), true, escapeNonAscii)); } } if (getHost() != null) { if (getUser() != null) r.append('@'); - r.append(getHost()); + r.append(escape(getHost(), false, escapeNonAscii)); if (getScheme() != null && getPort() > 0) { r.append(':'); r.append(getPort()); @@ -513,12 +630,33 @@ public class URIish implements Serializable { r.append('/'); } else if (getHost() != null) r.append(':'); - r.append(getPath()); + if (getScheme() != null) + if (escapeNonAscii) + r.append(escape(getPath(), false, escapeNonAscii)); + else + r.append(getRawPath()); + else + r.append(getPath()); } return r.toString(); } + /** + * @return the URI as an ASCII string. Password is not included. + */ + public String toASCIIString() { + return format(false, true, true); + } + + /** + * @return the URI including password, formatted with only ASCII characters + * such that it will be valid for use over the network. + */ + public String toPrivateASCIIString() { + return format(true, true, true); + } + /** * Get the "humanish" part of the path. Some examples of a 'humanish' part * for a full path: