diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java index 93ea9a7ab..3b563b3af 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java @@ -64,6 +64,16 @@ public class RawTextTest { assertEquals(0, r.size()); } + @Test + public void testBinary() { + String input = "foo-a\nf\0o-b\n"; + byte[] data = Constants.encodeASCII(input); + final RawText a = new RawText(data); + assertEquals(a.content, data); + assertEquals(a.size(), 1); + assertEquals(a.getString(0, 1, false), input); + } + @Test public void testEquals() { final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n")); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java index 6efdce6d7..fe070c80a 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java @@ -48,45 +48,45 @@ import static org.junit.Assert.assertNotNull; import java.io.UnsupportedEncodingException; +import org.eclipse.jgit.errors.BinaryBlobException; import org.junit.Test; public class RawParseUtils_LineMapTest { @Test - public void testEmpty() { + public void testEmpty() throws Exception { final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0); assertNotNull(map); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map)); } @Test - public void testOneBlankLine() { + public void testOneBlankLine() throws Exception { final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map)); } @Test - public void testTwoLineFooBar() throws UnsupportedEncodingException { + public void testTwoLineFooBar() throws Exception { final byte[] buf = 
"foo\nbar\n".getBytes("ISO-8859-1"); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); } @Test - public void testTwoLineNoLF() throws UnsupportedEncodingException { + public void testTwoLineNoLF() throws Exception { final byte[] buf = "foo\nbar".getBytes("ISO-8859-1"); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); } - @Test - public void testBinary() throws UnsupportedEncodingException { + @Test(expected = BinaryBlobException.class) + public void testBinary() throws Exception { final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1"); - final IntList map = RawParseUtils.lineMap(buf, 3, buf.length); - assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map)); + RawParseUtils.lineMap(buf, 3, buf.length); } @Test - public void testFourLineBlanks() throws UnsupportedEncodingException { + public void testFourLineBlanks() throws Exception { final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 4fae3e478..5f50145d6 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -93,7 +93,29 @@ public class RawText extends Sequence { */ public RawText(final byte[] input) { content = input; - lines = RawParseUtils.lineMap(content, 0, content.length); + IntList map; + try { + map = RawParseUtils.lineMap(content, 0, content.length); + } catch (BinaryBlobException e) { + map = new IntList(3); + map.add(Integer.MIN_VALUE); + map.add(0); + map.add(content.length); + } + lines = map; + } + + /** + * Construct a new RawText if the line map is already known. + * + * @param data + * the blob data. 
+ * @param lineMap + * Indices of line starts, indexed by 1-based line number. + */ + private RawText(final byte[] data, final IntList lineMap) { + content = data; + lines = lineMap; } /** @@ -357,7 +379,8 @@ public class RawText extends Sequence { System.arraycopy(head, 0, data, 0, head.length); IO.readFully(stream, data, off, (int) (sz-off)); - return new RawText(data); + IntList lineMap = RawParseUtils.lineMap(data, 0, data.length); + return new RawText(data, lineMap); } } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java index ad138bbf1..0d6a8d961 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java @@ -63,6 +63,7 @@ import java.util.HashMap; import java.util.Map; import org.eclipse.jgit.annotations.Nullable; +import org.eclipse.jgit.errors.BinaryBlobException; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.PersonIdent; @@ -618,9 +619,6 @@ public final class RawParseUtils { *

* The last element (index map.size()-1) always contains * end. - *

- * If the data contains a '\0' anywhere, the whole region is considered binary - * and a LineMap corresponding to a single line is returned. *

* * @param buf @@ -631,10 +629,9 @@ public final class RawParseUtils { * @param end * 1 past the end of the content within buf. * @return a line map indexing the start position of each line. + * @throws BinaryBlobException if any '\0' is found. */ - public static final IntList lineMap(final byte[] buf, int ptr, int end) { - int start = ptr; - + public static final IntList lineMap(final byte[] buf, int ptr, int end) throws BinaryBlobException { // Experimentally derived from multiple source repositories // the average number of bytes/line is 36. Its a rough guess // to initially size our map close to the target. @@ -647,11 +644,7 @@ public final class RawParseUtils { } if (buf[ptr] == '\0') { - // binary data. - map = new IntList(3); - map.add(Integer.MIN_VALUE); - map.add(start); - break; + throw new BinaryBlobException(); } foundLF = (buf[ptr] == '\n');