From 08d2e0188c932d1c87f603c9c1435296a63910d2 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Wed, 18 Apr 2018 23:37:25 +0200 Subject: [PATCH] Introduce new RawText constructor and RawParseUtils.lineMapOrBinary This makes binary detection exact in ResolveMerger and DiffFormatter This has the same intention as Id4342a199628d9406bfa04af1b023c27a47d4014 but preserves backward compatibility of the signature of RawParseUtils.lineMap. Change-Id: Ia24a4e716592bab3363ae24e3a46315a7511154f Signed-off-by: Han-Wen Nienhuys Signed-off-by: Matthias Sohn --- .../org/eclipse/jgit/diff/RawTextTest.java | 10 +++++ .../jgit/util/RawParseUtils_LineMapTest.java | 18 +++++++- .../src/org/eclipse/jgit/diff/RawText.java | 21 ++++++++- .../org/eclipse/jgit/util/RawParseUtils.java | 45 ++++++++++++++++--- 4 files changed, 83 insertions(+), 11 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java index 69e40777c..8cf3eedfd 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java @@ -64,6 +64,16 @@ public class RawTextTest { assertEquals(0, r.size()); } + @Test + public void testBinary() { + String input = "foo-a\nf\0o-b\n"; + byte[] data = Constants.encodeASCII(input); + final RawText a = new RawText(data); + assertEquals(a.content, data); + assertEquals(a.size(), 1); + assertEquals(a.getString(0, 1, false), input); + } + @Test public void testEquals() { final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n")); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java index 024379866..7630c1118 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java +++ 
b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java @@ -47,18 +47,25 @@ import static java.nio.charset.StandardCharsets.ISO_8859_1; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertNotNull; +import org.eclipse.jgit.errors.BinaryBlobException; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; public class RawParseUtils_LineMapTest { + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Test - public void testEmpty() { + public void testEmpty() throws Exception { final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0); assertNotNull(map); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map)); } @Test - public void testOneBlankLine() { + public void testOneBlankLine() throws Exception { final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map)); } @@ -84,6 +91,13 @@ public class RawParseUtils_LineMapTest { assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map)); } + @Test + public void testLineMapOrBinary() throws Exception { + final byte[] buf = "xxxfoo\nb\0ar".getBytes(ISO_8859_1); + exception.expect(BinaryBlobException.class); + RawParseUtils.lineMapOrBinary(buf, 3, buf.length); + } + @Test public void testFourLineBlanks() { final byte[] buf = "foo\n\n\nbar\n".getBytes(ISO_8859_1); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index ec88ce4ff..27d089489 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -92,8 +92,25 @@ public class RawText extends Sequence { * through cached arrays is safe. 
*/ public RawText(final byte[] input) { + this(input, RawParseUtils.lineMap(input, 0, input.length)); + } + + /** + * Create a new sequence from the existing content byte array, and the line + * map indicating line boundaries. + * + * @param input + * the content array. The array is never modified, so passing + * through cached arrays is safe. + * @param lineMap + * an array with the line starts of the input, in 1-based offset. + * The first and last entry should be {@link Integer#MIN_VALUE}, and the array end + * respectively. + * @since 5.0 + */ + public RawText(final byte[] input, IntList lineMap) { content = input; - lines = RawParseUtils.lineMap(content, 0, content.length); + lines = lineMap; } /** @@ -369,7 +386,7 @@ public class RawText extends Sequence { System.arraycopy(head, 0, data, 0, head.length); IO.readFully(stream, data, off, (int) (sz-off)); - return new RawText(data); + return new RawText(data, RawParseUtils.lineMapOrBinary(data, 0, (int) sz)); } } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java index 014c7727a..66f7613e2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java @@ -63,6 +63,7 @@ import java.util.HashMap; import java.util.Map; import org.eclipse.jgit.annotations.Nullable; +import org.eclipse.jgit.errors.BinaryBlobException; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.PersonIdent; @@ -632,11 +633,37 @@ public final class RawParseUtils { * line 1. * @param end * 1 past the end of the content within buf. - * @return a line map indexing the start position of each line. + * @return a line map indexing the start position of each line, or a map representing the entire + * array as a single line if a '\0' is found. 
*/ public static final IntList lineMap(final byte[] buf, int ptr, int end) { - int start = ptr; + IntList map; + try { + map = lineMapOrBinary(buf, ptr, end); + } catch (BinaryBlobException e) { + map = new IntList(3); + map.add(Integer.MIN_VALUE); + map.add(ptr); + map.add(end); + } + return map; + } + /** + * Like {@link #lineMap(byte[], int, int)} but throw {@link BinaryBlobException} if a null char + * is encountered. + * @param buf buffer to scan. + * @param ptr position within the buffer corresponding to the first byte of + * line 1. + * @param end 1 past the end of the content within buf. + * @return a line map indexing the start position of each line. + * @throws BinaryBlobException + * if a '\0' byte is encountered while scanning the buffer. + * + * @since 5.0 + */ + public static final IntList lineMapOrBinary(final byte[] buf, int ptr, int end) + throws BinaryBlobException { // Experimentally derived from multiple source repositories // the average number of bytes/line is 36. Its a rough guess // to initially size our map close to the target. @@ -649,11 +676,15 @@ public final class RawParseUtils { } if (buf[ptr] == '\0') { - // binary data. - map = new IntList(3); - map.add(Integer.MIN_VALUE); - map.add(start); - break; + throw new BinaryBlobException() { + + private static final long serialVersionUID = 1L; + + @Override + public Throwable fillInStackTrace() { + return this; + } + }; } foundLF = (buf[ptr] == '\n');