From ab0eedcead5b38e446e7a022de1c749c5e37f115 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Thu, 27 Jul 2017 13:49:39 +0200 Subject: [PATCH 1/2] Trim boilerplate in RawParseUtils_LineMapTest. Signed-off-by: Han-Wen Nienhuys Change-Id: Ib003f7c8f2816dd57e941799a665e70ecd6645a2 --- .../jgit/util/RawParseUtils_LineMapTest.java | 41 ++++++++----------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java index 593971478..e8e191d78 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java @@ -43,6 +43,7 @@ package org.eclipse.jgit.util; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -55,52 +56,44 @@ public class RawParseUtils_LineMapTest { public void testEmpty() { final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0); assertNotNull(map); - assertEquals(2, map.size()); - assertEquals(Integer.MIN_VALUE, map.get(0)); - assertEquals(0, map.get(1)); + assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map)); } @Test public void testOneBlankLine() { final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1); - assertEquals(3, map.size()); - assertEquals(Integer.MIN_VALUE, map.get(0)); - assertEquals(0, map.get(1)); - assertEquals(1, map.get(2)); + assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map)); } @Test public void testTwoLineFooBar() throws UnsupportedEncodingException { final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1"); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); - assertEquals(4, map.size()); - assertEquals(Integer.MIN_VALUE, map.get(0)); - assertEquals(0, map.get(1)); - assertEquals(4, map.get(2)); - 
assertEquals(buf.length, map.get(3)); + assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); } @Test public void testTwoLineNoLF() throws UnsupportedEncodingException { final byte[] buf = "foo\nbar".getBytes("ISO-8859-1"); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); - assertEquals(4, map.size()); - assertEquals(Integer.MIN_VALUE, map.get(0)); - assertEquals(0, map.get(1)); - assertEquals(4, map.get(2)); - assertEquals(buf.length, map.get(3)); + assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); } @Test public void testFourLineBlanks() throws UnsupportedEncodingException { final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); - assertEquals(6, map.size()); - assertEquals(Integer.MIN_VALUE, map.get(0)); - assertEquals(0, map.get(1)); - assertEquals(4, map.get(2)); - assertEquals(5, map.get(3)); - assertEquals(6, map.get(4)); - assertEquals(buf.length, map.get(5)); + + assertArrayEquals(new int[]{ + Integer.MIN_VALUE, 0, 4, 5, 6, buf.length + }, asInts(map)); + } + + private int[] asInts(IntList l) { + int[] result = new int[l.size()]; + for (int i = 0; i < l.size(); i++) { + result[i] = l.get(i); + } + return result; } } From a551b64694c24fff58014ae5ca298b47539cf96d Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Thu, 27 Jul 2017 13:58:49 +0200 Subject: [PATCH 2/2] Treat RawText of binary data as file with one single line. This avoids executing mergeAlgorithm.merge on binary data, which is unlikely to be useful. Arguably, binary data should not make it to ResolveMerger#contentMerge, but this approach has the following advantages: * binary detection is exact, since it doesn't only look at the start of the blob. * it is cheap, as we have to iterate over the bytes anyway to find '\n'. 
Signed-off-by: Han-Wen Nienhuys Change-Id: I424295df1dc60a719859d9d7c599067891b15792 --- .../jgit/util/RawParseUtils_LineMapTest.java | 7 +++++ .../org/eclipse/jgit/util/RawParseUtils.java | 29 +++++++++++++++---- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java index e8e191d78..2e9cbb503 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java @@ -79,6 +79,13 @@ public class RawParseUtils_LineMapTest { assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); } + @Test + public void testBinary() throws UnsupportedEncodingException { + final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1"); + final IntList map = RawParseUtils.lineMap(buf, 3, buf.length); + assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map)); + } + @Test public void testFourLineBlanks() throws UnsupportedEncodingException { final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java index 86777b9cd..ad138bbf1 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java @@ -618,6 +618,10 @@ public final class RawParseUtils { *

* The last element (index map.size()-1) always contains * end. + *

+ * If the data contains a '\0' anywhere, the whole region is considered binary + * and a line map describing a single line spanning ptr to end is returned. + *

* * @param buf * buffer to scan. @@ -629,14 +633,29 @@ public final class RawParseUtils { * @return a line map indexing the start position of each line. */ public static final IntList lineMap(final byte[] buf, int ptr, int end) { + int start = ptr; + // Experimentally derived from multiple source repositories // the average number of bytes/line is 36. Its a rough guess // to initially size our map close to the target. - // - final IntList map = new IntList((end - ptr) / 36); - map.fillTo(1, Integer.MIN_VALUE); - for (; ptr < end; ptr = nextLF(buf, ptr)) - map.add(ptr); + IntList map = new IntList((end - ptr) / 36); + map.add(Integer.MIN_VALUE); + boolean foundLF = true; + for (; ptr < end; ptr++) { + if (foundLF) { + map.add(ptr); + } + + if (buf[ptr] == '\0') { + // binary data. + map = new IntList(3); + map.add(Integer.MIN_VALUE); + map.add(start); + break; + } + + foundLF = (buf[ptr] == '\n'); + } map.add(end); return map; }