Browse Source

Treat RawText of binary data as a file with a single line.

This avoids executing mergeAlgorithm.merge on binary data, which is
unlikely to be useful.

Arguably, binary data should not make it to
ResolveMerger#contentMerge, but this approach has the following
advantages:

* binary detection is exact, since it scans the whole blob rather than
  only its start.

* it is cheap, as we have to iterate over the bytes anyway to find
  '\n'.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: I424295df1dc60a719859d9d7c599067891b15792
stable-4.9
Han-Wen Nienhuys 7 years ago
parent
commit
a551b64694
  1. 7
      org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java
  2. 29
      org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java

7
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java

@ -79,6 +79,13 @@ public class RawParseUtils_LineMapTest {
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
} }
@Test
public void testBinary() throws UnsupportedEncodingException {
final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 3, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map));
}
@Test @Test
public void testFourLineBlanks() throws UnsupportedEncodingException { public void testFourLineBlanks() throws UnsupportedEncodingException {
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");

29
org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java

@ -618,6 +618,10 @@ public final class RawParseUtils {
* <p> * <p>
* The last element (index <code>map.size()-1</code>) always contains * The last element (index <code>map.size()-1</code>) always contains
* <code>end</code>. * <code>end</code>.
* <p>
* If the data contains a '\0' anywhere, the whole region is considered binary
* and a LineMap corresponding to a single line is returned.
* </p>
* *
* @param buf * @param buf
* buffer to scan. * buffer to scan.
@ -629,14 +633,29 @@ public final class RawParseUtils {
* @return a line map indexing the start position of each line. * @return a line map indexing the start position of each line.
*/ */
public static final IntList lineMap(final byte[] buf, int ptr, int end) { public static final IntList lineMap(final byte[] buf, int ptr, int end) {
int start = ptr;
// Experimentally derived from multiple source repositories // Experimentally derived from multiple source repositories
// the average number of bytes/line is 36. Its a rough guess // the average number of bytes/line is 36. Its a rough guess
// to initially size our map close to the target. // to initially size our map close to the target.
// IntList map = new IntList((end - ptr) / 36);
final IntList map = new IntList((end - ptr) / 36); map.add(Integer.MIN_VALUE);
map.fillTo(1, Integer.MIN_VALUE); boolean foundLF = true;
for (; ptr < end; ptr = nextLF(buf, ptr)) for (; ptr < end; ptr++) {
map.add(ptr); if (foundLF) {
map.add(ptr);
}
if (buf[ptr] == '\0') {
// binary data.
map = new IntList(3);
map.add(Integer.MIN_VALUE);
map.add(start);
break;
}
foundLF = (buf[ptr] == '\n');
}
map.add(end); map.add(end);
return map; return map;
} }

Loading…
Cancel
Save