Browse Source

Throw BinaryBlobException from RawParseUtils#lineMap.

This makes detection of binaries exact for ResolveMerger and
DiffFormatter: they will classify files as binary regardless of where
the '\0' occurs in the text.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Change-Id: Id4342a199628d9406bfa04af1b023c27a47d4014
stable-4.10
Han-Wen Nienhuys 7 years ago
parent
commit
f2e64cd895
  1. 10
      org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java
  2. 18
      org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java
  3. 27
      org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
  4. 15
      org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java

10
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java

@ -64,6 +64,16 @@ public class RawTextTest {
assertEquals(0, r.size()); assertEquals(0, r.size());
} }
@Test
public void testBinary() {
String input = "foo-a\nf\0o-b\n";
byte[] data = Constants.encodeASCII(input);
final RawText a = new RawText(data);
assertEquals(a.content, data);
assertEquals(a.size(), 1);
assertEquals(a.getString(0, 1, false), input);
}
@Test @Test
public void testEquals() { public void testEquals() {
final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n")); final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n"));

18
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java

@ -48,45 +48,45 @@ import static org.junit.Assert.assertNotNull;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.junit.Test; import org.junit.Test;
public class RawParseUtils_LineMapTest { public class RawParseUtils_LineMapTest {
@Test @Test
public void testEmpty() { public void testEmpty() throws Exception {
final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0); final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0);
assertNotNull(map); assertNotNull(map);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map)); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map));
} }
@Test @Test
public void testOneBlankLine() { public void testOneBlankLine() throws Exception {
final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1); final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map)); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map));
} }
@Test @Test
public void testTwoLineFooBar() throws UnsupportedEncodingException { public void testTwoLineFooBar() throws Exception {
final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1"); final byte[] buf = "foo\nbar\n".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
} }
@Test @Test
public void testTwoLineNoLF() throws UnsupportedEncodingException { public void testTwoLineNoLF() throws Exception {
final byte[] buf = "foo\nbar".getBytes("ISO-8859-1"); final byte[] buf = "foo\nbar".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map)); assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 4, buf.length}, asInts(map));
} }
@Test @Test(expected = BinaryBlobException.class)
public void testBinary() throws UnsupportedEncodingException { public void testBinary() throws Exception {
final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1"); final byte[] buf = "xxxfoo\nb\0ar".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 3, buf.length); RawParseUtils.lineMap(buf, 3, buf.length);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map));
} }
@Test @Test
public void testFourLineBlanks() throws UnsupportedEncodingException { public void testFourLineBlanks() throws Exception {
final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1"); final byte[] buf = "foo\n\n\nbar\n".getBytes("ISO-8859-1");
final IntList map = RawParseUtils.lineMap(buf, 0, buf.length); final IntList map = RawParseUtils.lineMap(buf, 0, buf.length);

27
org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java

@ -93,7 +93,29 @@ public class RawText extends Sequence {
*/ */
public RawText(final byte[] input) { public RawText(final byte[] input) {
content = input; content = input;
lines = RawParseUtils.lineMap(content, 0, content.length); IntList map;
try {
map = RawParseUtils.lineMap(content, 0, content.length);
} catch (BinaryBlobException e) {
map = new IntList(3);
map.add(Integer.MIN_VALUE);
map.add(0);
map.add(content.length);
}
lines = map;
}
/**
* Construct a new RawText if the line map is already known.
*
* @param data
* the blob data.
* @param lineMap
* Indices of line starts, indexed by 1-based line number.
*/
private RawText(final byte[] data, final IntList lineMap) {
content = data;
lines = lineMap;
} }
/** /**
@ -357,7 +379,8 @@ public class RawText extends Sequence {
System.arraycopy(head, 0, data, 0, head.length); System.arraycopy(head, 0, data, 0, head.length);
IO.readFully(stream, data, off, (int) (sz-off)); IO.readFully(stream, data, off, (int) (sz-off));
return new RawText(data); IntList lineMap = RawParseUtils.lineMap(data, 0, data.length);
return new RawText(data, lineMap);
} }
} }
} }

15
org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java

@ -63,6 +63,7 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.eclipse.jgit.annotations.Nullable; import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.PersonIdent;
@ -618,9 +619,6 @@ public final class RawParseUtils {
* <p> * <p>
* The last element (index <code>map.size()-1</code>) always contains * The last element (index <code>map.size()-1</code>) always contains
* <code>end</code>. * <code>end</code>.
* <p>
* If the data contains a '\0' anywhere, the whole region is considered binary
* and a LineMap corresponding to a single line is returned.
* </p> * </p>
* *
* @param buf * @param buf
@ -631,10 +629,9 @@ public final class RawParseUtils {
* @param end * @param end
* 1 past the end of the content within <code>buf</code>. * 1 past the end of the content within <code>buf</code>.
* @return a line map indexing the start position of each line. * @return a line map indexing the start position of each line.
* @throws BinaryBlobException if any '\0' is found.
*/ */
public static final IntList lineMap(final byte[] buf, int ptr, int end) { public static final IntList lineMap(final byte[] buf, int ptr, int end) throws BinaryBlobException {
int start = ptr;
// Experimentally derived from multiple source repositories // Experimentally derived from multiple source repositories
// the average number of bytes/line is 36. It's a rough guess // the average number of bytes/line is 36. It's a rough guess
// to initially size our map close to the target. // to initially size our map close to the target.
@ -647,11 +644,7 @@ public final class RawParseUtils {
} }
if (buf[ptr] == '\0') { if (buf[ptr] == '\0') {
// binary data. throw new BinaryBlobException();
map = new IntList(3);
map.add(Integer.MIN_VALUE);
map.add(start);
break;
} }
foundLF = (buf[ptr] == '\n'); foundLF = (buf[ptr] == '\n');

Loading…
Cancel
Save