Browse Source

Introduce new RawText constructor and RawParseUtils.lineMapOrBinary

This makes binary detection exact in ResolveMerger and DiffFormatter

This has the same intention as
Id4342a199628d9406bfa04af1b023c27a47d4014 but preserves backward
compatibility of the signature of RawParseUtils.lineMap.

Change-Id: Ia24a4e716592bab3363ae24e3a46315a7511154f
Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
stable-5.0
Han-Wen Nienhuys 7 years ago committed by Matthias Sohn
parent
commit
08d2e0188c
  1. 10
      org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java
  2. 18
      org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java
  3. 21
      org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
  4. 45
      org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java

10
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RawTextTest.java

@ -64,6 +64,16 @@ public class RawTextTest {
assertEquals(0, r.size());
}
/**
 * Content containing a NUL byte is exposed as a single line covering the
 * whole input instead of being split at its line feeds.
 */
@Test
public void testBinary() {
	String input = "foo-a\nf\0o-b\n";
	byte[] data = Constants.encodeASCII(input);
	final RawText a = new RawText(data);
	// JUnit's convention is assertEquals(expected, actual); keeping that
	// order makes failure messages label the values correctly.
	assertEquals(data, a.content);
	assertEquals(1, a.size());
	assertEquals(input, a.getString(0, 1, false));
}
@Test
public void testEquals() {
final RawText a = new RawText(Constants.encodeASCII("foo-a\nfoo-b\n"));

18
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/RawParseUtils_LineMapTest.java

@ -47,18 +47,25 @@ import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertNotNull;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
public class RawParseUtils_LineMapTest {
@Rule
public ExpectedException exception = ExpectedException.none();
@Test
public void testEmpty() {
public void testEmpty() throws Exception {
final IntList map = RawParseUtils.lineMap(new byte[] {}, 0, 0);
assertNotNull(map);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0}, asInts(map));
}
@Test
public void testOneBlankLine() {
public void testOneBlankLine() throws Exception {
final IntList map = RawParseUtils.lineMap(new byte[] { '\n' }, 0, 1);
assertArrayEquals(new int[]{Integer.MIN_VALUE, 0, 1}, asInts(map));
}
@ -84,6 +91,13 @@ public class RawParseUtils_LineMapTest {
assertArrayEquals(new int[]{Integer.MIN_VALUE, 3, buf.length}, asInts(map));
}
/**
 * Scanning a range that contains a NUL byte must raise
 * {@link BinaryBlobException}.
 */
@Test
public void testLineMapOrBinary() throws Exception {
	// The range starts after the 3-byte "xxx" prefix; the NUL sits in the
	// second line of the range.
	byte[] content = "xxxfoo\nb\0ar".getBytes(ISO_8859_1);
	int rangeStart = 3;
	int rangeEnd = content.length;

	// Register the expectation before the call that is meant to throw.
	exception.expect(BinaryBlobException.class);
	RawParseUtils.lineMapOrBinary(content, rangeStart, rangeEnd);
}
@Test
public void testFourLineBlanks() {
final byte[] buf = "foo\n\n\nbar\n".getBytes(ISO_8859_1);

21
org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java

@ -92,8 +92,25 @@ public class RawText extends Sequence {
* through cached arrays is safe.
*/
public RawText(final byte[] input) {
// Delegate to the two-argument constructor with a line map computed over
// the whole array.
this(input, RawParseUtils.lineMap(input, 0, input.length));
}
/**
 * Create a new sequence from the existing content byte array and a
 * precomputed line map indicating the line boundaries.
 *
 * @param input
 *            the content array. The array is never modified, so passing
 *            through cached arrays is safe.
 * @param lineMap
 *            the start offset of each line of the input, indexed by
 *            1-based line number. The first entry should be
 *            {@link Integer#MIN_VALUE} and the last entry should be the
 *            end of the array.
 * @since 5.0
 */
public RawText(final byte[] input, IntList lineMap) {
	content = input;
	// Use the caller-supplied map as-is; computing a map here as well
	// would only rescan the content and then be overwritten.
	lines = lineMap;
}
/**
@ -369,7 +386,7 @@ public class RawText extends Sequence {
System.arraycopy(head, 0, data, 0, head.length);
IO.readFully(stream, data, off, (int) (sz-off));
return new RawText(data);
return new RawText(data, RawParseUtils.lineMapOrBinary(data, 0, (int) sz));
}
}
}

45
org.eclipse.jgit/src/org/eclipse/jgit/util/RawParseUtils.java

@ -63,6 +63,7 @@ import java.util.HashMap;
import java.util.Map;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.BinaryBlobException;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.PersonIdent;
@ -632,11 +633,37 @@ public final class RawParseUtils {
* line 1.
* @param end
* 1 past the end of the content within <code>buf</code>.
* @return a line map indexing the start position of each line, or a map
* representing the content between <code>ptr</code> and
* <code>end</code> as a single line if a '\0' is found.
*/
public static final IntList lineMap(final byte[] buf, int ptr, int end) {
	try {
		return lineMapOrBinary(buf, ptr, end);
	} catch (BinaryBlobException e) {
		// Binary content is not an error for this method: report the
		// range from ptr to end as one single line instead.
		final IntList map = new IntList(3);
		// Index 0 is a placeholder so that line 1 lives at index 1.
		map.add(Integer.MIN_VALUE);
		map.add(ptr);
		map.add(end);
		return map;
	}
}
/**
* Like {@link #lineMap(byte[], int, int)} but throw {@link BinaryBlobException} if a null char
* is encountered.
* @param buf buffer to scan.
* @param ptr position within the buffer corresponding to the first byte of
* line 1.
* @param end 1 past the end of the content within <code>buf</code>.
* @return a line map indexing the start position of each line.
* @throws BinaryBlobException
* if a '\0' byte is encountered.
*
* @since 5.0
*/
public static final IntList lineMapOrBinary(final byte[] buf, int ptr, int end)
throws BinaryBlobException {
// Experimentally derived from multiple source repositories,
// the average number of bytes/line is 36. It's a rough guess
// to initially size our map close to the target.
@ -649,11 +676,15 @@ public final class RawParseUtils {
}
if (buf[ptr] == '\0') {
// binary data.
map = new IntList(3);
map.add(Integer.MIN_VALUE);
map.add(start);
break;
throw new BinaryBlobException() {
private static final long serialVersionUID = 1L;
@Override
public Throwable fillInStackTrace() {
return this;
}
};
}
foundLF = (buf[ptr] == '\n');

Loading…
Cancel
Save