Browse Source

Added check for binary files while diffing

Added a check in Diff to ensure that files that are most likely
not text are not line-by-line diffed. Files are determined to be
binary by checking the first 8000 bytes for a null character. This
is a similar heuristic to what C Git uses.

Change-Id: I2b6f05674c88d89b3f549a5db483f850f7f46c26
stable-0.9
Jeff Schumacher 15 years ago committed by Shawn O. Pearce
parent
commit
9f2249bd26
  1. 23
      org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java
  2. 21
      org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java

23
org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java

@ -132,16 +132,28 @@ class Diff extends TextBuiltin {
+ (mode1.equals(mode2) ? " " + mode1 : "")); + (mode1.equals(mode2) ? " " + mode1 : ""));
out.println("--- " + (isNew ? "/dev/null" : name1)); out.println("--- " + (isNew ? "/dev/null" : name1));
out.println("+++ " + (isDelete ? "/dev/null" : name2)); out.println("+++ " + (isDelete ? "/dev/null" : name2));
RawText a = getRawText(id1);
RawText b = getRawText(id2); byte[] aRaw = getRawBytes(id1);
byte[] bRaw = getRawBytes(id2);
if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
out.println("Binary files differ");
return;
}
RawText a = getRawText(aRaw);
RawText b = getRawText(bRaw);
MyersDiff diff = new MyersDiff(a, b); MyersDiff diff = new MyersDiff(a, b);
fmt.formatEdits(out, a, b, diff.getEdits()); fmt.formatEdits(out, a, b, diff.getEdits());
} }
private RawText getRawText(ObjectId id) throws IOException { private byte[] getRawBytes(ObjectId id) throws IOException {
if (id.equals(ObjectId.zeroId())) if (id.equals(ObjectId.zeroId()))
return new RawText(new byte[] {}); return new byte[] {};
byte[] raw = db.openBlob(id).getCachedBytes(); return db.openBlob(id).getCachedBytes();
}
private RawText getRawText(byte[] raw) {
if (ignoreWsAll) if (ignoreWsAll)
return new RawTextIgnoreAllWhitespace(raw); return new RawTextIgnoreAllWhitespace(raw);
else if (ignoreWsTrailing) else if (ignoreWsTrailing)
@ -154,4 +166,3 @@ class Diff extends TextBuiltin {
return new RawText(raw); return new RawText(raw);
} }
} }

21
org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java

@ -65,6 +65,9 @@ import org.eclipse.jgit.util.RawParseUtils;
* they are converting from "line number" to "element index". * they are converting from "line number" to "element index".
*/ */
public class RawText implements Sequence { public class RawText implements Sequence {
/**
 * Upper bound on how many leading bytes {@link #isBinary(byte[])} scans
 * for a NUL byte; content beyond this prefix is not examined.
 */
private static final int FIRST_FEW_BYTES = 8000;
/** The file content for this sequence. */ /** The file content for this sequence. */
protected final byte[] content; protected final byte[] content;
@ -202,4 +205,22 @@ public class RawText implements Sequence {
hash = (hash << 5) ^ (raw[ptr] & 0xff); hash = (hash << 5) ^ (raw[ptr] & 0xff);
return hash; return hash;
} }
/**
 * Guess whether a buffer holds binary data rather than text.
 * <p>
 * Only a leading prefix of the buffer (at most {@code FIRST_FEW_BYTES}
 * bytes) is inspected; the content is reported as binary as soon as a NUL
 * byte is found within that prefix.
 *
 * @param raw
 *            the raw file content.
 * @return true if a NUL byte occurs in the inspected prefix, meaning the
 *         content is likely binary; false otherwise (including for an
 *         empty buffer).
 */
public static boolean isBinary(byte[] raw) {
	// Same heuristic as C Git: a NUL near the start means "not text".
	final int limit = Math.min(raw.length, FIRST_FEW_BYTES);
	int idx = 0;
	while (idx < limit) {
		if (raw[idx] == 0) {
			return true;
		}
		idx++;
	}
	return false;
}
} }

Loading…
Cancel
Save