Browse Source

Improve DiffFormatter text file access

When we are asked to create a difference between two files the caller
really wants to see that output.  Instead of punting because a file
is too big to process, consider it to be binary.  This reduces the
accuracy of our output display, but makes it a lot more likely that
the formatter can still generate something semi-useful.

We set our default binary threshold to 50 MiB, which is the same
threshold that PackWriter uses before punting and deciding a file
is too big to delta compress.  Anything under this size we try to
load and process, anything over that size (or that won't allocate
in the heap) gets tagged as binary.

Change-Id: I69553c9ef96db7f2058c6210657f1181ce882335
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.9
Shawn O. Pearce 14 years ago
parent
commit
9f323462be
  1. 113
      org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java

113
org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffFormatter.java

@ -62,10 +62,10 @@ import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.errors.AmbiguousObjectException;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.CoreConfig;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
@ -74,6 +74,7 @@ import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.patch.FileHeader;
import org.eclipse.jgit.patch.HunkHeader;
import org.eclipse.jgit.patch.FileHeader.PatchType;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.util.QuotedString;
import org.eclipse.jgit.util.io.DisabledOutputStream;
@ -81,8 +82,16 @@ import org.eclipse.jgit.util.io.DisabledOutputStream;
* Format a Git style patch script.
*/
public class DiffFormatter {
private static final int DEFAULT_BINARY_FILE_THRESHOLD = PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
private static final byte[] noNewLine = encodeASCII("\\ No newline at end of file\n");
/** Magic return content indicating it is empty or no content present. */
private static final byte[] EMPTY = new byte[] {};
/** Magic return indicating the content is binary. */
private static final byte[] BINARY = new byte[] {};
private final OutputStream out;
private Repository db;
@ -93,7 +102,7 @@ public class DiffFormatter {
private RawText.Factory rawTextFactory = RawText.FACTORY;
private int bigFileThreshold = 50 * 1024 * 1024;
private int binaryFileThreshold = DEFAULT_BINARY_FILE_THRESHOLD;
private String oldPrefix = "a/";
@ -124,9 +133,6 @@ public class DiffFormatter {
*/
public void setRepository(Repository repository) {
db = repository;
CoreConfig cfg = db.getConfig().get(CoreConfig.KEY);
bigFileThreshold = cfg.getStreamFileThreshold();
}
/**
@ -175,16 +181,17 @@ public class DiffFormatter {
}
/**
* Set the maximum file size that should be considered for diff output.
* <p>
* Text files that are larger than this size will not have a difference
* generated during output.
* Set maximum file size for text files.
*
* Files larger than this size will be treated as though they are binary and
* not text. Default is {@value #DEFAULT_BINARY_FILE_THRESHOLD} .
*
* @param bigFileThreshold
* the limit, in bytes.
* @param threshold
* the limit, in bytes. Files larger than this size will be
* assumed to be binary, even if they aren't.
*/
public void setBigFileThreshold(int bigFileThreshold) {
this.bigFileThreshold = bigFileThreshold;
public void setBinaryFileThreshold(int threshold) {
this.binaryFileThreshold = threshold;
}
/**
@ -282,28 +289,6 @@ public class DiffFormatter {
return ('"' + name + '"').equals(q) ? name : q;
}
private byte[] open(ObjectReader reader, FileMode mode,
AbbreviatedObjectId id) throws IOException {
if (mode == FileMode.MISSING)
return new byte[] {};
if (mode.getObjectType() != Constants.OBJ_BLOB)
return new byte[] {};
if (!id.isComplete()) {
Collection<ObjectId> ids = reader.resolve(id);
if (ids.size() == 1)
id = AbbreviatedObjectId.fromObjectId(ids.iterator().next());
else if (ids.size() == 0)
throw new MissingObjectException(id, Constants.OBJ_BLOB);
else
throw new AmbiguousObjectException(id, ids);
}
ObjectLoader ldr = reader.open(id.toObjectId());
return ldr.getCachedBytes(bigFileThreshold);
}
/**
* Format a patch script, reusing a previously parsed FileHeader.
* <p>
@ -560,16 +545,24 @@ public class DiffFormatter {
if (db == null)
throw new IllegalStateException(
JGitText.get().repositoryIsRequired);
ObjectReader reader = db.newObjectReader();
byte[] aRaw, bRaw;
try {
aRaw = open(reader, ent.getOldMode(), ent.getOldId());
bRaw = open(reader, ent.getNewMode(), ent.getNewId());
aRaw = open(reader, //
ent.getOldPath(), //
ent.getOldMode(), //
ent.getOldId());
bRaw = open(reader, //
ent.getNewPath(), //
ent.getNewMode(), //
ent.getNewId());
} finally {
reader.release();
}
if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
if (aRaw == BINARY || bRaw == BINARY //
|| RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
formatOldNewPaths(buf, ent);
buf.write(encodeASCII("Binary files differ\n"));
editList = new EditList();
@ -599,6 +592,50 @@ public class DiffFormatter {
return res;
}
private byte[] open(ObjectReader reader, String path, FileMode mode,
AbbreviatedObjectId id) throws IOException {
if (mode == FileMode.MISSING)
return EMPTY;
if (mode.getObjectType() != Constants.OBJ_BLOB)
return EMPTY;
if (isBinary(path))
return BINARY;
if (!id.isComplete()) {
Collection<ObjectId> ids = reader.resolve(id);
if (ids.size() == 1)
id = AbbreviatedObjectId.fromObjectId(ids.iterator().next());
else if (ids.size() == 0)
throw new MissingObjectException(id, Constants.OBJ_BLOB);
else
throw new AmbiguousObjectException(id, ids);
}
try {
ObjectLoader ldr = reader.open(id.toObjectId());
return ldr.getBytes(binaryFileThreshold);
} catch (LargeObjectException.ExceedsLimit overLimit) {
return BINARY;
} catch (LargeObjectException.ExceedsByteArrayLimit overLimit) {
return BINARY;
} catch (LargeObjectException.OutOfMemory tooBig) {
return BINARY;
} catch (LargeObjectException tooBig) {
tooBig.setObjectId(id.toObjectId());
throw tooBig;
}
}
private boolean isBinary(String path) {
return false;
}
private void formatHeader(ByteArrayOutputStream o, DiffEntry ent)
throws IOException {
final ChangeType type = ent.getChangeType();

Loading…
Cancel
Save