From 127a5f95e1d20b6dd84f4d2ce2a2fb435b36fb56 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Tue, 24 Aug 2010 12:50:59 -0700 Subject: [PATCH] Use limited getCachedBytes in RevWalk Parsing is rewritten to use the size limited form of getCachedBytes, thus freeing the revwalk infrastructure from needing to care about a large object vs. a small object when it gets an ObjectLoader. Right now we hardcode our upper bound for a commit or annotated tag to be 15 MiB. I don't know of any that is more than 1 MiB in the wild, so going 15x that should give us some reasonable headroom. Change-Id: If296c211d8b257d76e44908504e71dd9ba70ffa8 Signed-off-by: Shawn O. Pearce --- .../src/org/eclipse/jgit/revwalk/RevBlob.java | 20 +++++++++ .../org/eclipse/jgit/revwalk/RevCommit.java | 4 +- .../org/eclipse/jgit/revwalk/RevObject.java | 45 ++----------------- .../src/org/eclipse/jgit/revwalk/RevTag.java | 4 +- .../src/org/eclipse/jgit/revwalk/RevTree.java | 20 +++++++++ .../src/org/eclipse/jgit/revwalk/RevWalk.java | 33 ++++++++++++-- 6 files changed, 78 insertions(+), 48 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevBlob.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevBlob.java index 5bf0ad146..4245fcab9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevBlob.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevBlob.java @@ -45,6 +45,10 @@ package org.eclipse.jgit.revwalk; +import java.io.IOException; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; @@ -64,4 +68,20 @@ public class RevBlob extends RevObject { public final int getType() { return Constants.OBJ_BLOB; } + + @Override + void parseHeaders(RevWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + if (walk.reader.has(this)) + flags |= PARSED; + else + throw new MissingObjectException(this, getType()); + } + + @Override + void parseBody(RevWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + if ((flags & PARSED) == 0) + parseHeaders(walk); + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java index 9e018b7a0..05c173888 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevCommit.java @@ -133,14 +133,14 @@ public class RevCommit extends RevObject { @Override void parseHeaders(final RevWalk walk) throws MissingObjectException, IncorrectObjectTypeException, IOException { - parseCanonical(walk, loadCanonical(walk)); + parseCanonical(walk, walk.getCachedBytes(this)); } @Override void parseBody(final RevWalk walk) throws MissingObjectException, IncorrectObjectTypeException, IOException { if (buffer == null) { - buffer = loadCanonical(walk); + buffer = walk.getCachedBytes(this); if ((flags & PARSED) == 0) parseCanonical(walk, buffer); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevObject.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevObject.java index b31bc3768..f343c1373 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevObject.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevObject.java @@ -45,64 +45,27 @@ package org.eclipse.jgit.revwalk; import java.io.IOException; -import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; -import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.lib.ObjectLoader; -import org.eclipse.jgit.lib.ObjectStream; -import org.eclipse.jgit.util.IO; /** Base object type accessed during revision walking. */ public abstract class RevObject extends ObjectId { static final int PARSED = 1; - static byte[] asByteArray(RevObject obj, ObjectLoader loader) throws IOException { - if (loader.isLarge()) { - ObjectStream in = loader.openStream(); - try { - long sz = in.getSize(); - if (Integer.MAX_VALUE <= sz){ - if (obj != null) - throw new LargeObjectException(obj.copy()); - throw new LargeObjectException(); - } - byte[] buf = new byte[(int) sz]; - IO.readFully(in, buf, 0, buf.length); - return buf; - } finally { - in.close(); - } - } - return loader.getCachedBytes(); - } - int flags; RevObject(final AnyObjectId name) { super(name); } - void parseHeaders(final RevWalk walk) throws MissingObjectException, - IncorrectObjectTypeException, IOException { - loadCanonical(walk); - flags |= PARSED; - } - - void parseBody(final RevWalk walk) throws MissingObjectException, - IncorrectObjectTypeException, IOException { - if ((flags & PARSED) == 0) - parseHeaders(walk); - } + abstract void parseHeaders(RevWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, IOException; - final byte[] loadCanonical(final RevWalk walk) throws IOException, - MissingObjectException, IncorrectObjectTypeException, - CorruptObjectException { - return asByteArray(this, walk.reader.open(this, getType())); - } + abstract void parseBody(RevWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, IOException; /** * Get Git object type. See {@link Constants}. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTag.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTag.java index aa33d91a5..bc53c28cc 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTag.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTag.java @@ -131,14 +131,14 @@ public class RevTag extends RevObject { @Override void parseHeaders(final RevWalk walk) throws MissingObjectException, IncorrectObjectTypeException, IOException { - parseCanonical(walk, loadCanonical(walk)); + parseCanonical(walk, walk.getCachedBytes(this)); } @Override void parseBody(final RevWalk walk) throws MissingObjectException, IncorrectObjectTypeException, IOException { if (buffer == null) { - buffer = loadCanonical(walk); + buffer = walk.getCachedBytes(this); if ((flags & PARSED) == 0) parseCanonical(walk, buffer); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTree.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTree.java index 76ec28362..d37040903 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTree.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevTree.java @@ -45,6 +45,10 @@ package org.eclipse.jgit.revwalk; +import java.io.IOException; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; @@ -64,4 +68,20 @@ public class RevTree extends RevObject { public final int getType() { return Constants.OBJ_TREE; } + + @Override + void parseHeaders(RevWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + if (walk.reader.has(this)) + flags |= PARSED; + else + throw new MissingObjectException(this, getType()); + } + + @Override + void parseBody(RevWalk walk) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + if ((flags & PARSED) == 0) + parseHeaders(walk); + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java index e39f8b325..dd513d8a9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java @@ -61,6 +61,7 @@ import org.eclipse.jgit.errors.RevWalkException; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.AsyncObjectLoaderQueue; import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.CoreConfig; import org.eclipse.jgit.lib.MutableObjectId; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectIdSubclassMap; @@ -170,6 +171,9 @@ public class RevWalk implements Iterable { private final ObjectIdSubclassMap objects; + /** Largest commit or annotated tag we are willing to touch. */ + private final int bigFileThreshold; + private int freeFlags = APP_FLAGS; private int delayFreeFlags; @@ -226,6 +230,13 @@ public class RevWalk implements Iterable { filter = RevFilter.ALL; treeFilter = TreeFilter.ALL; retainBody = true; + + if (repo != null) { + CoreConfig cfg = repo.getConfig().get(CoreConfig.KEY); + bigFileThreshold = cfg.getStreamFileThreshold(); + } else { + bigFileThreshold = 15 * 1024 * 1024; + } } /** @return the reader this walker is using to load objects. */ @@ -813,13 +824,14 @@ public class RevWalk implements Iterable { } private RevObject parseNew(AnyObjectId id, ObjectLoader ldr) - throws CorruptObjectException, LargeObjectException { + throws LargeObjectException, CorruptObjectException, + MissingObjectException, IOException { RevObject r; int type = ldr.getType(); switch (type) { case Constants.OBJ_COMMIT: { final RevCommit c = createCommit(id); - c.parseCanonical(this, ldr.getCachedBytes()); + c.parseCanonical(this, getCachedBytes(c, ldr)); r = c; break; } @@ -835,7 +847,7 @@ public class RevWalk implements Iterable { } case Constants.OBJ_TAG: { final RevTag t = new RevTag(id); - t.parseCanonical(this, ldr.getCachedBytes()); + t.parseCanonical(this, getCachedBytes(t, ldr)); r = t; break; } @@ -847,6 +859,21 @@ public class RevWalk implements Iterable { return r; } + byte[] getCachedBytes(RevObject obj) throws LargeObjectException, + MissingObjectException, IncorrectObjectTypeException, IOException { + return getCachedBytes(obj, reader.open(obj, obj.getType())); + } + + byte[] getCachedBytes(RevObject obj, ObjectLoader ldr) + throws LargeObjectException, MissingObjectException, IOException { + try { + return ldr.getCachedBytes(bigFileThreshold); + } catch (LargeObjectException tooBig) { + tooBig.setObjectId(obj); + throw tooBig; + } + } + /** * Asynchronous object parsing. *