Browse Source

PackWriter: Sort commits by parse order to improve locality

RevWalk in JGit and the revision code in C Git both parse commits out
of the pack file in an order that differs from strict timestamp and
topological sorting.  Both implementations pop a commit from the head
of a date queue, and then immediately parse all of its parents in
order to insert those into the date queue at the proper positions as
determined by their committer timestamp field.  This implies that the
parents are parsed when their most recent child is popped from the
queue, and not when they themselves are popped during traversal.

Hoisting a parent commit to be immediately behind its child improves
locality by making sure all parents of a merge are clustered together,
and thus can be paged into the parser together by the pack file
buffering system (aka WindowCache in JGit).

Change-Id: I80f9e64cafa2e8f082776b43845edf23065386a2
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.12
Shawn O. Pearce 14 years ago
parent
commit
733780e8a1
  1. 41
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

41
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

@ -1116,6 +1116,7 @@ public class PackWriter {
final Map<ObjectId, CachedPack> tipToPack = new HashMap<ObjectId, CachedPack>();
final RevFlag inCachedPack = walker.newFlag("inCachedPack");
final RevFlag include = walker.newFlag("include");
final RevFlag added = walker.newFlag("added");
final RevFlagSet keepOnRestart = new RevFlagSet();
keepOnRestart.add(inCachedPack);
@ -1177,13 +1178,15 @@ public class PackWriter {
int typesToPrune = 0;
final int maxBases = config.getDeltaSearchWindowSize();
Set<RevTree> baseTrees = new HashSet<RevTree>();
RevObject o;
while ((o = walker.next()) != null) {
if (o.has(inCachedPack)) {
CachedPack pack = tipToPack.get(o);
List<RevCommit> commits = new ArrayList<RevCommit>();
RevCommit c;
while ((c = walker.next()) != null) {
if (c.has(inCachedPack)) {
CachedPack pack = tipToPack.get(c);
if (includesAllTips(pack, include, walker)) {
useCachedPack(walker, keepOnRestart, //
wantObjs, haveObjs, pack);
commits = new ArrayList<RevCommit>();
countingMonitor.endTask();
countingMonitor.beginTask(JGitText.get().countingObjects,
@ -1192,16 +1195,36 @@ public class PackWriter {
}
}
if (o.has(RevFlag.UNINTERESTING)) {
if (c.has(RevFlag.UNINTERESTING)) {
if (baseTrees.size() <= maxBases)
baseTrees.add(((RevCommit) o).getTree());
baseTrees.add(c.getTree());
continue;
}
addObject(o, 0);
commits.add(c);
countingMonitor.update(1);
}
if (objectsLists[Constants.OBJ_COMMIT] instanceof ArrayList) {
ArrayList<ObjectToPack> list = (ArrayList<ObjectToPack>) objectsLists[Constants.OBJ_COMMIT];
list.ensureCapacity(list.size() + commits.size());
}
for (RevCommit cmit : commits) {
if (!cmit.has(added)) {
cmit.add(added);
addObject(cmit, 0);
}
for (int i = 0; i < cmit.getParentCount(); i++) {
RevCommit p = cmit.getParent(i);
if (!p.has(added) && !p.has(RevFlag.UNINTERESTING)) {
p.add(added);
addObject(p, 0);
}
}
}
commits = null;
for (CachedPack p : cachedPacks) {
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_COMMIT])) {
if (baseTrees.size() <= maxBases)
@ -1213,6 +1236,7 @@ public class PackWriter {
BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, //
objectsMap, edgeObjects, reader);
RevObject o;
while ((o = walker.nextObject()) != null) {
if (o.has(RevFlag.UNINTERESTING))
continue;
@ -1284,9 +1308,6 @@ public class PackWriter {
for (ObjectId id : pack.getTips())
baseObj.add(walker.lookupOrNull(id));
objectsMap.clear();
objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
setThin(true);
walker.resetRetain(keepOnRestart);
walker.sort(RevSort.TOPO);

Loading…
Cancel
Save