Browse Source

PackWriter: Hoist and cluster reference targets

Many source browsers and network related tools like UploadPack need
to find and parse the target of all branches and annotated tags
within the repository during their startup phase.  Clustering these
together into the same part of the pack file will improve locality,
reducing thrashing when an application starts and needs to load
all of these into memory at once.

To prevent bottlenecking basic log viewing tools that are scannning
backwards from the tip of a current branch (and don't need tags)
we place this cluster of older targets after 4096 newer commits
have already been placed into the pack stream.  4096 was chosen as
a rough guess, but was based on a few factors:

  - log viewers typically show 5-200 commits per page
  - users only view the first page or two

  - DHT can cram 2200-4000 commits per 1 MiB chunk
    thus these will fall into the second commit chunk (roughly)

Unfortunately this placement hurts history tools that are scanning
backwards through the commit graph and completely ignored tags or
branch heads when they started.

An ancient tagged commit is no longer positioned behind its first
child (its now much earlier), resulting in a page fault for the
parser to reload this cluster of objects on demand.  This may be
an acceptable loss.  If a user is walking backwards and has already
scanned through more than 4096 commits of history, waiting for the
region to reload isn't really that bad compared to the amount of
time already spent.

If the repository is so small that there are less than 4096 commits,
this change has no impact on the placement of objects.

Change-Id: If3052e430d305e17878d94145c93754f56b74c61
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.12
Shawn O. Pearce 14 years ago
parent
commit
8f865bfffe
  1. 37
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java
  2. 12
      org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java
  3. 13
      org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java

37
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

@ -154,6 +154,8 @@ public class PackWriter {
private List<CachedPack> cachedPacks = new ArrayList<CachedPack>(2);
private Set<ObjectId> tagTargets = Collections.emptySet();
private Deflater myDeflater;
private final ObjectReader reader;
@ -330,6 +332,22 @@ public class PackWriter {
ignoreMissingUninteresting = ignore;
}
/**
* Set the tag targets that should be hoisted earlier during packing.
* <p>
* Callers may put objects into this set before invoking any of the
* preparePack methods to influence where an annotated tag's target is
* stored within the resulting pack. Typically these will be clustered
* together, and hoisted earlier in the file even if they are ancient
* revisions, allowing readers to find tag targets with better locality.
*
* @param objects
* objects that annotated tags point at.
*/
public void setTagTargets(Set<ObjectId> objects) {
tagTargets = objects;
}
/**
* Returns objects number in a pack file that was created by this writer.
*
@ -1251,10 +1269,14 @@ public class PackWriter {
ArrayList<ObjectToPack> list = (ArrayList<ObjectToPack>) objectsLists[Constants.OBJ_COMMIT];
list.ensureCapacity(list.size() + commits.size());
}
int commitCnt = 0;
boolean putTagTargets = false;
for (RevCommit cmit : commits) {
if (!cmit.has(added)) {
cmit.add(added);
addObject(cmit, 0);
commitCnt++;
}
for (int i = 0; i < cmit.getParentCount(); i++) {
@ -1262,7 +1284,22 @@ public class PackWriter {
if (!p.has(added) && !p.has(RevFlag.UNINTERESTING)) {
p.add(added);
addObject(p, 0);
commitCnt++;
}
}
if (!putTagTargets && 4096 < commitCnt) {
for (ObjectId id : tagTargets) {
RevObject obj = walker.lookupOrNull(id);
if (obj instanceof RevCommit
&& obj.has(include)
&& !obj.has(RevFlag.UNINTERESTING)
&& !obj.has(added)) {
obj.add(added);
addObject(obj, 0);
}
}
putTagTargets = true;
}
}
commits = null;

12
org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java

@ -88,6 +88,8 @@ public class BundleWriter {
private final Set<RevCommit> assume;
private final Set<ObjectId> tagTargets;
private PackConfig packConfig;
/**
@ -100,6 +102,7 @@ public class BundleWriter {
db = repo;
include = new TreeMap<String, ObjectId>();
assume = new HashSet<RevCommit>();
tagTargets = new HashSet<ObjectId>();
}
/**
@ -143,6 +146,13 @@ public class BundleWriter {
*/
public void include(final Ref r) {
include(r.getName(), r.getObjectId());
if (r.getPeeledObjectId() != null)
tagTargets.add(r.getPeeledObjectId());
else if (r.getObjectId() != null
&& r.getName().startsWith(Constants.R_HEADS))
tagTargets.add(r.getObjectId());
}
/**
@ -192,6 +202,8 @@ public class BundleWriter {
exc.add(r.getId());
packWriter.setDeltaBaseAsOffset(true);
packWriter.setThin(exc.size() > 0);
if (exc.size() == 0)
packWriter.setTagTargets(tagTargets);
packWriter.preparePack(monitor, inc, exc);
final Writer w = new OutputStreamWriter(os, Constants.CHARSET);

13
org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java

@ -695,6 +695,19 @@ public class UploadPack {
pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA));
pw.setThin(options.contains(OPTION_THIN_PACK));
if (commonBase.isEmpty()) {
Set<ObjectId> tagTargets = new HashSet<ObjectId>();
for (Ref ref : refs.values()) {
if (ref.getPeeledObjectId() != null)
tagTargets.add(ref.getPeeledObjectId());
else if (ref.getObjectId() == null)
continue;
else if (ref.getName().startsWith(Constants.R_HEADS))
tagTargets.add(ref.getObjectId());
}
pw.setTagTargets(tagTargets);
}
RevWalk rw = walk;
if (wantAll.isEmpty()) {
pw.preparePack(pm, wantIds, commonBase);

Loading…
Cancel
Save