Browse Source

Shallow fetch: avoid sending unneeded blobs

When doing an incremental fetch from JGit, "have" commits are marked
as "uninteresting". In a non-shallow fetch, when the RevWalk hits an
"uninteresting" commit it marks the commit's corresponding tree as
uninteresting. That has the effect of dropping those trees and all the
trees and blobs they reference out of the thin pack returned to the
client.

However, shallow fetches use a DepthWalk to limit the RevWalk, which
nearly always causes the RevWalk to terminate before encountering the
"have" commits. As a result the pack created for the incremental fetch
never encounters "uninteresting" tree objects and thus includes
duplicate objects that it knows the client already has.

Change-Id: I7b1f7c3b0d83e04d34cd2fa676f1ad4fec904c05
Signed-off-by: Terry Parker <tparker@google.com>
stable-4.5
Terry Parker 8 years ago
parent
commit
7edf05530d
  1. 70
      org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java
  2. 20
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java

70
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java

@ -76,6 +76,9 @@ import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet; import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ObjectInserter; import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.Sets; import org.eclipse.jgit.lib.Sets;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.DepthWalk;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevBlob; import org.eclipse.jgit.revwalk.RevBlob;
import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevObject;
@ -542,15 +545,74 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
expected.contains(pi.getObjectId(i))); expected.contains(pi.getObjectId(i)));
} }
@Test
public void testShallowIsMinimal() throws Exception {
FileRepository repo = createBareRepository();
TestRepository<Repository> r = new TestRepository<Repository>(repo);
BranchBuilder bb = r.branch("refs/heads/master");
RevBlob contentA = r.blob("A");
RevBlob contentB = r.blob("B");
RevBlob contentC = r.blob("C");
RevBlob contentD = r.blob("D");
RevBlob contentE = r.blob("E");
RevCommit c1 = bb.commit().add("a", contentA).create();
RevCommit c2 = bb.commit().add("b", contentB).create();
RevCommit c3 = bb.commit().add("c", contentC).create();
RevCommit c4 = bb.commit().add("d", contentD).create();
RevCommit c5 = bb.commit().add("e", contentE).create();
r.getRevWalk().parseHeaders(c1);
r.getRevWalk().parseHeaders(c2);
r.getRevWalk().parseHeaders(c3);
r.getRevWalk().parseHeaders(c4);
r.getRevWalk().parseHeaders(c5);
PackIndex idx = writeShallowPack(repo, 1, wants(c2), NONE, NONE);
assertContent(idx,
Arrays.asList(c1.getId(), c2.getId(), c1.getTree().getId(),
c2.getTree().getId(), contentA.getId(),
contentB.getId()));
// Client already has blobs A and B, verify those are not packed.
idx = writeShallowPack(repo, 1, wants(c5), haves(c1, c2), shallows(c1));
assertContent(idx,
Arrays.asList(c4.getId(), c5.getId(), c4.getTree().getId(),
c5.getTree().getId(), contentC.getId(),
contentD.getId(), contentE.getId()));
}
private static PackIndex writePack(FileRepository repo, private static PackIndex writePack(FileRepository repo,
Set<? extends ObjectId> want, Set<ObjectIdSet> excludeObjects) Set<? extends ObjectId> want, Set<ObjectIdSet> excludeObjects)
throws IOException { throws IOException {
RevWalk walk = new RevWalk(repo);
return writePack(repo, walk, 0, want, NONE, excludeObjects);
}
private static PackIndex writeShallowPack(FileRepository repo, int depth,
Set<? extends ObjectId> want, Set<? extends ObjectId> have,
Set<? extends ObjectId> shallow) throws IOException {
// During negotiation, UploadPack would have set up a DepthWalk and
// marked the client's "shallow" commits. Emulate that here.
DepthWalk.RevWalk walk = new DepthWalk.RevWalk(repo, depth);
walk.assumeShallow(shallow);
return writePack(repo, walk, depth, want, have, EMPTY_ID_SET);
}
private static PackIndex writePack(FileRepository repo, RevWalk walk,
int depth, Set<? extends ObjectId> want,
Set<? extends ObjectId> have, Set<ObjectIdSet> excludeObjects)
throws IOException {
try (PackWriter pw = new PackWriter(repo)) { try (PackWriter pw = new PackWriter(repo)) {
pw.setDeltaBaseAsOffset(true); pw.setDeltaBaseAsOffset(true);
pw.setReuseDeltaCommits(false); pw.setReuseDeltaCommits(false);
for (ObjectIdSet idx : excludeObjects) for (ObjectIdSet idx : excludeObjects) {
pw.excludeObjects(idx); pw.excludeObjects(idx);
pw.preparePack(NullProgressMonitor.INSTANCE, want, NONE); }
if (depth > 0) {
pw.setShallowPack(depth, null);
}
ObjectWalk ow = walk.toObjectWalkWithSameObjects();
pw.preparePack(NullProgressMonitor.INSTANCE, ow, want, have);
String id = pw.computeName().getName(); String id = pw.computeName().getName();
File packdir = new File(repo.getObjectsDirectory(), "pack"); File packdir = new File(repo.getObjectsDirectory(), "pack");
File packFile = new File(packdir, "pack-" + id + ".pack"); File packFile = new File(packdir, "pack-" + id + ".pack");
@ -737,4 +799,8 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
private static Set<ObjectId> wants(ObjectId... objects) { private static Set<ObjectId> wants(ObjectId... objects) {
return Sets.of(objects); return Sets.of(objects);
} }
private static Set<ObjectId> shallows(ObjectId... objects) {
return Sets.of(objects);
}
} }

20
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java

@ -1653,6 +1653,8 @@ public class PackWriter implements AutoCloseable {
List<RevObject> haveObjs = new ArrayList<RevObject>(haveEst); List<RevObject> haveObjs = new ArrayList<RevObject>(haveEst);
List<RevTag> wantTags = new ArrayList<RevTag>(want.size()); List<RevTag> wantTags = new ArrayList<RevTag>(want.size());
// Retrieve the RevWalk's versions of "want" and "have" objects to
// maintain any flags previously set in the RevWalk.
AsyncRevObjectQueue q = walker.parseAny(all, true); AsyncRevObjectQueue q = walker.parseAny(all, true);
try { try {
for (;;) { for (;;) {
@ -1695,12 +1697,26 @@ public class PackWriter implements AutoCloseable {
if (walker instanceof DepthWalk.ObjectWalk) { if (walker instanceof DepthWalk.ObjectWalk) {
DepthWalk.ObjectWalk depthWalk = (DepthWalk.ObjectWalk) walker; DepthWalk.ObjectWalk depthWalk = (DepthWalk.ObjectWalk) walker;
for (RevObject obj : wantObjs) for (RevObject obj : wantObjs) {
depthWalk.markRoot(obj); depthWalk.markRoot(obj);
}
// Mark the tree objects associated with "have" commits as
// uninteresting to avoid writing redundant blobs. A normal RevWalk
// lazily propagates the "uninteresting" state from a commit to its
// tree during the walk, but DepthWalks can terminate early so
// preemptively propagate that state here.
for (RevObject obj : haveObjs) {
if (obj instanceof RevCommit) {
RevTree t = ((RevCommit) obj).getTree();
depthWalk.markUninteresting(t);
}
}
if (unshallowObjects != null) { if (unshallowObjects != null) {
for (ObjectId id : unshallowObjects) for (ObjectId id : unshallowObjects) {
depthWalk.markUnshallow(walker.parseAny(id)); depthWalk.markUnshallow(walker.parseAny(id));
} }
}
} else { } else {
for (RevObject obj : wantObjs) for (RevObject obj : wantObjs)
walker.markStart(obj); walker.markStart(obj);

Loading…
Cancel
Save