From 437be8dfade74b039eeee24e400addcbb8fe2ca9 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Fri, 8 Mar 2013 12:25:12 -0800 Subject: [PATCH 1/2] Simplify UploadPack by parsing wants separately from haves The DHT backend was very slow at parsing objects. To work around that performance limitation I obfuscated UploadPack by folding both the want and have sets together in a single parse queue. Since DHT was removed the complexity is no longer constructive to JGit. Doing this refactoring prepares the code for a slightly future change where the have lines need to be handled specially from the want lines. Splitting the parsing up into two phases makes such a modification trivial. Change-Id: If7aad533b82448bbb688278e21f709282e5ccf4b --- .../eclipse/jgit/transport/UploadPack.java | 143 ++++++++---------- 1 file changed, 60 insertions(+), 83 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java index bc4045416..f0ba0cdc5 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java @@ -787,74 +787,23 @@ public class UploadPack { preUploadHook.onBeginNegotiateRound(this, wantIds, peerHas.size()); if (peerHas.isEmpty()) return last; + if (wantAll.isEmpty() && !wantIds.isEmpty()) + parseWants(); - List toParse = peerHas; - HashSet peerHasSet = null; - boolean needMissing = false; sentReady = false; - - if (wantAll.isEmpty() && !wantIds.isEmpty()) { - // We have not yet parsed the want list. Parse it now. - peerHasSet = new HashSet(peerHas); - int cnt = wantIds.size() + peerHasSet.size(); - toParse = new ArrayList(cnt); - toParse.addAll(wantIds); - toParse.addAll(peerHasSet); - needMissing = true; - } - - Set notAdvertisedWants = null; int haveCnt = 0; - AsyncRevObjectQueue q = walk.parseAny(toParse, needMissing); + AsyncRevObjectQueue q = walk.parseAny(peerHas, false); try { for (;;) { RevObject obj; try { obj = q.next(); } catch (MissingObjectException notFound) { - ObjectId id = notFound.getObjectId(); - if (wantIds.contains(id)) { - String msg = MessageFormat.format( - JGitText.get().wantNotValid, id.name()); - throw new PackProtocolException(msg, notFound); - } continue; } if (obj == null) break; - // If the object is still found in wantIds, the want - // list wasn't parsed earlier, and was done in this batch. - // - if (wantIds.remove(obj)) { - if (!advertised.contains(obj) && requestPolicy != RequestPolicy.ANY) { - if (notAdvertisedWants == null) - notAdvertisedWants = new HashSet(); - notAdvertisedWants.add(obj); - } - - if (!obj.has(WANT)) { - obj.add(WANT); - wantAll.add(obj); - } - - if (!(obj instanceof RevCommit)) - obj.add(SATISFIED); - - if (obj instanceof RevTag) { - RevObject target = walk.peel(obj); - if (target instanceof RevCommit) { - if (!target.has(WANT)) { - target.add(WANT); - wantAll.add(target); - } - } - } - - if (!peerHasSet.contains(obj)) - continue; - } - last = obj; haveCnt++; @@ -891,25 +840,6 @@ public class UploadPack { q.release(); } - // If the client asked for non advertised object, check our policy. - if (notAdvertisedWants != null && !notAdvertisedWants.isEmpty()) { - switch (requestPolicy) { - case ADVERTISED: - default: - throw new PackProtocolException(MessageFormat.format( - JGitText.get().wantNotValid, - notAdvertisedWants.iterator().next().name())); - - case REACHABLE_COMMIT: - checkNotAdvertisedWants(notAdvertisedWants); - break; - - case ANY: - // Allow whatever was asked for. - break; - } - } - int missCnt = peerHas.size() - haveCnt; // If we don't have one of the objects but we're also willing to @@ -952,7 +882,61 @@ public class UploadPack { return last; } - private void checkNotAdvertisedWants(Set notAdvertisedWants) + private void parseWants() throws IOException { + AsyncRevObjectQueue q = walk.parseAny(wantIds, true); + try { + List checkReachable = null; + RevObject obj; + while ((obj = q.next()) != null) { + if (!advertised.contains(obj)) { + switch (requestPolicy) { + case ADVERTISED: + default: + throw new PackProtocolException(MessageFormat.format( + JGitText.get().wantNotValid, obj)); + case REACHABLE_COMMIT: + if (!(obj instanceof RevCommit)) { + throw new PackProtocolException(MessageFormat.format( + JGitText.get().wantNotValid, obj)); + } + if (checkReachable == null) + checkReachable = new ArrayList(); + checkReachable.add((RevCommit) obj); + break; + case ANY: + break; + } + } + want(obj); + + if (!(obj instanceof RevCommit)) + obj.add(SATISFIED); + if (obj instanceof RevTag) { + obj = walk.peel(obj); + if (obj instanceof RevCommit) + want(obj); + } + } + if (checkReachable != null) + checkNotAdvertisedWants(checkReachable); + wantIds.clear(); + } catch (MissingObjectException notFound) { + ObjectId id = notFound.getObjectId(); + throw new PackProtocolException(MessageFormat.format( + JGitText.get().wantNotValid, id.name()), notFound); + } finally { + q.release(); + } + } + + private void want(RevObject obj) { + if (!obj.has(WANT)) { + obj.add(WANT); + wantAll.add(obj); + } + } + + private void checkNotAdvertisedWants(List notAdvertisedWants) throws MissingObjectException, IncorrectObjectTypeException, IOException { // Walk the requested commits back to the advertised commits. // If any commit exists, a branch was deleted or rewound and @@ -960,15 +944,8 @@ public class UploadPack { // If the requested commit is merged into an advertised branch // it will be marked UNINTERESTING and no commits return. - for (RevObject o : notAdvertisedWants) { - if (!(o instanceof RevCommit)) { - throw new PackProtocolException(MessageFormat.format( - JGitText.get().wantNotValid, - notAdvertisedWants.iterator().next().name())); - } - walk.markStart((RevCommit) o); - } - + for (RevCommit c : notAdvertisedWants) + walk.markStart(c); for (ObjectId id : advertised) { try { walk.markUninteresting(walk.parseCommit(id)); From 4e9fe58bb504f5041d322895a2fa2db93212e47e Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Fri, 8 Mar 2013 12:45:28 -0800 Subject: [PATCH 2/2] Avoid looking at UNREACHABLE_GARBAGE for client have lines Clients send a bunch of unknown objects to UploadPack on each round of negotiation. Many of these are not known to the server, which leads the implementation to be looking at indexes for garbage packs. Disable examining the index of a garbage pack, allowing servers to avoid reading them from disk during negotiation. The effect of this change is the server will only ACK a have line if the object was reachable during the last garbage collection, or was recently added to the repository. For most repositories there is no impact in this behavior change. If a repository rewinds a branch, runs GC, and then resets the branch back to where it was before, the now current tip is going to be skipped by this change. A client that has the commit may wind up getting a slightly larger data transfer from the server as an older common ancestor will be chosen during negotiation. This is fixable on the server side by running GC again to correct the layout of objects in pack files. Change-Id: Icd550359ef70fc7b701980f9b13d923fd13c744b --- .../org/eclipse/jgit/lib/ObjectReader.java | 13 +++++++++++++ .../eclipse/jgit/storage/dfs/DfsPackFile.java | 9 +++++++-- .../eclipse/jgit/storage/dfs/DfsReader.java | 19 +++++++++++++++++-- .../eclipse/jgit/transport/UploadPack.java | 2 ++ 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java index 68fee612f..42851498b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java @@ -436,6 +436,19 @@ public abstract class ObjectReader { // Do nothing by default, most readers don't want or need advice. } + /** + * Advise the reader to avoid unreachable objects. + *

+ * While enabled the reader will skip over anything previously proven to be + * unreachable. This may be dangerous in the face of concurrent writes. + * + * @param avoid + * true to avoid unreachable objects. + */ + public void setAvoidUnreachableObjects(boolean avoid) { + // Do nothing by default. + } + /** * An index that can be used to speed up ObjectWalks. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsPackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsPackFile.java index 80cced84e..707488b03 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsPackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsPackFile.java @@ -45,9 +45,10 @@ package org.eclipse.jgit.storage.dfs; +import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE; import static org.eclipse.jgit.storage.pack.PackExt.BITMAP_INDEX; -import static org.eclipse.jgit.storage.pack.PackExt.PACK; import static org.eclipse.jgit.storage.pack.PackExt.INDEX; +import static org.eclipse.jgit.storage.pack.PackExt.PACK; import java.io.BufferedInputStream; import java.io.EOFException; @@ -276,8 +277,12 @@ public final class DfsPackFile { } } + final boolean isGarbage() { + return packDesc.getPackSource() == UNREACHABLE_GARBAGE; + } + PackBitmapIndex getBitmapIndex(DfsReader ctx) throws IOException { - if (invalid) + if (invalid || isGarbage()) return null; DfsBlockCache.Ref idxref = bitmapIndex; if (idxref != null) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsReader.java index 7d5fb5b2a..401c483d4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsReader.java @@ -118,6 +118,8 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { private boolean wantReadAhead; + private boolean avoidUnreachable; + private List pendingReadAhead; DfsReader(DfsObjDatabase db) { @@ -143,6 +145,11 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { return new DfsReader(db); } + @Override + public void setAvoidUnreachableObjects(boolean avoid) { + avoidUnreachable = avoid; + } + @Override public BitmapIndex getBitmapIndex() throws IOException { for (DfsPackFile pack : db.getPacks()) { @@ -169,8 +176,11 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { throws IOException { if (id.isComplete()) return Collections.singleton(id.toObjectId()); + boolean noGarbage = avoidUnreachable; HashSet matches = new HashSet(4); for (DfsPackFile pack : db.getPacks()) { + if (noGarbage && pack.isGarbage()) + continue; pack.resolve(this, matches, id, 256); if (256 <= matches.size()) break; @@ -182,8 +192,9 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { public boolean has(AnyObjectId objectId) throws IOException { if (last != null && last.hasObject(this, objectId)) return true; + boolean noGarbage = avoidUnreachable; for (DfsPackFile pack : db.getPacks()) { - if (last == pack) + if (pack == last || (noGarbage && pack.isGarbage())) continue; if (pack.hasObject(this, objectId)) { last = pack; @@ -203,8 +214,9 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { return ldr; } + boolean noGarbage = avoidUnreachable; for (DfsPackFile pack : db.getPacks()) { - if (pack == last) + if (pack == last || (noGarbage && pack.isGarbage())) continue; ObjectLoader ldr = pack.get(this, objectId); if (ldr != null) { @@ -265,6 +277,7 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { int lastIdx = 0; DfsPackFile lastPack = packList[lastIdx]; + boolean noGarbage = avoidUnreachable; OBJECT_SCAN: for (T t : objectIds) { try { @@ -281,6 +294,8 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { if (i == lastIdx) continue; DfsPackFile pack = packList[i]; + if (noGarbage && pack.isGarbage()) + continue; try { long p = pack.findOffset(this, t); if (0 < p) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java index f0ba0cdc5..12f688658 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java @@ -792,6 +792,7 @@ public class UploadPack { sentReady = false; int haveCnt = 0; + walk.getObjectReader().setAvoidUnreachableObjects(true); AsyncRevObjectQueue q = walk.parseAny(peerHas, false); try { for (;;) { @@ -838,6 +839,7 @@ public class UploadPack { } } finally { q.release(); + walk.getObjectReader().setAvoidUnreachableObjects(false); } int missCnt = peerHas.size() - haveCnt;