From f048af3fd19547d3692f5df968571ffd7556b688 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sat, 31 Jul 2010 20:08:10 -0700 Subject: [PATCH] Implement async/batch lookup of object data An ObjectReader implementation may be very slow for a single object, but yet support bulk queries efficiently by batching multiple small requests into a single larger request. This easily happens when the reader is built on top of a database that is stored on another host, as the network round-trip time starts to dominate the operation cost. RevWalk, ObjectWalk, UploadPack and PackWriter are the first major users of this new bulk interface, with the goal being to support an efficient way to pack a repository for a fetch/clone client when the source repository is stored in a high-latency storage system. Processing the want/have lists is now done in bulk, to remove the high costs associated with common ancestor negotiation. PackWriter already performs object reuse selection in bulk, but it now can also do the object size lookup and object counting phases with higher efficiency. Actual object reuse, deltification, and final output are still doing sequential lookups, making them a bit more expensive to perform. Change-Id: I4c966f84917482598012074c370b9831451404ee Signed-off-by: Shawn O. Pearce --- .../jgit/lib/AsyncObjectLoaderQueue.java | 112 +++++++++ .../jgit/lib/AsyncObjectSizeQueue.java | 90 ++++++++ .../org/eclipse/jgit/lib/AsyncOperation.java | 75 +++++++ .../org/eclipse/jgit/lib/ObjectReader.java | 112 +++++++++ .../jgit/revwalk/AsyncRevObjectQueue.java | 70 ++++++ .../src/org/eclipse/jgit/revwalk/RevWalk.java | 165 +++++++++++--- .../jgit/storage/pack/DeltaWindow.java | 11 +- .../jgit/storage/pack/ObjectToPack.java | 15 +- .../eclipse/jgit/storage/pack/PackWriter.java | 178 ++++++++++----- .../eclipse/jgit/transport/UploadPack.java | 212 +++++++++++------- 10 files changed, 864 insertions(+), 176 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectLoaderQueue.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectSizeQueue.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncOperation.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/revwalk/AsyncRevObjectQueue.java diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectLoaderQueue.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectLoaderQueue.java new file mode 100644 index 000000000..a3732eaac --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectLoaderQueue.java @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.lib; + +import java.io.IOException; + +import org.eclipse.jgit.errors.MissingObjectException; + +/** + * Queue to open objects asynchronously. + * + * A queue may perform background decompression of objects and supply them + * (possibly out-of-order) to the application. + * + * @param + * type of identifier supplied to the call that made the queue. + */ +public interface AsyncObjectLoaderQueue extends + AsyncOperation { + + /** + * Position this queue onto the next available result. + * + * Even if this method returns true, {@link #open()} may still throw + * {@link MissingObjectException} if the underlying object database was + * concurrently modified and the current object is no longer available. + * + * @return true if there is a result available; false if the queue has + * finished its input iteration. + * @throws MissingObjectException + * the object does not exist. If the implementation is retaining + * the application's objects {@link #getCurrent()} will be the + * current object that is missing. There may be more results + * still available, so the caller should continue invoking next + * to examine another result. + * @throws IOException + * the object store cannot be accessed. + */ + public boolean next() throws MissingObjectException, IOException; + + /** + * @return the current object, null if the implementation lost track. + * Implementations may for performance reasons discard the caller's + * ObjectId and provider their own through {@link #getObjectId()}. + */ + public T getCurrent(); + + /** @return the ObjectId of the current object. Never null. */ + public ObjectId getObjectId(); + + /** + * Obtain a loader to read the object. + * + * This method can only be invoked once per result + * + * Due to race conditions with a concurrent modification of the underlying + * object database, an object may be unavailable when this method is + * invoked, even though next returned successfully. + * + * @return the ObjectLoader to read this object. Never null. + * @throws MissingObjectException + * the object does not exist. If the implementation is retaining + * the application's objects {@link #getCurrent()} will be the + * current object that is missing. There may be more results + * still available, so the caller should continue invoking next + * to examine another result. + * @throws IOException + * the object store cannot be accessed. + */ + public ObjectLoader open() throws IOException; +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectSizeQueue.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectSizeQueue.java new file mode 100644 index 000000000..c9667242a --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncObjectSizeQueue.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.lib; + +import java.io.IOException; + +import org.eclipse.jgit.errors.MissingObjectException; + +/** + * Queue to examine object sizes asynchronously. + * + * A queue may perform background lookup of object sizes and supply them + * (possibly out-of-order) to the application. + * + * @param + * type of identifier supplied to the call that made the queue. + */ +public interface AsyncObjectSizeQueue extends + AsyncOperation { + + /** + * Position this queue onto the next available result. + * + * @return true if there is a result available; false if the queue has + * finished its input iteration. + * @throws MissingObjectException + * the object does not exist. If the implementation is retaining + * the application's objects {@link #getCurrent()} will be the + * current object that is missing. There may be more results + * still available, so the caller should continue invoking next + * to examine another result. + * @throws IOException + * the object store cannot be accessed. + */ + public boolean next() throws MissingObjectException, IOException; + + /** + * @return the current object, null if the implementation lost track. + * Implementations may for performance reasons discard the caller's + * ObjectId and provider their own through {@link #getObjectId()}. + */ + public T getCurrent(); + + /** @return the ObjectId of the current object. Never null. */ + public ObjectId getObjectId(); + + /** @return the size of the current object. */ + public long getSize(); +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncOperation.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncOperation.java new file mode 100644 index 000000000..fb73dc1c5 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/AsyncOperation.java @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.lib; + +/** + * Asynchronous operation handle. + * + * Callers that start an asynchronous operation are supplied with a handle that + * may be used to attempt cancellation of the operation if the caller does not + * wish to continue. + */ +public interface AsyncOperation { + /** + * Cancels the running task. + * + * Attempts to cancel execution of this task. This attempt will fail if the + * task has already completed, already been cancelled, or could not be + * cancelled for some other reason. If successful, and this task has not + * started when cancel is called, this task should never run. If the task + * has already started, then the mayInterruptIfRunning parameter determines + * whether the thread executing this task should be interrupted in an + * attempt to stop the task. + * + * @param mayInterruptIfRunning + * true if the thread executing this task should be interrupted; + * otherwise, in-progress tasks are allowed to complete + * @return false if the task could not be cancelled, typically because it + * has already completed normally; true otherwise + */ + public boolean cancel(boolean mayInterruptIfRunning); + + /** Release resources used by the operation, including cancellation. */ + public void release(); +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java index e1ee1441d..d4e866a22 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java @@ -45,6 +45,7 @@ package org.eclipse.jgit.lib; import java.io.IOException; import java.util.Collection; +import java.util.Iterator; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; @@ -149,6 +150,60 @@ public abstract class ObjectReader { throws MissingObjectException, IncorrectObjectTypeException, IOException; + /** + * Asynchronous object opening. + * + * @param + * type of identifier being supplied. + * @param objectIds + * objects to open from the object store. The supplied collection + * must not be modified until the queue has finished. + * @param reportMissing + * if true missing objects are reported by calling failure with a + * MissingObjectException. This may be more expensive for the + * implementation to guarantee. If false the implementation may + * choose to report MissingObjectException, or silently skip over + * the object with no warning. + * @return queue to read the objects from. + */ + public AsyncObjectLoaderQueue open( + Iterable objectIds, final boolean reportMissing) { + final Iterator idItr = objectIds.iterator(); + return new AsyncObjectLoaderQueue() { + private T cur; + + public boolean next() throws MissingObjectException, IOException { + if (idItr.hasNext()) { + cur = idItr.next(); + return true; + } else { + return false; + } + } + + public T getCurrent() { + return cur; + } + + public ObjectId getObjectId() { + return cur; + } + + public ObjectLoader open() throws IOException { + return ObjectReader.this.open(cur, OBJ_ANY); + } + + public boolean cancel(boolean mayInterruptIfRunning) { + return true; + } + + public void release() { + // Since we are sequential by default, we don't + // have any state to clean up if we terminate early. + } + }; + } + /** * Get only the size of an object. *

@@ -177,6 +232,63 @@ public abstract class ObjectReader { return open(objectId, typeHint).getSize(); } + /** + * Asynchronous object size lookup. + * + * @param + * type of identifier being supplied. + * @param objectIds + * objects to get the size of from the object store. The supplied + * collection must not be modified until the queue has finished. + * @param reportMissing + * if true missing objects are reported by calling failure with a + * MissingObjectException. This may be more expensive for the + * implementation to guarantee. If false the implementation may + * choose to report MissingObjectException, or silently skip over + * the object with no warning. + * @return queue to read object sizes from. + */ + public AsyncObjectSizeQueue getObjectSize( + Iterable objectIds, final boolean reportMissing) { + final Iterator idItr = objectIds.iterator(); + return new AsyncObjectSizeQueue() { + private T cur; + + private long sz; + + public boolean next() throws MissingObjectException, IOException { + if (idItr.hasNext()) { + cur = idItr.next(); + sz = getObjectSize(cur, OBJ_ANY); + return true; + } else { + return false; + } + } + + public T getCurrent() { + return cur; + } + + public ObjectId getObjectId() { + return cur; + } + + public long getSize() { + return sz; + } + + public boolean cancel(boolean mayInterruptIfRunning) { + return true; + } + + public void release() { + // Since we are sequential by default, we don't + // have any state to clean up if we terminate early. + } + }; + } + /** * Advice from a {@link RevWalk} that a walk is starting from these roots. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/AsyncRevObjectQueue.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/AsyncRevObjectQueue.java new file mode 100644 index 000000000..1c0438a5e --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/AsyncRevObjectQueue.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.revwalk; + +import java.io.IOException; + +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.AsyncOperation; + +/** + * Queue to lookup and parse objects asynchronously. + * + * A queue may perform background lookup of objects and supply them (possibly + * out-of-order) to the application. + */ +public interface AsyncRevObjectQueue extends AsyncOperation { + /** + * Obtain the next object. + * + * @return the object; null if there are no more objects remaining. + * @throws MissingObjectException + * the object does not exist. There may be more objects + * remaining in the iteration, the application should call + * {@link #next()} again. + * @throws IOException + * the object store cannot be accessed. + */ + public RevObject next() throws MissingObjectException, IOException; +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java index 5f8fd5c8a..e39f8b325 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/RevWalk.java @@ -50,19 +50,23 @@ import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; import java.util.Iterator; +import java.util.List; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.RevWalkException; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.AsyncObjectLoaderQueue; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.MutableObjectId; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectIdSubclassMap; import org.eclipse.jgit.lib.ObjectLoader; -import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.filter.RevFilter; import org.eclipse.jgit.treewalk.filter.TreeFilter; @@ -683,6 +687,18 @@ public class RevWalk implements Iterable { return r; } + /** + * Locate an object that was previously allocated in this walk. + * + * @param id + * name of the object. + * @return reference to the object if it has been previously located; + * otherwise null. + */ + public RevObject lookupOrNull(AnyObjectId id) { + return objects.get(id); + } + /** * Locate a reference to a commit and immediately parse its content. *

@@ -789,39 +805,126 @@ public class RevWalk implements Iterable { public RevObject parseAny(final AnyObjectId id) throws MissingObjectException, IOException { RevObject r = objects.get(id); - if (r == null) { - final ObjectLoader ldr = reader.open(id); - final int type = ldr.getType(); - switch (type) { - case Constants.OBJ_COMMIT: { - final RevCommit c = createCommit(id); - c.parseCanonical(this, ldr.getCachedBytes()); - r = c; - break; - } - case Constants.OBJ_TREE: { - r = new RevTree(id); - r.flags |= PARSED; - break; - } - case Constants.OBJ_BLOB: { - r = new RevBlob(id); - r.flags |= PARSED; - break; + if (r == null) + r = parseNew(id, reader.open(id)); + else + parseHeaders(r); + return r; + } + + private RevObject parseNew(AnyObjectId id, ObjectLoader ldr) + throws CorruptObjectException, LargeObjectException { + RevObject r; + int type = ldr.getType(); + switch (type) { + case Constants.OBJ_COMMIT: { + final RevCommit c = createCommit(id); + c.parseCanonical(this, ldr.getCachedBytes()); + r = c; + break; + } + case Constants.OBJ_TREE: { + r = new RevTree(id); + r.flags |= PARSED; + break; + } + case Constants.OBJ_BLOB: { + r = new RevBlob(id); + r.flags |= PARSED; + break; + } + case Constants.OBJ_TAG: { + final RevTag t = new RevTag(id); + t.parseCanonical(this, ldr.getCachedBytes()); + r = t; + break; + } + default: + throw new IllegalArgumentException(MessageFormat.format(JGitText + .get().badObjectType, type)); + } + objects.add(r); + return r; + } + + /** + * Asynchronous object parsing. + * + * @param + * any ObjectId type. + * @param objectIds + * objects to open from the object store. The supplied collection + * must not be modified until the queue has finished. + * @param reportMissing + * if true missing objects are reported by calling failure with a + * MissingObjectException. This may be more expensive for the + * implementation to guarantee. If false the implementation may + * choose to report MissingObjectException, or silently skip over + * the object with no warning. + * @return queue to read the objects from. + */ + public AsyncRevObjectQueue parseAny( + Iterable objectIds, boolean reportMissing) { + List need = new ArrayList(); + List have = new ArrayList(); + for (T id : objectIds) { + RevObject r = objects.get(id); + if (r != null && (r.flags & PARSED) != 0) + have.add(r); + else + need.add(id); + } + + final Iterator objItr = have.iterator(); + if (need.isEmpty()) { + return new AsyncRevObjectQueue() { + public RevObject next() { + return objItr.hasNext() ? objItr.next() : null; + } + + public boolean cancel(boolean mayInterruptIfRunning) { + return true; + } + + public void release() { + // In-memory only, no action required. + } + }; + } + + final AsyncObjectLoaderQueue lItr = reader.open(need, reportMissing); + return new AsyncRevObjectQueue() { + public RevObject next() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + if (objItr.hasNext()) + return objItr.next(); + if (!lItr.next()) + return null; + + ObjectId id = lItr.getObjectId(); + ObjectLoader ldr = lItr.open(); + RevObject r = objects.get(id); + if (r == null) + r = parseNew(id, ldr); + else if (r instanceof RevCommit) { + byte[] raw = ldr.getCachedBytes(); + ((RevCommit) r).parseCanonical(RevWalk.this, raw); + } else if (r instanceof RevTag) { + byte[] raw = ldr.getCachedBytes(); + ((RevTag) r).parseCanonical(RevWalk.this, raw); + } else + r.flags |= PARSED; + return r; } - case Constants.OBJ_TAG: { - final RevTag t = new RevTag(id); - t.parseCanonical(this, ldr.getCachedBytes()); - r = t; - break; + + public boolean cancel(boolean mayInterruptIfRunning) { + return lItr.cancel(mayInterruptIfRunning); } - default: - throw new IllegalArgumentException(MessageFormat.format(JGitText.get().badObjectType, type)); + + public void release() { + lItr.release(); } - objects.add(r); - } else - parseHeaders(r); - return r; + }; } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java index c96105650..d5296a03e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java @@ -143,11 +143,8 @@ class DeltaWindow { } res.set(toSearch[off]); - if (res.object.isDoNotDelta()) { - // PackWriter marked edge objects with the - // do-not-delta flag. They are the only ones - // that appear in toSearch with it set, but - // we don't actually want to make a delta for + if (res.object.isEdge()) { + // We don't actually want to make a delta for // them, just need to push them into the window // so they can be read by other objects. // @@ -211,7 +208,7 @@ class DeltaWindow { // ObjectToPack srcObj = window[bestSlot].object; ObjectToPack resObj = res.object; - if (srcObj.isDoNotDelta()) { + if (srcObj.isEdge()) { // The source (the delta base) is an edge object outside of the // pack. Its part of the common base set that the peer already // has on hand, so we don't want to send it. We have to store @@ -280,7 +277,7 @@ class DeltaWindow { dropFromWindow(srcSlot); return NEXT_SRC; } catch (IOException notAvailable) { - if (src.object.isDoNotDelta()) { + if (src.object.isEdge()) { // This is an edge that is suddenly not available. dropFromWindow(srcSlot); return NEXT_SRC; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectToPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectToPack.java index 3161668be..047fa8e6c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectToPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectToPack.java @@ -64,6 +64,8 @@ public class ObjectToPack extends PackedObjectInfo { private static final int DO_NOT_DELTA = 1 << 2; + private static final int EDGE = 1 << 3; + private static final int TYPE_SHIFT = 5; private static final int DELTA_SHIFT = 8; @@ -79,7 +81,8 @@ public class ObjectToPack extends PackedObjectInfo { *

  • 1 bit: wantWrite
  • *
  • 1 bit: canReuseAsIs
  • *
  • 1 bit: doNotDelta
  • - *
  • 2 bits: unused
  • + *
  • 1 bit: edgeObject
  • + *
  • 1 bit: unused
  • *
  • 3 bits: type
  • *
  • --
  • *
  • 24 bits: deltaDepth
  • @@ -243,6 +246,14 @@ public class ObjectToPack extends PackedObjectInfo { flags &= ~DO_NOT_DELTA; } + boolean isEdge() { + return (flags & EDGE) != 0; + } + + void setEdge() { + flags |= EDGE; + } + int getFormat() { if (isReuseAsIs()) { if (isDeltaRepresentation()) @@ -305,6 +316,8 @@ public class ObjectToPack extends PackedObjectInfo { buf.append(" reuseAsIs"); if (isDoNotDelta()) buf.append(" doNotDelta"); + if (isEdge()) + buf.append(" edge"); if (getDeltaDepth() > 0) buf.append(" depth=" + getDeltaDepth()); if (isDeltaRepresentation()) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index 5f6bf4bfc..41370c1de 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -56,8 +56,10 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; @@ -75,6 +77,7 @@ import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.AsyncObjectSizeQueue; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; @@ -84,6 +87,7 @@ import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.ThreadSafeProgressMonitor; +import org.eclipse.jgit.revwalk.AsyncRevObjectQueue; import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevFlag; import org.eclipse.jgit.revwalk.RevObject; @@ -544,20 +548,66 @@ public class PackWriter { // them in the search phase below. // for (ObjectToPack eo : edgeObjects) { - try { - if (loadSize(eo)) - list[cnt++] = eo; - } catch (IOException notAvailable) { - // Skip this object. Since we aren't going to write it out - // the only consequence of it being unavailable to us is we - // may produce a larger data stream than we could have. - // - if (!ignoreMissingUninteresting) - throw notAvailable; - } + eo.setWeight(0); + list[cnt++] = eo; } + // Compute the sizes of the objects so we can do a proper sort. + // We let the reader skip missing objects if it chooses. For + // some readers this can be a huge win. We detect missing objects + // by having set the weights above to 0 and allowing the delta + // search code to discover the missing object and skip over it, or + // abort with an exception if we actually had to have it. + // monitor.beginTask(JGitText.get().compressingObjects, cnt); + AsyncObjectSizeQueue sizeQueue = reader.getObjectSize( + Arrays. asList(list).subList(0, cnt), false); + try { + final long limit = config.getBigFileThreshold(); + for (;;) { + monitor.update(1); + + try { + if (!sizeQueue.next()) + break; + } catch (MissingObjectException notFound) { + if (ignoreMissingUninteresting) { + ObjectToPack otp = sizeQueue.getCurrent(); + if (otp != null && otp.isEdge()) { + otp.setDoNotDelta(true); + continue; + } + + otp = edgeObjects.get(notFound.getObjectId()); + if (otp != null) { + otp.setDoNotDelta(true); + continue; + } + } + throw notFound; + } + + ObjectToPack otp = sizeQueue.getCurrent(); + if (otp == null) { + otp = objectsMap.get(sizeQueue.getObjectId()); + if (otp == null) + otp = edgeObjects.get(sizeQueue.getObjectId()); + } + + long sz = sizeQueue.getSize(); + if (limit <= sz || Integer.MAX_VALUE <= sz) + otp.setDoNotDelta(true); // too big, avoid costly files + + else if (sz <= DeltaIndex.BLKSZ) + otp.setDoNotDelta(true); // too small, won't work + + else + otp.setWeight((int) sz); + } + } finally { + sizeQueue.release(); + } + monitor.endTask(); // Sort the objects by path hash so like files are near each other, // and then by size descending so that bigger files are first. This @@ -566,52 +616,51 @@ public class PackWriter { // Arrays.sort(list, 0, cnt, new Comparator() { public int compare(ObjectToPack a, ObjectToPack b) { - int cmp = a.getType() - b.getType(); - if (cmp == 0) - cmp = (a.getPathHash() >>> 1) - (b.getPathHash() >>> 1); - if (cmp == 0) - cmp = (a.getPathHash() & 1) - (b.getPathHash() & 1); - if (cmp == 0) - cmp = b.getWeight() - a.getWeight(); - return cmp; + int cmp = (a.isDoNotDelta() ? 1 : 0) + - (b.isDoNotDelta() ? 1 : 0); + if (cmp != 0) + return cmp; + + cmp = a.getType() - b.getType(); + if (cmp != 0) + return cmp; + + cmp = (a.getPathHash() >>> 1) - (b.getPathHash() >>> 1); + if (cmp != 0) + return cmp; + + cmp = (a.getPathHash() & 1) - (b.getPathHash() & 1); + if (cmp != 0) + return cmp; + + return b.getWeight() - a.getWeight(); } }); + + // Above we stored the objects we cannot delta onto the end. + // Remove them from the list so we don't waste time on them. + while (0 < cnt && list[cnt - 1].isDoNotDelta()) + cnt--; + if (cnt == 0) + return; + + monitor.beginTask(JGitText.get().compressingObjects, cnt); searchForDeltas(monitor, list, cnt); monitor.endTask(); } - private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type) - throws MissingObjectException, IncorrectObjectTypeException, - IOException { + private int findObjectsNeedingDelta(ObjectToPack[] list, int cnt, int type) { for (ObjectToPack otp : objectsLists[type]) { if (otp.isDoNotDelta()) // delta is disabled for this path continue; if (otp.isDeltaRepresentation()) // already reusing a delta continue; - if (loadSize(otp)) - list[cnt++] = otp; + otp.setWeight(0); + list[cnt++] = otp; } return cnt; } - private boolean loadSize(ObjectToPack e) throws MissingObjectException, - IncorrectObjectTypeException, IOException { - long sz = reader.getObjectSize(e, e.getType()); - - // If its too big for us to handle, skip over it. - // - if (config.getBigFileThreshold() <= sz || Integer.MAX_VALUE <= sz) - return false; - - // If its too tiny for the delta compression to work, skip it. - // - if (sz <= DeltaIndex.BLKSZ) - return false; - - e.setWeight((int) sz); - return true; - } - private void searchForDeltas(final ProgressMonitor monitor, final ObjectToPack[] list, final int cnt) throws MissingObjectException, IncorrectObjectTypeException, @@ -963,28 +1012,45 @@ public class PackWriter { final Collection uninterestingObjects) throws MissingObjectException, IOException, IncorrectObjectTypeException { + List all = new ArrayList(interestingObjects.size()); + for (ObjectId id : interestingObjects) + all.add(id.copy()); + + final Set not; + if (uninterestingObjects != null && !uninterestingObjects.isEmpty()) { + not = new HashSet(); + for (ObjectId id : uninterestingObjects) + not.add(id.copy()); + all.addAll(not); + } else + not = Collections.emptySet(); + final ObjectWalk walker = new ObjectWalk(reader); walker.setRetainBody(false); walker.sort(RevSort.COMMIT_TIME_DESC); - if (thin) + if (thin && !not.isEmpty()) walker.sort(RevSort.BOUNDARY, true); - for (ObjectId id : interestingObjects) { - RevObject o = walker.parseAny(id); - walker.markStart(o); - } - if (uninterestingObjects != null) { - for (ObjectId id : uninterestingObjects) { - final RevObject o; + AsyncRevObjectQueue q = walker.parseAny(all, true); + try { + for (;;) { try { - o = walker.parseAny(id); - } catch (MissingObjectException x) { - if (ignoreMissingUninteresting) + RevObject o = q.next(); + if (o == null) + break; + if (not.contains(o.copy())) + walker.markUninteresting(o); + else + walker.markStart(o); + } catch (MissingObjectException e) { + if (ignoreMissingUninteresting + && not.contains(e.getObjectId())) continue; - throw x; + throw e; } - walker.markUninteresting(o); } + } finally { + q.release(); } return walker; } @@ -1032,7 +1098,7 @@ public class PackWriter { case Constants.OBJ_BLOB: ObjectToPack otp = new ObjectToPack(object); otp.setPathHash(pathHashCode); - otp.setDoNotDelta(true); + otp.setEdge(); edgeObjects.add(otp); thin = true; break; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java index 16d56df66..0d1c0355a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java @@ -50,7 +50,6 @@ import java.io.OutputStream; import java.text.MessageFormat; import java.util.ArrayList; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -63,6 +62,7 @@ import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.AsyncRevObjectQueue; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevFlag; import org.eclipse.jgit.revwalk.RevFlagSet; @@ -145,9 +145,6 @@ public class UploadPack { /** Objects the client wants to obtain. */ private final List wantAll = new ArrayList(); - /** Objects the client wants to obtain. */ - private final List wantCommits = new ArrayList(); - /** Objects on both sides, these don't have to be sent. */ private final List commonBase = new ArrayList(); @@ -166,6 +163,9 @@ public class UploadPack { /** Marked on objects in {@link #commonBase}. */ private final RevFlag COMMON; + /** Objects where we found a path from the want list to a common base. */ + private final RevFlag SATISFIED; + private final RevFlagSet SAVE; private MultiAck multiAck = MultiAck.OFF; @@ -185,6 +185,7 @@ public class UploadPack { WANT = walk.newFlag("WANT"); PEER_HAS = walk.newFlag("PEER_HAS"); COMMON = walk.newFlag("COMMON"); + SATISFIED = walk.newFlag("SATISFIED"); walk.carry(PEER_HAS); SAVE = new RevFlagSet(); @@ -376,8 +377,9 @@ public class UploadPack { } private void recvWants() throws IOException { + HashSet wantIds = new HashSet(); boolean isFirst = true; - for (;; isFirst = false) { + for (;;) { String line; try { line = pckIn.readString(); @@ -401,41 +403,54 @@ public class UploadPack { line = line.substring(0, 45); } - final ObjectId id = ObjectId.fromString(line.substring(5)); - final RevObject o; - try { - o = walk.parseAny(id); - } catch (IOException e) { - throw new PackProtocolException(MessageFormat.format(JGitText.get().notValid, id.name()), e); - } - if (!o.has(ADVERTISED)) - throw new PackProtocolException(MessageFormat.format(JGitText.get().notValid, id.name())); - try { - want(o); - } catch (IOException e) { - throw new PackProtocolException(MessageFormat.format(JGitText.get().notValid, id.name()), e); - } + wantIds.add(ObjectId.fromString(line.substring(5))); + isFirst = false; } - } - private void want(RevObject o) throws MissingObjectException, IOException { - if (!o.has(WANT)) { - o.add(WANT); - wantAll.add(o); - - if (o instanceof RevCommit) - wantCommits.add((RevCommit) o); + if (wantIds.isEmpty()) + return; - else if (o instanceof RevTag) { - o = walk.peel(o); - if (o instanceof RevCommit) - want(o); + AsyncRevObjectQueue q = walk.parseAny(wantIds, true); + try { + for (;;) { + RevObject o; + try { + o = q.next(); + } catch (IOException error) { + throw new PackProtocolException(MessageFormat.format( + JGitText.get().notValid, error.getMessage()), error); + } + if (o == null) + break; + if (o.has(WANT)) { + // Already processed, the client repeated itself. + + } else if (o.has(ADVERTISED)) { + o.add(WANT); + wantAll.add(o); + + if (o instanceof RevTag) { + o = walk.peel(o); + if (o instanceof RevCommit) { + if (!o.has(WANT)) { + o.add(WANT); + wantAll.add(o); + } + } + } + } else { + throw new PackProtocolException(MessageFormat.format( + JGitText.get().notValid, o.name())); + } } + } finally { + q.release(); } } private boolean negotiate() throws IOException { ObjectId last = ObjectId.zeroId(); + List peerHas = new ArrayList(64); for (;;) { String line; try { @@ -445,6 +460,7 @@ public class UploadPack { } if (line == PacketLineIn.END) { + last = processHaveLines(peerHas, last); if (commonBase.isEmpty() || multiAck != MultiAck.OFF) pckOut.writeString("NAK\n"); if (!biDirectionalPipe) @@ -452,39 +468,11 @@ public class UploadPack { pckOut.flush(); } else if (line.startsWith("have ") && line.length() == 45) { - final ObjectId id = ObjectId.fromString(line.substring(5)); - if (matchHave(id)) { - // Both sides have the same object; let the client know. - // - last = id; - switch (multiAck) { - case OFF: - if (commonBase.size() == 1) - pckOut.writeString("ACK " + id.name() + "\n"); - break; - case CONTINUE: - pckOut.writeString("ACK " + id.name() + " continue\n"); - break; - case DETAILED: - pckOut.writeString("ACK " + id.name() + " common\n"); - break; - } - } else if (okToGiveUp()) { - // They have this object; we don't. - // - switch (multiAck) { - case OFF: - break; - case CONTINUE: - pckOut.writeString("ACK " + id.name() + " continue\n"); - break; - case DETAILED: - pckOut.writeString("ACK " + id.name() + " ready\n"); - break; - } - } + peerHas.add(ObjectId.fromString(line.substring(5))); } else if (line.equals("done")) { + last = processHaveLines(peerHas, last); + if (commonBase.isEmpty()) pckOut.writeString("NAK\n"); @@ -499,21 +487,76 @@ public class UploadPack { } } - private boolean matchHave(final ObjectId id) { - final RevObject o; + private ObjectId processHaveLines(List peerHas, ObjectId last) + throws IOException { + if (peerHas.isEmpty()) + return last; + + // If both sides have the same object; let the client know. + // + AsyncRevObjectQueue q = walk.parseAny(peerHas, false); try { - o = walk.parseAny(id); - } catch (IOException err) { - return false; + for (;;) { + RevObject obj; + try { + obj = q.next(); + } catch (MissingObjectException notFound) { + continue; + } + if (obj == null) + break; + + last = obj; + if (obj.has(PEER_HAS)) + continue; + + obj.add(PEER_HAS); + if (obj instanceof RevCommit) + ((RevCommit) obj).carry(PEER_HAS); + addCommonBase(obj); + + switch (multiAck) { + case OFF: + if (commonBase.size() == 1) + pckOut.writeString("ACK " + obj.name() + "\n"); + break; + case CONTINUE: + pckOut.writeString("ACK " + obj.name() + " continue\n"); + break; + case DETAILED: + pckOut.writeString("ACK " + obj.name() + " common\n"); + break; + } + } + } finally { + q.release(); } - if (!o.has(PEER_HAS)) { - o.add(PEER_HAS); - if (o instanceof RevCommit) - ((RevCommit) o).carry(PEER_HAS); - addCommonBase(o); + // If we don't have one of the objects but we're also willing to + // create a pack at this point, let the client know so it stops + // telling us about its history. + // + for (int i = peerHas.size() - 1; i >= 0; i--) { + ObjectId id = peerHas.get(i); + if (walk.lookupOrNull(id) == null) { + if (okToGiveUp()) { + switch (multiAck) { + case OFF: + break; + case CONTINUE: + pckOut.writeString("ACK " + id.name() + " continue\n"); + break; + case DETAILED: + pckOut.writeString("ACK " + id.name() + " ready\n"); + break; + } + } + break; + } } - return true; + + peerHas.clear(); + return last; } private void addCommonBase(final RevObject o) { @@ -535,27 +578,34 @@ public class UploadPack { return false; try { - for (final Iterator i = wantCommits.iterator(); i - .hasNext();) { - final RevCommit want = i.next(); - if (wantSatisfied(want)) - i.remove(); + for (RevObject obj : wantAll) { + if (wantSatisfied(obj)) + return false; } + return true; } catch (IOException e) { throw new PackProtocolException(JGitText.get().internalRevisionError, e); } - return wantCommits.isEmpty(); } - private boolean wantSatisfied(final RevCommit want) throws IOException { + private boolean wantSatisfied(final RevObject want) throws IOException { + if (want.has(SATISFIED)) + return true; + + if (!(want instanceof RevCommit)) { + want.add(SATISFIED); + return true; + } + walk.resetRetain(SAVE); - walk.markStart(want); + walk.markStart((RevCommit) want); for (;;) { final RevCommit c = walk.next(); if (c == null) break; if (c.has(PEER_HAS)) { addCommonBase(c); + want.add(SATISFIED); return true; } }