Browse Source

Teach PackWriter how to reuse an existing object list

Counting the objects needed for packing is the most expensive part of
an UploadPack request that has no uninteresting objects (otherwise
known as an initial clone).  During this phase the PackWriter is
enumerating the entire set of objects in this repository, so they can
be sent to the client for their new clone.

Allow the ObjectReader (and therefore the underlying storage system)
to keep a cached list of all reachable objects from a small number of
points in the project's history.  If one of those points is reached
during enumeration of the commit graph, most objects are obtained from
the cached list instead of direct traversal.

PackWriter uses the list by discarding the current object lists and
restarting a traversal from all refs but marking the object list name
as uninteresting.  This allows PackWriter to enumerate all objects
that are more recent than the list creation, or that were on side
branches that the list does not include.

However, ObjectWalk tags all of the trees and commits within the list
commit as UNINTERESTING, which would normally cause PackWriter to
construct a thin pack that excludes these objects.  To avoid that,
addObject() was refactored to allow this list-based enumeration to
always include an object, even if it has been tagged UNINTERESTING by
the ObjectWalk.  This implies the list-based enumeration may only be
used for initial clones, where all objects are being sent.

The UNINTERESTING labeling occurs because StartGenerator always
enables the BoundaryGenerator if the walker is an ObjectWalk and a
commit was marked UNINTERESTING, even if RevSort.BOUNDARY was not
enabled.  This is the default reasonable behavior for an ObjectWalk,
but isn't desired here in PackWriter with the list-based enumeration.
Rather than trying to change all of this behavior, PackWriter works
around it.

Because the list name commit's immediate files and trees were all
enumerated before the list enumeration itself starts (and are also
within the list itself) PackWriter runs the risk of adding the same
objects to its ObjectIdSubclassMap twice.  Since this breaks the
internal map data structure (and also may cause the object to transmit
twice), PackWriter needs to use a new "added" RevFlag to track whether
or not an object has been put into the outgoing list yet.

Change-Id: Ie99ed4d969a6bb20cc2528ac6b8fb91043cee071
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.11
Shawn O. Pearce 14 years ago
parent
commit
f5fe2dca3c
  1. 32
      org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java
  2. 140
      org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectListIterator.java
  3. 120
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

32
org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectReader.java

@ -46,11 +46,14 @@ package org.eclipse.jgit.lib;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.revwalk.ObjectListIterator;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
@ -427,6 +430,35 @@ public abstract class ObjectReader {
// Do nothing by default, most readers don't want or need advice.
}
/**
* Obtain the available pre-computed object reachability lists.
* <p>
* The lists are indexed by commit ObjectId, so the returned set contains
* the commit ObjectIds naming each set.
*
* @return set of commit ObjectIds that identify lists.
*/
public Set<ObjectId> getAvailableObjectLists() {
return Collections.emptySet();
}
/**
* Open a pre-computed object list for reading.
*
* @param listName
* a commit ObjectId previously returned by
* {@link #getAvailableObjectLists()}.
* @param walker
* the revision pool to use when looking up objects.
* @return the list iterator.
* @throws IOException
* the reader cannot load the precomputed list.
*/
public ObjectListIterator openObjectList(AnyObjectId listName,
ObjectWalk walker) throws IOException {
throw new UnsupportedOperationException();
}
/**
* Release any resources used by this reader.
* <p>

140
org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectListIterator.java

@ -0,0 +1,140 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.revwalk;
import java.io.IOException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.storage.pack.PackWriter;
/**
* Iterates through an open object list.
* <p>
* A cached object list should be constructed by enumerating from a single
* stable commit back to the beginning of the project, using an ObjectWalk:
*
* <pre>
* ObjectWalk walk = new ObjectWalk(repository);
* walk.markStart(walk.parseCommit(listName));
*
* RevCommit commit;
* while ((commit = walk.next()) != null)
* list.addCommit(commit);
*
* RevObject object;
* while ((object == walk.nextObject()) != null)
* list.addObject(object, walk.getPathHasCode());
* </pre>
* <p>
* {@link PackWriter} relies on the list covering a single commit, and going all
* the way back to the root. If a list contains multiple starting commits the
* PackWriter will include all of those objects, even if the client did not ask
* for them, or should not have been given the objects.
*/
public abstract class ObjectListIterator {
private final ObjectWalk walk;
/**
* Initialize the list iterator.
*
* @param walk
* the revision pool the iterator will use when allocating the
* returned objects.
*/
protected ObjectListIterator(ObjectWalk walk) {
this.walk = walk;
}
/**
* Lookup an object from the revision pool.
*
* @param id
* the object to allocate.
* @param type
* the type of the object. The type must be accurate, as it is
* used to allocate the proper RevObject instance.
* @return the object.
*/
protected RevObject lookupAny(AnyObjectId id, int type) {
return walk.lookupAny(id, type);
}
/**
* Pop the next most recent commit.
* <p>
* Commits should be returned in descending commit time order, or in
* topological order. Either ordering is acceptable for a list to use.
*
* @return next most recent commit; null if traversal is over.
* @throws IOException
* the list cannot be read.
*/
public abstract RevCommit next() throws IOException;
/**
* Pop the next most recent object.
* <p>
* Only RevTree and RevBlob may be returned from this method, as these are
* the only non-commit types reachable from a RevCommit. Lists may return
* the objects clustered by type, or clustered by order of first-discovery
* when walking from the most recent to the oldest commit.
*
* @return the next object. Null at the end of the list.
* @throws IOException
* the list cannot be read.
*/
public abstract RevObject nextObject() throws IOException;
/**
* Get the current object's path hash code.
* <p>
* The path hash code should be cached from the ObjectWalk.
*
* @return path hash code; any integer may be returned.
*/
public abstract int getPathHashCode();
/** Release the resources associated with this iterator. */
public abstract void release();
}

120
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

@ -86,6 +86,7 @@ import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.ThreadSafeProgressMonitor;
import org.eclipse.jgit.revwalk.AsyncRevObjectQueue;
import org.eclipse.jgit.revwalk.ObjectListIterator;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
@ -376,9 +377,8 @@ public class PackWriter {
throws IOException {
if (countingMonitor == null)
countingMonitor = NullProgressMonitor.INSTANCE;
ObjectWalk walker = setUpWalker(interestingObjects,
findObjectsToPack(countingMonitor, interestingObjects,
uninterestingObjects);
findObjectsToPack(countingMonitor, walker);
}
/**
@ -972,11 +972,14 @@ public class PackWriter {
out.write(packcsum);
}
private ObjectWalk setUpWalker(
private void findObjectsToPack(final ProgressMonitor countingMonitor,
final Collection<? extends ObjectId> interestingObjects,
final Collection<? extends ObjectId> uninterestingObjects)
throws MissingObjectException, IOException,
IncorrectObjectTypeException {
countingMonitor.beginTask(JGitText.get().countingObjects,
ProgressMonitor.UNKNOWN);
List<ObjectId> all = new ArrayList<ObjectId>(interestingObjects.size());
for (ObjectId id : interestingObjects)
all.add(id.copy());
@ -991,13 +994,18 @@ public class PackWriter {
not = Collections.emptySet();
final ObjectWalk walker = new ObjectWalk(reader);
final RevFlag hasObjectList = walker.newFlag("hasObjectList");
walker.setRetainBody(false);
if (not.isEmpty())
if (not.isEmpty()) {
walker.sort(RevSort.COMMIT_TIME_DESC);
else
for (ObjectId listName : reader.getAvailableObjectLists())
walker.lookupCommit(listName).add(hasObjectList);
} else {
walker.sort(RevSort.TOPO);
if (thin && !not.isEmpty())
walker.sort(RevSort.BOUNDARY, true);
if (thin)
walker.sort(RevSort.BOUNDARY, true);
}
AsyncRevObjectQueue q = walker.parseAny(all, true);
try {
@ -1020,27 +1028,93 @@ public class PackWriter {
} finally {
q.release();
}
return walker;
}
private void findObjectsToPack(final ProgressMonitor countingMonitor,
final ObjectWalk walker) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
countingMonitor.beginTask(JGitText.get().countingObjects,
ProgressMonitor.UNKNOWN);
RevObject listName = null;
RevObject o;
while ((o = walker.next()) != null) {
addObject(o, 0);
if (o.has(hasObjectList)) {
listName = o;
break;
}
addResultOrBase(o, 0);
countingMonitor.update(1);
}
while ((o = walker.nextObject()) != null) {
addObject(o, walker.getPathHashCode());
countingMonitor.update(1);
if (listName != null) {
addByObjectList(listName, countingMonitor, walker,
interestingObjects);
} else {
while ((o = walker.nextObject()) != null) {
addResultOrBase(o, walker.getPathHashCode());
countingMonitor.update(1);
}
}
countingMonitor.endTask();
}
private void addByObjectList(RevObject listName,
final ProgressMonitor countingMonitor, final ObjectWalk walker,
final Collection<? extends ObjectId> interestingObjects)
throws MissingObjectException, IncorrectObjectTypeException,
IOException {
int restartedProgress = objectsMap.size();
for (List<ObjectToPack> list : objectsLists)
list.clear();
objectsMap.clear();
walker.reset();
walker.markUninteresting(listName);
for (ObjectId id : interestingObjects)
walker.markStart(walker.lookupOrNull(id));
RevFlag added = walker.newFlag("added");
RevObject o;
while ((o = walker.next()) != null) {
addResult(o, 0);
o.add(added);
if (restartedProgress != 0)
restartedProgress--;
else
countingMonitor.update(1);
}
while ((o = walker.nextObject()) != null) {
addResult(o, walker.getPathHashCode());
o.add(added);
if (restartedProgress != 0)
restartedProgress--;
else
countingMonitor.update(1);
}
ObjectListIterator itr = reader.openObjectList(listName, walker);
try {
while ((o = itr.next()) != null) {
if (o.has(added))
continue;
addResult(o, 0);
o.add(added);
if (restartedProgress != 0)
restartedProgress--;
else
countingMonitor.update(1);
}
while ((o = itr.nextObject()) != null) {
if (o.has(added))
continue;
addResult(o, itr.getPathHashCode());
o.add(added);
if (restartedProgress != 0)
restartedProgress--;
else
countingMonitor.update(1);
}
} finally {
itr.release();
}
}
/**
* Include one object to the output file.
* <p>
@ -1055,10 +1129,10 @@ public class PackWriter {
*/
public void addObject(final RevObject object)
throws IncorrectObjectTypeException {
addObject(object, 0);
addResultOrBase(object, 0);
}
private void addObject(final RevObject object, final int pathHashCode)
private void addResultOrBase(final RevObject object, final int pathHashCode)
throws IncorrectObjectTypeException {
if (object.has(RevFlag.UNINTERESTING)) {
switch (object.getType()) {
@ -1071,9 +1145,13 @@ public class PackWriter {
thin = true;
break;
}
return;
} else {
addResult(object, pathHashCode);
}
}
private void addResult(final RevObject object, final int pathHashCode)
throws IncorrectObjectTypeException {
final ObjectToPack otp;
if (reuseSupport != null)
otp = reuseSupport.newObjectToPack(object);

Loading…
Cancel
Save