diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LocalCachedPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LocalCachedPack.java index d0461640f..ee2ebc1b2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LocalCachedPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LocalCachedPack.java @@ -47,13 +47,14 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Set; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.storage.pack.CachedPack; +import org.eclipse.jgit.storage.pack.ObjectToPack; import org.eclipse.jgit.storage.pack.PackOutputStream; +import org.eclipse.jgit.storage.pack.StoredObjectRepresentation; class LocalCachedPack extends CachedPack { private final ObjectDirectory odb; @@ -62,6 +63,8 @@ class LocalCachedPack extends CachedPack { private final String[] packNames; + private PackFile[] packs; + LocalCachedPack(ObjectDirectory odb, Set tips, List packNames) { this.odb = odb; @@ -82,34 +85,39 @@ class LocalCachedPack extends CachedPack { @Override public long getObjectCount() throws IOException { long cnt = 0; - for (String packName : packNames) - cnt += getPackFile(packName).getObjectCount(); + for (PackFile pack : getPacks()) + cnt += pack.getObjectCount(); return cnt; } void copyAsIs(PackOutputStream out, boolean validate, WindowCursor wc) throws IOException { - for (String packName : packNames) - getPackFile(packName).copyPackAsIs(out, validate, wc); + for (PackFile pack : getPacks()) + pack.copyPackAsIs(out, validate, wc); } @Override - public Set hasObject(Iterable toFind) - throws IOException { - PackFile[] packs = new PackFile[packNames.length]; - for (int i = 0; i < packNames.length; i++) - packs[i] = getPackFile(packNames[i]); - - Set have = new HashSet(); - for (ObjectId id : toFind) { - for (PackFile pack : packs) { - if (pack.hasObject(id)) { - have.add(id); - break; - } + public boolean hasObject(ObjectToPack obj, StoredObjectRepresentation rep) { + try { + LocalObjectRepresentation local = (LocalObjectRepresentation) rep; + for (PackFile pack : getPacks()) { + if (local.pack == pack) + return true; } + return false; + } catch (FileNotFoundException packGone) { + return false; + } + } + + private PackFile[] getPacks() throws FileNotFoundException { + if (packs == null) { + PackFile[] p = new PackFile[packNames.length]; + for (int i = 0; i < packNames.length; i++) + p[i] = getPackFile(packNames[i]); + packs = p; } - return have; + return packs; } private PackFile getPackFile(String packName) throws FileNotFoundException { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/CachedPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/CachedPack.java index 580fa7841..7199c5b7e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/CachedPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/CachedPack.java @@ -93,16 +93,27 @@ public abstract class CachedPack { } /** - * Determine if the pack contains the requested objects. + * Determine if this pack contains the object representation given. + *

+ * PackWriter uses this method during the finding sources phase to prune + * away any objects from the leading thin-pack that already appear within + * this pack and should not be sent twice. + *

+ * Implementors are strongly encouraged to rely on looking at {@code rep} + * only and using its internal state to decide if this object is within this + * pack. Implementors should ensure a representation from this cached pack + * is tested as part of + * {@link ObjectReuseAsIs#selectObjectRepresentation(PackWriter, org.eclipse.jgit.lib.ProgressMonitor, Iterable)} + * , ensuring this method would eventually return true if the object would + * be included by this cached pack. * - * @param - * any type of ObjectId to search for. - * @param toFind - * the objects to search for. - * @return the objects contained in the pack. - * @throws IOException - * the pack cannot be accessed + * @param obj + * the object being packed. Can be used as an ObjectId. + * @param rep + * representation from the {@link ObjectReuseAsIs} instance that + * originally supplied this CachedPack. + * @return true if this pack contains this object. */ - public abstract Set hasObject( - Iterable toFind) throws IOException; + public abstract boolean hasObject(ObjectToPack obj, + StoredObjectRepresentation rep); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java index 6b10349bc..795b004e8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java @@ -81,13 +81,19 @@ public interface ObjectReuseAsIs { /** * Select the best object representation for a packer. - * + *

* Implementations should iterate through all available representations of * an object, and pass them in turn to the PackWriter though * {@link PackWriter#select(ObjectToPack, StoredObjectRepresentation)} so * the writer can select the most suitable representation to reuse into the * output stream. - * + *

+ * If the implementation returns CachedPack from {@link #getCachedPacks()}, + * it must consider the representation of any object that is stored in any + * of the offered CachedPacks. PackWriter relies on this behavior to prune + * duplicate objects out of the pack stream when it selects a CachedPack and + * the object was also reached through the thin-pack enumeration. + *

* The implementation may choose to consider multiple objects at once on * concurrent threads, but must evaluate all representations of an object * within the same thread. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index 9c247d010..bef5816b7 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -188,6 +188,8 @@ public class PackWriter { private boolean ignoreMissingUninteresting = true; + private boolean pruneCurrentObjectList; + /** * Create writer for specified repository. *

@@ -526,16 +528,7 @@ public class PackWriter { */ public boolean willInclude(final AnyObjectId id) throws IOException { ObjectToPack obj = objectsMap.get(id); - if (obj != null && !obj.isEdge()) - return true; - - Set toFind = Collections.singleton(id.toObjectId()); - for (CachedPack pack : cachedPacks) { - if (pack.hasObject(toFind).contains(id)) - return true; - } - - return false; + return obj != null && !obj.isEdge(); } /** @@ -639,7 +632,10 @@ public class PackWriter { if (writeMonitor == null) writeMonitor = NullProgressMonitor.INSTANCE; - if ((reuseDeltas || config.isReuseObjects()) && reuseSupport != null) + if (reuseSupport != null && ( + reuseDeltas + || config.isReuseObjects() + || !cachedPacks.isEmpty())) searchForReuse(compressMonitor); if (config.isDeltaCompress()) searchForDeltas(compressMonitor); @@ -715,8 +711,12 @@ public class PackWriter { cnt += list.size(); long start = System.currentTimeMillis(); monitor.beginTask(JGitText.get().searchForReuse, cnt); - for (List list : objectsLists) + for (List list : objectsLists) { + pruneCurrentObjectList = false; reuseSupport.selectObjectRepresentation(this, monitor, list); + if (pruneCurrentObjectList) + pruneEdgesFromObjectList(list); + } monitor.endTask(); stats.timeSearchingForReuse = System.currentTimeMillis() - start; } @@ -1324,7 +1324,6 @@ public class PackWriter { for (RevObject obj : haveObjs) walker.markUninteresting(obj); - int typesToPrune = 0; final int maxBases = config.getDeltaSearchWindowSize(); Set baseTrees = new HashSet(); BlockList commits = new BlockList(); @@ -1388,15 +1387,6 @@ public class PackWriter { } commits = null; - for (CachedPack p : cachedPacks) { - for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_COMMIT])) { - if (baseTrees.size() <= maxBases) - baseTrees.add(walker.lookupCommit(d).getTree()); - objectsMap.get(d).setEdge(); - typesToPrune |= 1 << Constants.OBJ_COMMIT; - } - } - BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, // objectsMap, edgeObjects, reader); RevObject o; @@ -1413,39 +1403,13 @@ public class PackWriter { countingMonitor.update(1); } - for (CachedPack p : cachedPacks) { - for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TREE])) { - objectsMap.get(d).setEdge(); - typesToPrune |= 1 << Constants.OBJ_TREE; - } - for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_BLOB])) { - objectsMap.get(d).setEdge(); - typesToPrune |= 1 << Constants.OBJ_BLOB; - } - for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TAG])) { - objectsMap.get(d).setEdge(); - typesToPrune |= 1 << Constants.OBJ_TAG; - } - } - - if (typesToPrune != 0) { - pruneObjectList(typesToPrune, Constants.OBJ_COMMIT); - pruneObjectList(typesToPrune, Constants.OBJ_TREE); - pruneObjectList(typesToPrune, Constants.OBJ_BLOB); - pruneObjectList(typesToPrune, Constants.OBJ_TAG); - } - for (CachedPack pack : cachedPacks) countingMonitor.update((int) pack.getObjectCount()); countingMonitor.endTask(); stats.timeCounting = System.currentTimeMillis() - countingStart; } - private void pruneObjectList(int typesToPrune, int typeCode) { - if ((typesToPrune & (1 << typeCode)) == 0) - return; - - final List list = objectsLists[typeCode]; + private static void pruneEdgesFromObjectList(List list) { final int size = list.size(); int src = 0; int dst = 0; @@ -1544,6 +1508,23 @@ public class PackWriter { */ public void select(ObjectToPack otp, StoredObjectRepresentation next) { int nFmt = next.getFormat(); + + if (!cachedPacks.isEmpty()) { + if (otp.isEdge()) + return; + if ((nFmt == PACK_WHOLE) | (nFmt == PACK_DELTA)) { + for (CachedPack pack : cachedPacks) { + if (pack.hasObject(otp, next)) { + otp.setEdge(); + otp.clearDeltaBase(); + otp.clearReuseAsIs(); + pruneCurrentObjectList = true; + return; + } + } + } + } + int nWeight; if (otp.isReuseAsIs()) { // We've already chosen to reuse a packed form, if next