From 7c58f6282a53754822b00ae821d99262c43c86a9 Mon Sep 17 00:00:00 2001 From: Colby Ranger Date: Fri, 18 Jan 2013 16:22:10 -0800 Subject: [PATCH] Update DfsGarbageCollector to not read back a pack index. Previously, the Dfs GC excluded objects from packs by passing a previously written index to the PackWriter. Reading back a file on Dfs is slow. Instead, allow the PackWriter to expose the objects included in a pack and forward that to invocations of excludeObjects() . Change-Id: I377cb4ab07f62cf790505e1eeb0b2efe81897c79 --- .../jgit/storage/file/PackWriterTest.java | 18 +++++-- .../eclipse/jgit/internal/JGitText.properties | 1 + .../org/eclipse/jgit/internal/JGitText.java | 1 + .../jgit/storage/dfs/DfsGarbageCollector.java | 30 ++++++----- .../src/org/eclipse/jgit/storage/file/GC.java | 21 +++++--- .../eclipse/jgit/storage/pack/PackWriter.java | 50 +++++++++++++++---- 6 files changed, 89 insertions(+), 32 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackWriterTest.java index 4752a3fb2..281715f5e 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackWriterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackWriterTest.java @@ -66,6 +66,7 @@ import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.junit.JGitTestUtil; import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.junit.TestRepository.BranchBuilder; +import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectInserter; @@ -77,6 +78,7 @@ import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.file.PackIndex.MutableEntry; import org.eclipse.jgit.storage.pack.PackConfig; import org.eclipse.jgit.storage.pack.PackWriter; +import org.eclipse.jgit.storage.pack.PackWriter.ObjectIdSet; import org.eclipse.jgit.transport.PackParser; import org.junit.After; import org.junit.Before; @@ -463,7 +465,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { RevCommit c1 = bb.commit().add("f", contentA).create(); testRepo.getRevWalk().parseHeaders(c1); PackIndex pf1 = writePack(repo, Collections.singleton(c1), - Collections. emptySet()); + Collections. emptySet()); assertContent( pf1, Arrays.asList(c1.getId(), c1.getTree().getId(), @@ -472,7 +474,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { RevCommit c2 = bb.commit().add("f", contentB).create(); testRepo.getRevWalk().parseHeaders(c2); PackIndex pf2 = writePack(repo, Collections.singleton(c2), - Collections.singleton(pf1)); + Collections.singleton(objectIdSet(pf1))); assertContent( pf2, Arrays.asList(c2.getId(), c2.getTree().getId(), @@ -490,12 +492,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { } private static PackIndex writePack(FileRepository repo, - Set want, Set excludeObjects) + Set want, Set excludeObjects) throws IOException { PackWriter pw = new PackWriter(repo); pw.setDeltaBaseAsOffset(true); pw.setReuseDeltaCommits(false); - for (PackIndex idx : excludeObjects) + for (ObjectIdSet idx : excludeObjects) pw.excludeObjects(idx); pw.preparePack(NullProgressMonitor.INSTANCE, want, Collections. emptySet()); @@ -668,4 +670,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { assertEquals(objectsOrder[i++].toObjectId(), me.toObjectId()); } } + + private static ObjectIdSet objectIdSet(final PackIndex idx) { + return new ObjectIdSet() { + public boolean contains(AnyObjectId objectId) { + return idx.hasObject(objectId); + } + }; + } } diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index bccc3736f..48cb48754 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -30,6 +30,7 @@ blobNotFound=Blob not found: {0} blobNotFoundForPath=Blob not found: {0} for path: {1} branchNameInvalid=Branch name {0} is not allowed cachedPacksPreventsIndexCreation=Using cached packs prevents index creation +cachedPacksPreventsListingObjects=Using cached packs prevents listing objects cannotBeCombined=Cannot be combined. cannotBeRecursiveWhenTreesAreIncluded=TreeWalk shouldn't be recursive when tree objects are included. cannotCombineSquashWithNoff=Cannot combine --squash with --no-ff. diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index 9843c2d1f..287684392 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -91,6 +91,7 @@ public class JGitText extends TranslationBundle { /***/ public String blobNotFoundForPath; /***/ public String branchNameInvalid; /***/ public String cachedPacksPreventsIndexCreation; + /***/ public String cachedPacksPreventsListingObjects; /***/ public String cannotBeCombined; /***/ public String cannotBeRecursiveWhenTreesAreIncluded; /***/ public String cannotCombineSquashWithNoff; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java index 439781303..21f01ad6b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java @@ -82,7 +82,7 @@ public class DfsGarbageCollector { private final List newPackStats; - private final List newPackList; + private final List newPackObj; private DfsReader ctx; @@ -116,7 +116,7 @@ public class DfsGarbageCollector { objdb = repo.getObjectDatabase(); newPackDesc = new ArrayList(4); newPackStats = new ArrayList(4); - newPackList = new ArrayList(4); + newPackObj = new ArrayList(4); packConfig = new PackConfig(repo); packConfig.setIndexVersion(2); @@ -244,8 +244,8 @@ public class DfsGarbageCollector { PackWriter pw = newPackWriter(); try { - for (DfsPackFile pack : newPackList) - pw.excludeObjects(pack.getPackIndex(ctx)); + for (PackWriter.ObjectIdSet packedObjs : newPackObj) + pw.excludeObjects(packedObjs); pw.preparePack(pm, nonHeads, allHeads); if (0 < pw.getObjectCount()) writePack(GC, pw, pm); @@ -259,10 +259,6 @@ public class DfsGarbageCollector { return; // TODO(sop) This is ugly. The garbage pack needs to be deleted. - List newIdx = new ArrayList(newPackList.size()); - for (DfsPackFile pack : newPackList) - newIdx.add(pack.getPackIndex(ctx)); - PackWriter pw = newPackWriter(); try { RevWalk pool = new RevWalk(ctx); @@ -272,7 +268,7 @@ public class DfsGarbageCollector { for (PackIndex.MutableEntry ent : oldIdx) { pm.update(1); ObjectId id = ent.toObjectId(); - if (pool.lookupOrNull(id) != null || anyIndexHas(newIdx, id)) + if (pool.lookupOrNull(id) != null || anyPackHas(id)) continue; int type = oldPack.getObjectType(ctx, ent.getOffset()); @@ -287,9 +283,9 @@ public class DfsGarbageCollector { } } - private static boolean anyIndexHas(List list, AnyObjectId id) { - for (PackIndex idx : list) - if (idx.hasObject(id)) + private boolean anyPackHas(AnyObjectId id) { + for (PackWriter.ObjectIdSet packedObjs : newPackObj) + if (packedObjs.contains(id)) return true; return false; } @@ -336,6 +332,13 @@ public class DfsGarbageCollector { out.close(); } + final List packedObjs = pw.getObjectList(); + newPackObj.add(new PackWriter.ObjectIdSet() { + public boolean contains(AnyObjectId objectId) { + return 0 <= Collections.binarySearch(packedObjs, objectId); + } + }); + PackWriter.Statistics stats = pw.getStatistics(); pack.setPackStats(stats); pack.setFileSize(PACK, stats.getTotalBytes()); @@ -343,7 +346,8 @@ public class DfsGarbageCollector { pack.setDeltaCount(stats.getTotalDeltas()); objectsPacked += stats.getTotalObjects(); newPackStats.add(stats); - newPackList.add(DfsBlockCache.getInstance().getOrCreate(pack, null)); + + DfsBlockCache.getInstance().getOrCreate(pack, null); return pack; } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/GC.java index cf4ec5893..45f382316 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/GC.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/GC.java @@ -70,6 +70,7 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.NoWorkTreeException; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; @@ -83,6 +84,7 @@ import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.pack.PackWriter; +import org.eclipse.jgit.storage.pack.PackWriter.ObjectIdSet; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.filter.TreeFilter; import org.eclipse.jgit.util.FileUtils; @@ -498,10 +500,10 @@ public class GC { tagTargets.add(ref.getPeeledObjectId()); } - List excluded = new LinkedList(); - for (PackFile f : repo.getObjectDatabase().getPacks()) + List excluded = new LinkedList(); + for (final PackFile f : repo.getObjectDatabase().getPacks()) if (f.shouldBeKept()) - excluded.add(f.getIndex()); + excluded.add(objectIdSet(f.getIndex())); tagTargets.addAll(allHeads); nonHeads.addAll(indexObjects); @@ -513,7 +515,7 @@ public class GC { tagTargets, excluded); if (heads != null) { ret.add(heads); - excluded.add(0, heads.getIndex()); + excluded.add(0, objectIdSet(heads.getIndex())); } } if (!nonHeads.isEmpty()) { @@ -632,7 +634,7 @@ public class GC { private PackFile writePack(Set want, Set have, Set tagTargets, - List excludeObjects) throws IOException { + List excludeObjects) throws IOException { File tmpPack = null; File tmpIdx = null; PackWriter pw = new PackWriter(repo); @@ -643,7 +645,7 @@ public class GC { if (tagTargets != null) pw.setTagTargets(tagTargets); if (excludeObjects != null) - for (PackIndex idx : excludeObjects) + for (ObjectIdSet idx : excludeObjects) pw.excludeObjects(idx); pw.preparePack(pm, want, have); if (pw.getObjectCount() == 0) @@ -855,4 +857,11 @@ public class GC { expireAgeMillis = -1; } + private static ObjectIdSet objectIdSet(final PackIndex idx) { + return new ObjectIdSet() { + public boolean contains(AnyObjectId objectId) { + return idx.hasObject(objectId); + } + }; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index bfb6c652d..b199d4fee 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -103,7 +103,6 @@ import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevSort; import org.eclipse.jgit.revwalk.RevTag; import org.eclipse.jgit.revwalk.RevTree; -import org.eclipse.jgit.storage.file.PackIndex; import org.eclipse.jgit.storage.file.PackIndexWriter; import org.eclipse.jgit.util.BlockList; import org.eclipse.jgit.util.TemporaryBuffer; @@ -144,6 +143,18 @@ import org.eclipse.jgit.util.TemporaryBuffer; public class PackWriter { private static final int PACK_VERSION_GENERATED = 2; + /** A collection of object ids. */ + public interface ObjectIdSet { + /** + * Returns true if the objectId is contained within the collection. + * + * @param objectId + * the objectId to find + * @return whether the collection contains the objectId. + */ + boolean contains(AnyObjectId objectId); + } + private static final Map, Boolean> instances = new ConcurrentHashMap, Boolean>(); @@ -206,9 +217,9 @@ public class PackWriter { private Set tagTargets = Collections.emptySet(); - private PackIndex[] excludeInPacks; + private ObjectIdSet[] excludeInPacks; - private PackIndex excludeInPackLast; + private ObjectIdSet excludeInPackLast; private Deflater myDeflater; @@ -507,19 +518,40 @@ public class PackWriter { return stats.totalObjects; } + /** + * Returns the object ids in the pack file that was created by this writer, + * sorted by name. + * + * This method can only be invoked after + * {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has + * been invoked and completed successfully. + * + * @return number of objects in pack. + * @throws IOException + * a cached pack cannot supply its object ids. + */ + public List getObjectList() throws IOException { + if (!cachedPacks.isEmpty()) + throw new IOException( + JGitText.get().cachedPacksPreventsListingObjects); + + return Collections.unmodifiableList( + (List) sortByName()); + } + /** * Add a pack index whose contents should be excluded from the result. * * @param idx * objects in this index will not be in the output pack. */ - public void excludeObjects(PackIndex idx) { + public void excludeObjects(ObjectIdSet idx) { if (excludeInPacks == null) { - excludeInPacks = new PackIndex[] { idx }; + excludeInPacks = new ObjectIdSet[] { idx }; excludeInPackLast = idx; } else { int cnt = excludeInPacks.length; - PackIndex[] newList = new PackIndex[cnt + 1]; + ObjectIdSet[] newList = new ObjectIdSet[cnt + 1]; System.arraycopy(excludeInPacks, 0, newList, 0, cnt); newList[cnt] = idx; excludeInPacks = newList; @@ -1798,10 +1830,10 @@ public class PackWriter { private boolean exclude(AnyObjectId objectId) { if (excludeInPacks == null) return false; - if (excludeInPackLast.hasObject(objectId)) + if (excludeInPackLast.contains(objectId)) return true; - for (PackIndex idx : excludeInPacks) { - if (idx.hasObject(objectId)) { + for (ObjectIdSet idx : excludeInPacks) { + if (idx.contains(objectId)) { excludeInPackLast = idx; return true; }