From bb002c619bc373059c4f2494da7870f5679ba845 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Fri, 8 Mar 2013 11:02:04 -0800 Subject: [PATCH] Avoid repacking unreachable garbage in DfsGarbageCollector If a repository has significant amounts of unreachable garbage the final phase to coalesce it can take longer than any other part of the garbage collection phase. Provide a setting for applications to tweak the threshold where coalescing ends and files just remain on disk. Change-Id: I5f11a998a7185c75ece3271d8bc6181bb83f54c1 --- .../jgit/storage/dfs/DfsGarbageCollector.java | 57 +++++++++++++++++-- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java index 6027eadc4..76b36a416 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java @@ -46,12 +46,11 @@ package org.eclipse.jgit.storage.dfs; import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.GC; import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE; import static org.eclipse.jgit.storage.pack.PackExt.BITMAP_INDEX; -import static org.eclipse.jgit.storage.pack.PackExt.PACK; import static org.eclipse.jgit.storage.pack.PackExt.INDEX; +import static org.eclipse.jgit.storage.pack.PackExt.PACK; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -69,6 +68,7 @@ import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource; import org.eclipse.jgit.storage.file.PackIndex; import org.eclipse.jgit.storage.pack.PackConfig; +import org.eclipse.jgit.storage.pack.PackExt; import org.eclipse.jgit.storage.pack.PackWriter; import 
org.eclipse.jgit.util.io.CountingOutputStream; @@ -90,6 +90,8 @@ public class DfsGarbageCollector { private PackConfig packConfig; + private long coalesceGarbageLimit = 50 << 20; + private Map<String, Ref> refsBefore; private List<DfsPackFile> packsBefore; @@ -139,6 +141,38 @@ public class DfsGarbageCollector { return this; } + /** @return garbage packs smaller than this size will be repacked. */ + public long getCoalesceGarbageLimit() { + return coalesceGarbageLimit; + } + + /** + * Set the byte size limit for garbage packs to be repacked. + * <p>

+ * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at + * the end of the run. This allows the garbage collector to coalesce + * unreachable objects into a single file. + * <p>

+ * If an UNREACHABLE_GARBAGE pack is already larger than this limit it will + * be left alone by the garbage collector. This avoids unnecessary disk IO + * reading and copying the objects. + * <p>

+ * If limit is set to 0 the UNREACHABLE_GARBAGE coalesce is disabled.<br>
+ * If limit is set to {@link Long#MAX_VALUE}, everything is coalesced. + * <p>

+ * Keeping unreachable garbage prevents race conditions with repository + * changes that may suddenly need an object whose only copy was stored in + * the UNREACHABLE_GARBAGE pack. + * + * @param limit + * size in bytes. + * @return {@code this} + */ + public DfsGarbageCollector setCoalesceGarbageLimit(long limit) { + coalesceGarbageLimit = limit; + return this; + } + /** * Create a single new pack file containing all of the live objects. * <p>

@@ -167,7 +201,7 @@ public class DfsGarbageCollector { objdb.clearCache(); refsBefore = repo.getAllRefs(); - packsBefore = Arrays.asList(objdb.getPacks()); + packsBefore = packsToRebuild(); if (packsBefore.isEmpty()) return true; @@ -203,6 +237,19 @@ public class DfsGarbageCollector { } } + private List<DfsPackFile> packsToRebuild() throws IOException { + DfsPackFile[] packs = objdb.getPacks(); + List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length); + for (DfsPackFile p : packs) { + DfsPackDescription d = p.getPackDescription(); + if (d.getPackSource() != UNREACHABLE_GARBAGE) + out.add(p); + else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit) + out.add(p); + } + return out; + } + /** @return all of the source packs that fed into this compaction. */ public List getSourcePacks() { return toPrune(); @@ -264,9 +311,9 @@ public class DfsGarbageCollector { PackWriter pw = newPackWriter(); try { RevWalk pool = new RevWalk(ctx); + pm.beginTask("Finding garbage", (int) getObjectsBefore()); for (DfsPackFile oldPack : packsBefore) { PackIndex oldIdx = oldPack.getPackIndex(ctx); - pm.beginTask("Finding garbage", (int) oldIdx.getObjectCount()); for (PackIndex.MutableEntry ent : oldIdx) { pm.update(1); ObjectId id = ent.toObjectId(); @@ -276,8 +323,8 @@ public class DfsGarbageCollector { int type = oldPack.getObjectType(ctx, ent.getOffset()); pw.addObject(pool.lookupAny(id, type)); } - pm.endTask(); } + pm.endTask(); if (0 < pw.getObjectCount()) writePack(UNREACHABLE_GARBAGE, pw, pm); } finally {