From d126bcc5c800ef03e8292e7ad1dd7fba7c3358d2 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 25 Jul 2017 15:37:56 -0700 Subject: [PATCH 1/2] dfs: compact reftables during DfsPackCompactor Combine intermediate, non-GC reftables when combining pack files. This shrinks the reftable stack, improving lookup times. Change-Id: I5dbba41806f99af5ecaff3a3119f6630e9404256 --- .../storage/dfs/DfsPackCompactor.java | 244 +++++++++++++----- .../storage/reftable/ReftableCompactor.java | 21 +- 2 files changed, 203 insertions(+), 62 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java index ac14c0bc3..99663eb73 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java @@ -44,21 +44,29 @@ package org.eclipse.jgit.internal.storage.dfs; import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT; +import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC; import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; +import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE; import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; +import java.util.Set; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.file.PackIndex; import org.eclipse.jgit.internal.storage.file.PackReverseIndex; import org.eclipse.jgit.internal.storage.pack.PackWriter; 
+import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor; +import org.eclipse.jgit.internal.storage.reftable.ReftableConfig; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; @@ -89,16 +97,15 @@ import org.eclipse.jgit.util.io.CountingOutputStream; */ public class DfsPackCompactor { private final DfsRepository repo; - private final List srcPacks; - + private final List srcReftables; private final List exclude; - private final List newPacks; - - private final List newStats; + private PackStatistics newStats; + private DfsPackDescription outDesc; private int autoAddSize; + private ReftableConfig reftableConfig; private RevWalk rw; private RevFlag added; @@ -114,9 +121,19 @@ public class DfsPackCompactor { repo = repository; autoAddSize = 5 * 1024 * 1024; // 5 MiB srcPacks = new ArrayList<>(); + srcReftables = new ArrayList<>(); exclude = new ArrayList<>(4); - newPacks = new ArrayList<>(1); - newStats = new ArrayList<>(1); + } + + /** + * @param cfg + * configuration to write a reftable. Reftable compacting is + * disabled (default) when {@code cfg} is {@code null}. + * @return {@code this} + */ + public DfsPackCompactor setReftableConfig(ReftableConfig cfg) { + reftableConfig = cfg; + return this; } /** @@ -137,7 +154,19 @@ public class DfsPackCompactor { } /** - * Automatically select packs to be included, and add them. + * Add a reftable to be compacted. + * + * @param table + * a reftable to combine. + * @return {@code this} + */ + public DfsPackCompactor add(DfsReftable table) { + srcReftables.add(table); + return this; + } + + /** + * Automatically select pack and reftables to be included, and add them. *

* Packs are selected based on size, smaller packs get included while bigger * ones are omitted. @@ -155,6 +184,16 @@ public class DfsPackCompactor { else exclude(pack); } + + if (reftableConfig != null) { + for (DfsReftable table : objdb.getReftables()) { + DfsPackDescription d = table.getPackDescription(); + if (d.getPackSource() != GC + && d.getFileSize(REFTABLE) < autoAddSize) { + add(table); + } + } + } return this; } @@ -197,58 +236,68 @@ public class DfsPackCompactor { * the packs cannot be compacted. */ public void compact(ProgressMonitor pm) throws IOException { - if (pm == null) + if (pm == null) { pm = NullProgressMonitor.INSTANCE; + } DfsObjDatabase objdb = repo.getObjectDatabase(); try (DfsReader ctx = objdb.newReader()) { - PackConfig pc = new PackConfig(repo); - pc.setIndexVersion(2); - pc.setDeltaCompress(false); - pc.setReuseDeltas(true); - pc.setReuseObjects(true); + if (reftableConfig != null && !srcReftables.isEmpty()) { + compactReftables(ctx); + } + compactPacks(ctx, pm); + + List commit = getNewPacks(); + Collection remove = toPrune(); + if (!commit.isEmpty() || !remove.isEmpty()) { + objdb.commitPack(commit, remove); + } + } finally { + rw = null; + } + } - PackWriter pw = new PackWriter(pc, ctx); + private void compactPacks(DfsReader ctx, ProgressMonitor pm) + throws IOException, IncorrectObjectTypeException { + DfsObjDatabase objdb = repo.getObjectDatabase(); + PackConfig pc = new PackConfig(repo); + pc.setIndexVersion(2); + pc.setDeltaCompress(false); + pc.setReuseDeltas(true); + pc.setReuseObjects(true); + + PackWriter pw = new PackWriter(pc, ctx); + try { + pw.setDeltaBaseAsOffset(true); + pw.setReuseDeltaCommits(false); + + addObjectsToPack(pw, ctx, pm); + if (pw.getObjectCount() == 0) { + return; + } + + boolean rollback = true; + initOutDesc(objdb); try { - pw.setDeltaBaseAsOffset(true); - pw.setReuseDeltaCommits(false); - - addObjectsToPack(pw, ctx, pm); - if (pw.getObjectCount() == 0) { - List remove = toPrune(); - if (remove.size() 
> 0) - objdb.commitPack( - Collections.emptyList(), - remove); - return; - } + writePack(objdb, outDesc, pw, pm); + writeIndex(objdb, outDesc, pw); - boolean rollback = true; - DfsPackDescription pack = objdb.newPack(COMPACT, - estimatePackSize()); - try { - writePack(objdb, pack, pw, pm); - writeIndex(objdb, pack, pw); - - PackStatistics stats = pw.getStatistics(); - pw.close(); - pw = null; - - pack.setPackStats(stats); - objdb.commitPack(Collections.singletonList(pack), toPrune()); - newPacks.add(pack); - newStats.add(stats); - rollback = false; - } finally { - if (rollback) - objdb.rollbackPack(Collections.singletonList(pack)); - } + PackStatistics stats = pw.getStatistics(); + pw.close(); + pw = null; + + outDesc.setPackStats(stats); + newStats = stats; + rollback = false; } finally { - if (pw != null) - pw.close(); + if (rollback) { + objdb.rollbackPack(Collections.singletonList(outDesc)); + } } } finally { - rw = null; + if (pw != null) { + pw.close(); + } } } @@ -263,27 +312,81 @@ public class DfsPackCompactor { return size; } + private void compactReftables(DfsReader ctx) throws IOException { + DfsObjDatabase objdb = repo.getObjectDatabase(); + Collections.sort(srcReftables, objdb.reftableComparator()); + + try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) { + initOutDesc(objdb); + ReftableCompactor compact = new ReftableCompactor(); + compact.addAll(stack.readers()); + compact.setIncludeDeletes(true); + writeReftable(objdb, outDesc, compact); + } + } + + private void initOutDesc(DfsObjDatabase objdb) throws IOException { + if (outDesc == null) { + outDesc = objdb.newPack(COMPACT, estimatePackSize()); + } + } + /** @return all of the source packs that fed into this compaction. 
*/ - public List getSourcePacks() { - return toPrune(); + public Collection getSourcePacks() { + Set src = new HashSet<>(); + for (DfsPackFile pack : srcPacks) { + src.add(pack.getPackDescription()); + } + for (DfsReftable table : srcReftables) { + src.add(table.getPackDescription()); + } + return src; } /** @return new packs created by this compaction. */ public List getNewPacks() { - return newPacks; + return outDesc != null + ? Collections.singletonList(outDesc) + : Collections.emptyList(); } /** @return statistics corresponding to the {@link #getNewPacks()}. */ public List getNewPackStatistics() { - return newStats; + return newStats != null + ? Collections.singletonList(newStats) + : Collections.emptyList(); } - private List toPrune() { - int cnt = srcPacks.size(); - List all = new ArrayList<>(cnt); - for (DfsPackFile pack : srcPacks) - all.add(pack.getPackDescription()); - return all; + private Collection toPrune() { + Set packs = new HashSet<>(); + for (DfsPackFile pack : srcPacks) { + packs.add(pack.getPackDescription()); + } + + Set reftables = new HashSet<>(); + for (DfsReftable table : srcReftables) { + reftables.add(table.getPackDescription()); + } + + for (Iterator i = packs.iterator(); i.hasNext();) { + DfsPackDescription d = i.next(); + if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) { + i.remove(); + } + } + + for (Iterator i = reftables.iterator(); + i.hasNext();) { + DfsPackDescription d = i.next(); + if (d.hasFileExt(PACK) && !packs.contains(d)) { + i.remove(); + } + } + + Set toPrune = new HashSet<>(); + toPrune.addAll(packs); + toPrune.addAll(reftables); + return toPrune; } private void addObjectsToPack(PackWriter pw, DfsReader ctx, @@ -390,6 +493,27 @@ public class DfsPackCompactor { } } + private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack, + ReftableCompactor compact) throws IOException { + try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) { + compact.setConfig(configureReftable(reftableConfig, out)); + 
compact.compact(out); + pack.addFileExt(REFTABLE); + pack.setReftableStats(compact.getStats()); + } + } + + static ReftableConfig configureReftable(ReftableConfig cfg, + DfsOutputStream out) { + int bs = out.blockSize(); + if (bs > 0) { + cfg = new ReftableConfig(cfg); + cfg.setRefBlockSize(bs); + cfg.setAlignBlocks(true); + } + return cfg; + } + private static class ObjectIdWithOffset extends ObjectId { final long offset; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java index 3927f7b17..c22157784 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java @@ -159,9 +159,16 @@ public class ReftableCompactor { * tables to compact. Tables should be ordered oldest first/most * recent last so that the more recent tables can shadow the * older results. Caller is responsible for closing the readers. + * @throws IOException + * update indexes of a reader cannot be accessed. */ - public void addAll(List readers) { + public void addAll(List readers) throws IOException { tables.addAll(readers); + for (Reftable r : readers) { + if (r instanceof ReftableReader) { + adjustUpdateIndexes((ReftableReader) r); + } + } } /** @@ -178,7 +185,7 @@ public class ReftableCompactor { * @return {@code true} if the compactor accepted this table; {@code false} * if the compactor has reached its limit. * @throws IOException - * if size of {@code reader} cannot be read. + * if size of {@code reader}, or its update indexes cannot be read. 
*/ public boolean tryAddFirst(ReftableReader reader) throws IOException { long sz = reader.size(); @@ -186,10 +193,20 @@ public class ReftableCompactor { return false; } bytesToCompact += sz; + adjustUpdateIndexes(reader); tables.addFirst(reader); return true; } + private void adjustUpdateIndexes(ReftableReader reader) throws IOException { + if (minUpdateIndex == 0) { + minUpdateIndex = reader.minUpdateIndex(); + } else { + minUpdateIndex = Math.min(minUpdateIndex, reader.minUpdateIndex()); + } + maxUpdateIndex = Math.max(maxUpdateIndex, reader.maxUpdateIndex()); + } + /** * Write a compaction to {@code out}. * From d13dfac9dc6b12a36dd14c68363eaabe3b4db109 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Thu, 10 Aug 2017 16:41:26 -0700 Subject: [PATCH 2/2] dfs: write reftable from DfsGarbageCollector If a ReftableConfig has been supplied by the caller, write out a reftable as a sibling of the GC pack, alongside the heads. To bootstrap from a non-reftable system, the refs are read from the DfsRefDatabase if no GC reftables are present. It's assumed the references are fully current, and do not need to be merged with any other reftables. Any non-GC reftables will be pruned at the end of the GC cycle, just like any packs that were replaced. If a GC reftable is present, all existing reftables are compacted, and references from DfsRefDatabase are only used to seed the packer. It's assumed these are consistent with each other.
Change-Id: Ie397eb58aaaefb6865c816d9b39de3ac12998019 --- .../storage/dfs/DfsGarbageCollectorTest.java | 190 ++++++++++++++++++ .../storage/dfs/DfsGarbageCollector.java | 163 +++++++++++++-- 2 files changed, 339 insertions(+), 14 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java index e4dcc2e87..55a5f726d 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java @@ -5,6 +5,7 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT; import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE; import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; +import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -13,19 +14,29 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.concurrent.TimeUnit; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; +import org.eclipse.jgit.internal.storage.reftable.RefCursor; +import org.eclipse.jgit.internal.storage.reftable.ReftableConfig; +import org.eclipse.jgit.internal.storage.reftable.ReftableReader; +import org.eclipse.jgit.internal.storage.reftable.ReftableWriter; import org.eclipse.jgit.junit.MockSystemReader; import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.BatchRefUpdate; +import 
org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectIdRef; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevBlob; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.pack.PackConfig; +import org.eclipse.jgit.transport.ReceiveCommand; import org.eclipse.jgit.util.SystemReader; import org.junit.After; import org.junit.Before; @@ -653,6 +664,185 @@ public class DfsGarbageCollectorTest { assertEquals(2, odb.getPacks().length); } + @SuppressWarnings("boxing") + @Test + public void producesNewReftable() throws Exception { + String master = "refs/heads/master"; + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + + BatchRefUpdate bru = git.getRepository().getRefDatabase() + .newBatchUpdate(); + bru.addCommand(new ReceiveCommand(ObjectId.zeroId(), commit1, master)); + for (int i = 1; i <= 5100; i++) { + bru.addCommand(new ReceiveCommand(ObjectId.zeroId(), commit0, + String.format("refs/pulls/%04d", i))); + } + try (RevWalk rw = new RevWalk(git.getRepository())) { + bru.execute(rw, NullProgressMonitor.INSTANCE); + } + + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setReftableConfig(new ReftableConfig()); + run(gc); + + // Single GC pack present with all objects. + assertEquals(1, odb.getPacks().length); + DfsPackFile pack = odb.getPacks()[0]; + DfsPackDescription desc = pack.getPackDescription(); + assertEquals(GC, desc.getPackSource()); + assertTrue("commit0 in pack", isObjectInPack(commit0, pack)); + assertTrue("commit1 in pack", isObjectInPack(commit1, pack)); + + // Sibling REFTABLE is also present. 
+ assertTrue(desc.hasFileExt(REFTABLE)); + ReftableWriter.Stats stats = desc.getReftableStats(); + assertNotNull(stats); + assertTrue(stats.totalBytes() > 0); + assertEquals(5101, stats.refCount()); + assertEquals(1, stats.minUpdateIndex()); + assertEquals(1, stats.maxUpdateIndex()); + + DfsReftable table = new DfsReftable(DfsBlockCache.getInstance(), desc); + try (DfsReader ctx = odb.newReader(); + ReftableReader rr = table.open(ctx); + RefCursor rc = rr.seekRef("refs/pulls/5100")) { + assertTrue(rc.next()); + assertEquals(commit0, rc.getRef().getObjectId()); + assertFalse(rc.next()); + } + } + + @Test + public void leavesNonGcReftablesIfNotConfigured() throws Exception { + String master = "refs/heads/master"; + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + git.update(master, commit1); + + DfsPackDescription t1 = odb.newPack(INSERT); + try (DfsOutputStream out = odb.writeFile(t1, REFTABLE)) { + out.write("ignored".getBytes(StandardCharsets.UTF_8)); + t1.addFileExt(REFTABLE); + } + odb.commitPack(Collections.singleton(t1), null); + + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setReftableConfig(null); + run(gc); + + // Single GC pack present with all objects. + assertEquals(1, odb.getPacks().length); + DfsPackFile pack = odb.getPacks()[0]; + DfsPackDescription desc = pack.getPackDescription(); + assertEquals(GC, desc.getPackSource()); + assertTrue("commit0 in pack", isObjectInPack(commit0, pack)); + assertTrue("commit1 in pack", isObjectInPack(commit1, pack)); + + // Only INSERT REFTABLE above is present. 
+ DfsReftable[] tables = odb.getReftables(); + assertEquals(1, tables.length); + assertEquals(t1, tables[0].getPackDescription()); + } + + @Test + public void prunesNonGcReftables() throws Exception { + String master = "refs/heads/master"; + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + git.update(master, commit1); + + DfsPackDescription t1 = odb.newPack(INSERT); + try (DfsOutputStream out = odb.writeFile(t1, REFTABLE)) { + out.write("ignored".getBytes(StandardCharsets.UTF_8)); + t1.addFileExt(REFTABLE); + } + odb.commitPack(Collections.singleton(t1), null); + odb.clearCache(); + + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setReftableConfig(new ReftableConfig()); + run(gc); + + // Single GC pack present with all objects. + assertEquals(1, odb.getPacks().length); + DfsPackFile pack = odb.getPacks()[0]; + DfsPackDescription desc = pack.getPackDescription(); + assertEquals(GC, desc.getPackSource()); + assertTrue("commit0 in pack", isObjectInPack(commit0, pack)); + assertTrue("commit1 in pack", isObjectInPack(commit1, pack)); + + // Only sibling GC REFTABLE is present. 
+ DfsReftable[] tables = odb.getReftables(); + assertEquals(1, tables.length); + assertEquals(desc, tables[0].getPackDescription()); + assertTrue(desc.hasFileExt(REFTABLE)); + } + + @Test + public void compactsReftables() throws Exception { + String master = "refs/heads/master"; + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + git.update(master, commit1); + + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setReftableConfig(new ReftableConfig()); + run(gc); + + DfsPackDescription t1 = odb.newPack(INSERT); + Ref next = new ObjectIdRef.PeeledNonTag(Ref.Storage.LOOSE, + "refs/heads/next", commit0.copy()); + try (DfsOutputStream out = odb.writeFile(t1, REFTABLE)) { + ReftableWriter w = new ReftableWriter(); + w.setMinUpdateIndex(42); + w.setMaxUpdateIndex(42); + w.begin(out); + w.sortAndWriteRefs(Collections.singleton(next)); + w.finish(); + t1.addFileExt(REFTABLE); + t1.setReftableStats(w.getStats()); + } + odb.commitPack(Collections.singleton(t1), null); + + gc = new DfsGarbageCollector(repo); + gc.setReftableConfig(new ReftableConfig()); + run(gc); + + // Single GC pack present with all objects. + assertEquals(1, odb.getPacks().length); + DfsPackFile pack = odb.getPacks()[0]; + DfsPackDescription desc = pack.getPackDescription(); + assertEquals(GC, desc.getPackSource()); + assertTrue("commit0 in pack", isObjectInPack(commit0, pack)); + assertTrue("commit1 in pack", isObjectInPack(commit1, pack)); + + // Only sibling GC REFTABLE is present. + DfsReftable[] tables = odb.getReftables(); + assertEquals(1, tables.length); + assertEquals(desc, tables[0].getPackDescription()); + assertTrue(desc.hasFileExt(REFTABLE)); + + // GC reftable contains the compaction. 
+ DfsReftable table = new DfsReftable(DfsBlockCache.getInstance(), desc); + try (DfsReader ctx = odb.newReader(); + ReftableReader rr = table.open(ctx); + RefCursor rc = rr.allRefs()) { + assertEquals(1, rr.minUpdateIndex()); + assertEquals(42, rr.maxUpdateIndex()); + + assertTrue(rc.next()); + assertEquals(master, rc.getRef().getName()); + assertEquals(commit1, rc.getRef().getObjectId()); + + assertTrue(rc.next()); + assertEquals(next.getName(), rc.getRef().getName()); + assertEquals(commit0, rc.getRef().getObjectId()); + + assertFalse(rc.next()); + } + } + private TestRepository.CommitBuilder commit() { return git.commit(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java index ce2b05382..7914d587a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java @@ -50,13 +50,16 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT; import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.RECEIVE; import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE; +import static org.eclipse.jgit.internal.storage.dfs.DfsPackCompactor.configureReftable; import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; +import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE; import static org.eclipse.jgit.internal.storage.pack.PackWriter.NONE; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Calendar; import java.util.Collection; import 
java.util.EnumSet; @@ -72,6 +75,9 @@ import org.eclipse.jgit.internal.storage.file.PackIndex; import org.eclipse.jgit.internal.storage.file.PackReverseIndex; import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.internal.storage.pack.PackWriter; +import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor; +import org.eclipse.jgit.internal.storage.reftable.ReftableConfig; +import org.eclipse.jgit.internal.storage.reftable.ReftableWriter; import org.eclipse.jgit.internal.storage.reftree.RefTreeNames; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; @@ -94,14 +100,15 @@ public class DfsGarbageCollector { private final DfsObjDatabase objdb; private final List newPackDesc; - private final List newPackStats; - private final List newPackObj; private DfsReader ctx; private PackConfig packConfig; + private ReftableConfig reftableConfig; + private long reftableInitialMinUpdateIndex = 1; + private long reftableInitialMaxUpdateIndex = 1; // See packIsCoalesceableGarbage(), below, for how these two variables // interact. @@ -110,8 +117,10 @@ public class DfsGarbageCollector { private long startTimeMillis; private List packsBefore; + private List reftablesBefore; private List expiredGarbagePacks; + private Collection refsBefore; private Set allHeadsAndTags; private Set allTags; private Set nonHeads; @@ -151,6 +160,57 @@ public class DfsGarbageCollector { return this; } + /** + * @param cfg + * configuration to write a reftable. Reftable writing is + * disabled (default) when {@code cfg} is {@code null}. + * @return {@code this} + */ + public DfsGarbageCollector setReftableConfig(ReftableConfig cfg) { + reftableConfig = cfg; + return this; + } + + /** + * Set minUpdateIndex for the initial reftable created during conversion. + *

+ * Warning: A setting {@code != 1} disables cache refreshes + * normally performed at the start of {@link #pack(ProgressMonitor)}. + * Callers must ensure the reference cache is current and will have been + * read before the pack list. + * + * @param u + * minUpdateIndex for the initial reftable created by scanning + * {@link DfsRefDatabase#getRefs(String)}. Ignored unless caller + * has also set {@link #setReftableConfig(ReftableConfig)}. + * Defaults to {@code 1}. Must be {@code u >= 0}. + * @return {@code this} + */ + public DfsGarbageCollector setReftableInitialMinUpdateIndex(long u) { + reftableInitialMinUpdateIndex = Math.max(u, 0); + return this; + } + + /** + * Set maxUpdateIndex for the initial reftable created during conversion. + *

+ * Warning: A setting {@code != 1} disables cache refreshes + * normally performed at the start of {@link #pack(ProgressMonitor)}. + * Callers must ensure the reference cache is current and will have been + * read before the pack list. + * + * @param u + * maxUpdateIndex for the initial reftable created by scanning + * {@link DfsRefDatabase#getRefs(String)}. Ignored unless caller + * has also set {@link #setReftableConfig(ReftableConfig)}. + * Defaults to {@code 1}. Must be {@code u >= 0}. + * @return {@code this} + */ + public DfsGarbageCollector setReftableInitialMaxUpdateIndex(long u) { + reftableInitialMaxUpdateIndex = Math.max(0, u); + return this; + } + /** @return garbage packs smaller than this size will be repacked. */ public long getCoalesceGarbageLimit() { return coalesceGarbageLimit; @@ -240,8 +300,9 @@ public class DfsGarbageCollector { refdb.refresh(); objdb.clearCache(); - Collection refsBefore = getAllRefs(); + refsBefore = getAllRefs(); readPacksBefore(); + readReftablesBefore(); Set allHeads = new HashSet<>(); allHeadsAndTags = new HashSet<>(); @@ -333,6 +394,11 @@ public class DfsGarbageCollector { } } + private void readReftablesBefore() throws IOException { + DfsReftable[] tables = objdb.getReftables(); + reftablesBefore = new ArrayList<>(Arrays.asList(tables)); + } + private boolean packIsExpiredGarbage(DfsPackDescription d, long now) { // Consider the garbage pack as expired when it's older than // garbagePackTtl. This check gives concurrent inserter threads @@ -407,7 +473,7 @@ public class DfsGarbageCollector { } /** @return all of the source packs that fed into this compaction. 
*/ - public List getSourcePacks() { + public Set getSourcePacks() { return toPrune(); } @@ -421,28 +487,37 @@ public class DfsGarbageCollector { return newPackStats; } - private List toPrune() { - int cnt = packsBefore.size(); - List all = new ArrayList<>(cnt); + private Set toPrune() { + Set toPrune = new HashSet<>(); for (DfsPackFile pack : packsBefore) { - all.add(pack.getPackDescription()); + toPrune.add(pack.getPackDescription()); + } + if (reftableConfig != null) { + for (DfsReftable table : reftablesBefore) { + toPrune.add(table.getPackDescription()); + } } for (DfsPackFile pack : expiredGarbagePacks) { - all.add(pack.getPackDescription()); + toPrune.add(pack.getPackDescription()); } - return all; + return toPrune; } private void packHeads(ProgressMonitor pm) throws IOException { - if (allHeadsAndTags.isEmpty()) + if (allHeadsAndTags.isEmpty()) { + writeReftable(); return; + } try (PackWriter pw = newPackWriter()) { pw.setTagTargets(tagTargets); pw.preparePack(pm, allHeadsAndTags, NONE, NONE, allTags); - if (0 < pw.getObjectCount()) - writePack(GC, pw, pm, - estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC)); + if (0 < pw.getObjectCount()) { + long estSize = estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC); + writePack(GC, pw, pm, estSize); + } else { + writeReftable(); + } } } @@ -560,6 +635,10 @@ public class DfsGarbageCollector { estimatedPackSize); newPackDesc.add(pack); + if (source == GC && reftableConfig != null) { + writeReftable(pack); + } + try (DfsOutputStream out = objdb.writeFile(pack, PACK)) { pw.writePack(pm, pm, out); pack.addFileExt(PACK); @@ -592,4 +671,60 @@ public class DfsGarbageCollector { newPackObj.add(pw.getObjectSet()); return pack; } + + private void writeReftable() throws IOException { + if (reftableConfig != null) { + DfsPackDescription pack = objdb.newPack(GC); + newPackDesc.add(pack); + writeReftable(pack); + } + } + + private void writeReftable(DfsPackDescription pack) throws IOException { + if (!hasGcReftable()) { + 
writeReftable(pack, refsBefore); + return; + } + + try (ReftableStack stack = ReftableStack.open(ctx, reftablesBefore)) { + ReftableCompactor compact = new ReftableCompactor(); + compact.addAll(stack.readers()); + compact.setIncludeDeletes(false); + compactReftable(pack, compact); + } + } + + private boolean hasGcReftable() { + for (DfsReftable table : reftablesBefore) { + if (table.getPackDescription().getPackSource() == GC) { + return true; + } + } + return false; + } + + private void writeReftable(DfsPackDescription pack, Collection refs) + throws IOException { + try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) { + ReftableConfig cfg = configureReftable(reftableConfig, out); + ReftableWriter writer = new ReftableWriter(cfg) + .setMinUpdateIndex(reftableInitialMinUpdateIndex) + .setMaxUpdateIndex(reftableInitialMaxUpdateIndex) + .begin(out) + .sortAndWriteRefs(refs) + .finish(); + pack.addFileExt(REFTABLE); + pack.setReftableStats(writer.getStats()); + } + } + + private void compactReftable(DfsPackDescription pack, + ReftableCompactor compact) throws IOException { + try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) { + compact.setConfig(configureReftable(reftableConfig, out)); + compact.compact(out); + pack.addFileExt(REFTABLE); + pack.setReftableStats(compact.getStats()); + } + } }