Browse Source

dfs: write reftable from DfsGarbageCollector

If a ReftableConfig has been supplied by the caller, write out a
reftable as a sibling of the GC pack, alongside the heads.

To bootstrap from a non-reftable system, the refs are read from the
DfsRefDatabase if no GC reftables are present.  It's assumed the
references are fully current, and do not need to be merged with any
other reftables.  Any non-GC reftables will be pruned at the end of
the GC cycle, just like any packs that were replaced.

If a GC reftable is present, all existing reftables are compacted, and
references from DfsRefDatabase are only used to seed the packer.  It's
assumed these are consistent with each other.

Change-Id: Ie397eb58aaaefb6865c816d9b39de3ac12998019
stable-4.9
Shawn Pearce 7 years ago
parent
commit
d13dfac9dc
  1. 190
      org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
  2. 163
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java

190
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java

@ -5,6 +5,7 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
@ -13,19 +14,29 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.concurrent.TimeUnit;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.reftable.RefCursor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.internal.storage.reftable.ReftableReader;
import org.eclipse.jgit.internal.storage.reftable.ReftableWriter;
import org.eclipse.jgit.junit.MockSystemReader;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.BatchRefUpdate;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdRef;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevBlob;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.transport.ReceiveCommand;
import org.eclipse.jgit.util.SystemReader;
import org.junit.After;
import org.junit.Before;
@ -653,6 +664,185 @@ public class DfsGarbageCollectorTest {
assertEquals(2, odb.getPacks().length);
}
/**
 * A GC run with a {@link ReftableConfig} writes a reftable next to the GC
 * pack, and that reftable lists every reference created before the run.
 *
 * @throws Exception
 *             if the repository cannot be set up or collected.
 */
@SuppressWarnings("boxing")
@Test
public void producesNewReftable() throws Exception {
	final String master = "refs/heads/master";
	final int pullRefs = 5100;
	RevCommit commit0 = commit().message("0").create();
	RevCommit commit1 = commit().message("1").parent(commit0).create();

	// Create master plus refs/pulls/0001 .. refs/pulls/5100 in one batch.
	BatchRefUpdate batch = git.getRepository().getRefDatabase()
			.newBatchUpdate();
	batch.addCommand(new ReceiveCommand(ObjectId.zeroId(), commit1, master));
	for (int n = 0; n < pullRefs; n++) {
		String name = String.format("refs/pulls/%04d", n + 1);
		batch.addCommand(
				new ReceiveCommand(ObjectId.zeroId(), commit0, name));
	}
	try (RevWalk walk = new RevWalk(git.getRepository())) {
		batch.execute(walk, NullProgressMonitor.INSTANCE);
	}

	DfsGarbageCollector gc = new DfsGarbageCollector(repo);
	gc.setReftableConfig(new ReftableConfig());
	run(gc);

	// Exactly one pack remains: the GC pack holding both commits.
	DfsPackFile[] packs = odb.getPacks();
	assertEquals(1, packs.length);
	DfsPackFile gcPack = packs[0];
	DfsPackDescription desc = gcPack.getPackDescription();
	assertEquals(GC, desc.getPackSource());
	assertTrue("commit0 in pack", isObjectInPack(commit0, gcPack));
	assertTrue("commit1 in pack", isObjectInPack(commit1, gcPack));

	// The same description also carries the reftable and its statistics.
	assertTrue(desc.hasFileExt(REFTABLE));
	ReftableWriter.Stats stats = desc.getReftableStats();
	assertNotNull(stats);
	assertTrue(0 < stats.totalBytes());
	assertEquals(pullRefs + 1, stats.refCount());
	assertEquals(1, stats.minUpdateIndex());
	assertEquals(1, stats.maxUpdateIndex());

	// Seeking to the last pull ref yields exactly that one ref.
	DfsReftable table = new DfsReftable(DfsBlockCache.getInstance(), desc);
	try (DfsReader ctx = odb.newReader();
			ReftableReader reader = table.open(ctx);
			RefCursor cursor = reader.seekRef("refs/pulls/5100")) {
		assertTrue(cursor.next());
		assertEquals(commit0, cursor.getRef().getObjectId());
		assertFalse(cursor.next());
	}
}
/**
 * With a {@code null} reftable configuration, a reftable committed from
 * another source is still listed after the GC run.
 *
 * @throws Exception
 *             if the repository cannot be set up or collected.
 */
@Test
public void leavesNonGcReftablesIfNotConfigured() throws Exception {
	final String master = "refs/heads/master";
	RevCommit commit0 = commit().message("0").create();
	RevCommit commit1 = commit().message("1").parent(commit0).create();
	git.update(master, commit1);

	// Commit an INSERT description carrying a placeholder reftable file.
	byte[] placeholder = "ignored".getBytes(StandardCharsets.UTF_8);
	DfsPackDescription insertDesc = odb.newPack(INSERT);
	try (DfsOutputStream out = odb.writeFile(insertDesc, REFTABLE)) {
		out.write(placeholder);
		insertDesc.addFileExt(REFTABLE);
	}
	odb.commitPack(Collections.singleton(insertDesc), null);

	DfsGarbageCollector gc = new DfsGarbageCollector(repo);
	gc.setReftableConfig(null);
	run(gc);

	// Exactly one pack remains: the GC pack holding both commits.
	DfsPackFile[] packs = odb.getPacks();
	assertEquals(1, packs.length);
	DfsPackFile gcPack = packs[0];
	DfsPackDescription desc = gcPack.getPackDescription();
	assertEquals(GC, desc.getPackSource());
	assertTrue("commit0 in pack", isObjectInPack(commit0, gcPack));
	assertTrue("commit1 in pack", isObjectInPack(commit1, gcPack));

	// The only reftable listed is the INSERT one written above.
	DfsReftable[] tables = odb.getReftables();
	assertEquals(1, tables.length);
	assertEquals(insertDesc, tables[0].getPackDescription());
}
/**
 * With a reftable configuration set, a reftable committed from another
 * source is gone after the GC run and only the GC sibling reftable is
 * listed.
 *
 * @throws Exception
 *             if the repository cannot be set up or collected.
 */
@Test
public void prunesNonGcReftables() throws Exception {
	final String master = "refs/heads/master";
	RevCommit commit0 = commit().message("0").create();
	RevCommit commit1 = commit().message("1").parent(commit0).create();
	git.update(master, commit1);

	// Commit an INSERT description carrying a placeholder reftable file.
	byte[] placeholder = "ignored".getBytes(StandardCharsets.UTF_8);
	DfsPackDescription insertDesc = odb.newPack(INSERT);
	try (DfsOutputStream out = odb.writeFile(insertDesc, REFTABLE)) {
		out.write(placeholder);
		insertDesc.addFileExt(REFTABLE);
	}
	odb.commitPack(Collections.singleton(insertDesc), null);
	odb.clearCache();

	DfsGarbageCollector gc = new DfsGarbageCollector(repo);
	gc.setReftableConfig(new ReftableConfig());
	run(gc);

	// Exactly one pack remains: the GC pack holding both commits.
	DfsPackFile[] packs = odb.getPacks();
	assertEquals(1, packs.length);
	DfsPackFile gcPack = packs[0];
	DfsPackDescription desc = gcPack.getPackDescription();
	assertEquals(GC, desc.getPackSource());
	assertTrue("commit0 in pack", isObjectInPack(commit0, gcPack));
	assertTrue("commit1 in pack", isObjectInPack(commit1, gcPack));

	// The only reftable listed belongs to the GC pack description.
	DfsReftable[] tables = odb.getReftables();
	assertEquals(1, tables.length);
	assertEquals(desc, tables[0].getPackDescription());
	assertTrue(desc.hasFileExt(REFTABLE));
}
/**
 * When a GC reftable already exists, a second GC run folds a later INSERT
 * reftable into a single GC reftable containing the refs of both.
 *
 * @throws Exception
 *             if the repository cannot be set up or collected.
 */
@Test
public void compactsReftables() throws Exception {
	final String master = "refs/heads/master";
	RevCommit commit0 = commit().message("0").create();
	RevCommit commit1 = commit().message("1").parent(commit0).create();
	git.update(master, commit1);

	// First run: produces the initial GC reftable.
	DfsGarbageCollector firstGc = new DfsGarbageCollector(repo);
	firstGc.setReftableConfig(new ReftableConfig());
	run(firstGc);

	// Add an INSERT reftable at update index 42 holding refs/heads/next.
	DfsPackDescription insertDesc = odb.newPack(INSERT);
	Ref next = new ObjectIdRef.PeeledNonTag(Ref.Storage.LOOSE,
			"refs/heads/next", commit0.copy());
	try (DfsOutputStream out = odb.writeFile(insertDesc, REFTABLE)) {
		ReftableWriter writer = new ReftableWriter()
				.setMinUpdateIndex(42)
				.setMaxUpdateIndex(42)
				.begin(out)
				.sortAndWriteRefs(Collections.singleton(next))
				.finish();
		insertDesc.addFileExt(REFTABLE);
		insertDesc.setReftableStats(writer.getStats());
	}
	odb.commitPack(Collections.singleton(insertDesc), null);

	// Second run: must compact both reftables.
	DfsGarbageCollector secondGc = new DfsGarbageCollector(repo);
	secondGc.setReftableConfig(new ReftableConfig());
	run(secondGc);

	// Exactly one pack remains: the GC pack holding both commits.
	DfsPackFile[] packs = odb.getPacks();
	assertEquals(1, packs.length);
	DfsPackFile gcPack = packs[0];
	DfsPackDescription desc = gcPack.getPackDescription();
	assertEquals(GC, desc.getPackSource());
	assertTrue("commit0 in pack", isObjectInPack(commit0, gcPack));
	assertTrue("commit1 in pack", isObjectInPack(commit1, gcPack));

	// The only reftable listed belongs to the GC pack description.
	DfsReftable[] tables = odb.getReftables();
	assertEquals(1, tables.length);
	assertEquals(desc, tables[0].getPackDescription());
	assertTrue(desc.hasFileExt(REFTABLE));

	// The GC reftable spans update indexes 1..42 and lists both refs.
	DfsReftable table = new DfsReftable(DfsBlockCache.getInstance(), desc);
	try (DfsReader ctx = odb.newReader();
			ReftableReader reader = table.open(ctx);
			RefCursor cursor = reader.allRefs()) {
		assertEquals(1, reader.minUpdateIndex());
		assertEquals(42, reader.maxUpdateIndex());

		assertTrue(cursor.next());
		assertEquals(master, cursor.getRef().getName());
		assertEquals(commit1, cursor.getRef().getObjectId());

		assertTrue(cursor.next());
		assertEquals(next.getName(), cursor.getRef().getName());
		assertEquals(commit0, cursor.getRef().getObjectId());

		assertFalse(cursor.next());
	}
}
/** @return a new commit builder obtained from the {@code git} test repository. */
private TestRepository<InMemoryRepository>.CommitBuilder commit() {
	TestRepository<InMemoryRepository>.CommitBuilder builder = git.commit();
	return builder;
}

163
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java

@ -50,13 +50,16 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.INSERT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.RECEIVE;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.dfs.DfsPackCompactor.configureReftable;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
import static org.eclipse.jgit.internal.storage.pack.PackWriter.NONE;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.EnumSet;
@ -72,6 +75,9 @@ import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.internal.storage.reftable.ReftableWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
@ -94,14 +100,15 @@ public class DfsGarbageCollector {
private final DfsObjDatabase objdb;
private final List<DfsPackDescription> newPackDesc;
private final List<PackStatistics> newPackStats;
private final List<ObjectIdSet> newPackObj;
private DfsReader ctx;
private PackConfig packConfig;
private ReftableConfig reftableConfig;
private long reftableInitialMinUpdateIndex = 1;
private long reftableInitialMaxUpdateIndex = 1;
// See packIsCoalesceableGarbage(), below, for how these two variables
// interact.
@ -110,8 +117,10 @@ public class DfsGarbageCollector {
private long startTimeMillis;
private List<DfsPackFile> packsBefore;
private List<DfsReftable> reftablesBefore;
private List<DfsPackFile> expiredGarbagePacks;
private Collection<Ref> refsBefore;
private Set<ObjectId> allHeadsAndTags;
private Set<ObjectId> allTags;
private Set<ObjectId> nonHeads;
@ -151,6 +160,57 @@ public class DfsGarbageCollector {
return this;
}
/**
 * Enable or disable reftable output for this collector.
 *
 * @param cfg
 *            settings used when writing a reftable; pass {@code null}
 *            (the default) to leave reftable writing disabled.
 * @return {@code this}
 */
public DfsGarbageCollector setReftableConfig(ReftableConfig cfg) {
	this.reftableConfig = cfg;
	return this;
}
/**
 * Choose the minUpdateIndex stamped on the first reftable written while
 * converting a repository to reftable.
 * <p>
 * <b>Warning:</b> Any value {@code != 1} <b>disables cache refreshes</b>
 * that {@link #pack(ProgressMonitor)} normally performs when it starts.
 * The caller is then responsible for making sure the reference cache is
 * current and was read before the pack list.
 *
 * @param u
 *            minUpdateIndex for the initial reftable built by scanning
 *            {@link DfsRefDatabase#getRefs(String)}. Has no effect unless
 *            {@link #setReftableConfig(ReftableConfig)} was also called.
 *            The default is {@code 1}. Expected to be {@code u >= 0};
 *            negative values are stored as {@code 0}.
 * @return {@code this}
 */
public DfsGarbageCollector setReftableInitialMinUpdateIndex(long u) {
	// Clamp negative input to zero rather than rejecting it.
	this.reftableInitialMinUpdateIndex = u < 0 ? 0 : u;
	return this;
}
/**
 * Choose the maxUpdateIndex stamped on the first reftable written while
 * converting a repository to reftable.
 * <p>
 * <b>Warning:</b> Any value {@code != 1} <b>disables cache refreshes</b>
 * that {@link #pack(ProgressMonitor)} normally performs when it starts.
 * The caller is then responsible for making sure the reference cache is
 * current and was read before the pack list.
 *
 * @param u
 *            maxUpdateIndex for the initial reftable built by scanning
 *            {@link DfsRefDatabase#getRefs(String)}. Has no effect unless
 *            {@link #setReftableConfig(ReftableConfig)} was also called.
 *            The default is {@code 1}. Expected to be {@code u >= 0};
 *            negative values are stored as {@code 0}.
 * @return {@code this}
 */
public DfsGarbageCollector setReftableInitialMaxUpdateIndex(long u) {
	// Clamp negative input to zero rather than rejecting it.
	this.reftableInitialMaxUpdateIndex = u < 0 ? 0 : u;
	return this;
}
/** @return garbage packs smaller than this size will be repacked. */
public long getCoalesceGarbageLimit() {
return coalesceGarbageLimit;
@ -240,8 +300,9 @@ public class DfsGarbageCollector {
refdb.refresh();
objdb.clearCache();
Collection<Ref> refsBefore = getAllRefs();
refsBefore = getAllRefs();
readPacksBefore();
readReftablesBefore();
Set<ObjectId> allHeads = new HashSet<>();
allHeadsAndTags = new HashSet<>();
@ -333,6 +394,11 @@ public class DfsGarbageCollector {
}
}
/**
 * Snapshot the reftables currently listed by the object database into
 * {@code reftablesBefore} as a mutable list.
 *
 * @throws IOException
 *             if the reftable list cannot be read.
 */
private void readReftablesBefore() throws IOException {
	reftablesBefore = new ArrayList<>(Arrays.asList(objdb.getReftables()));
}
private boolean packIsExpiredGarbage(DfsPackDescription d, long now) {
// Consider the garbage pack as expired when it's older than
// garbagePackTtl. This check gives concurrent inserter threads
@ -407,7 +473,7 @@ public class DfsGarbageCollector {
}
/**
 * @return all of the source packs that fed into this compaction; this is
 *         exactly the collection computed by {@code toPrune()}.
 */
public List<DfsPackDescription> getSourcePacks() {
public Set<DfsPackDescription> getSourcePacks() {
return toPrune();
}
@ -421,28 +487,37 @@ public class DfsGarbageCollector {
return newPackStats;
}
/**
 * Collect the descriptions this GC run replaces: every pack in
 * {@code packsBefore}, every reftable in {@code reftablesBefore} (only
 * when {@code reftableConfig} is non-null), and every pack in
 * {@code expiredGarbagePacks}.
 *
 * @return the collected pack descriptions.
 */
private List<DfsPackDescription> toPrune() {
int cnt = packsBefore.size();
List<DfsPackDescription> all = new ArrayList<>(cnt);
private Set<DfsPackDescription> toPrune() {
Set<DfsPackDescription> toPrune = new HashSet<>();
for (DfsPackFile pack : packsBefore) {
all.add(pack.getPackDescription());
toPrune.add(pack.getPackDescription());
}
// Reftables are only scheduled for pruning when reftable writing is enabled.
if (reftableConfig != null) {
for (DfsReftable table : reftablesBefore) {
toPrune.add(table.getPackDescription());
}
}
for (DfsPackFile pack : expiredGarbagePacks) {
all.add(pack.getPackDescription());
toPrune.add(pack.getPackDescription());
}
return all;
return toPrune;
}
/**
 * Write the GC pack for objects reachable from {@code allHeadsAndTags}.
 * When there are no heads or tags, or the pack writer ends up with no
 * objects, no pack is written and {@code writeReftable()} is called
 * instead.
 *
 * @param pm
 *            progress monitor passed to the pack writer.
 * @throws IOException
 *             if the pack or reftable cannot be written.
 */
private void packHeads(ProgressMonitor pm) throws IOException {
if (allHeadsAndTags.isEmpty())
if (allHeadsAndTags.isEmpty()) {
writeReftable();
return;
}
try (PackWriter pw = newPackWriter()) {
pw.setTagTargets(tagTargets);
pw.preparePack(pm, allHeadsAndTags, NONE, NONE, allTags);
if (0 < pw.getObjectCount())
writePack(GC, pw, pm,
estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC));
if (0 < pw.getObjectCount()) {
long estSize = estimateGcPackSize(INSERT, RECEIVE, COMPACT, GC);
writePack(GC, pw, pm, estSize);
} else {
// No objects to pack: fall back to the reftable-only path.
writeReftable();
}
}
}
@ -560,6 +635,10 @@ public class DfsGarbageCollector {
estimatedPackSize);
newPackDesc.add(pack);
if (source == GC && reftableConfig != null) {
writeReftable(pack);
}
try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
pw.writePack(pm, pm, out);
pack.addFileExt(PACK);
@ -592,4 +671,60 @@ public class DfsGarbageCollector {
newPackObj.add(pw.getObjectSet());
return pack;
}
/**
 * Write a reftable into a fresh GC pack description that has no pack file
 * of its own. Does nothing when no reftable configuration is set.
 *
 * @throws IOException
 *             if the reftable cannot be written.
 */
private void writeReftable() throws IOException {
	if (reftableConfig == null) {
		return;
	}
	DfsPackDescription desc = objdb.newPack(GC);
	newPackDesc.add(desc);
	writeReftable(desc);
}
/**
 * Write the reftable for {@code pack}. If a GC reftable already exists,
 * all reftables in {@code reftablesBefore} are compacted with deletions
 * dropped. Otherwise the table is built directly from {@code refsBefore}.
 *
 * @param pack
 *            description that receives the reftable.
 * @throws IOException
 *             if existing reftables cannot be read or the new one cannot
 *             be written.
 */
private void writeReftable(DfsPackDescription pack) throws IOException {
	if (hasGcReftable()) {
		try (ReftableStack stack = ReftableStack.open(ctx, reftablesBefore)) {
			ReftableCompactor compactor = new ReftableCompactor();
			compactor.addAll(stack.readers());
			compactor.setIncludeDeletes(false);
			compactReftable(pack, compactor);
		}
	} else {
		// Bootstrap case: no GC reftable yet, seed from the ref snapshot.
		writeReftable(pack, refsBefore);
	}
}
/**
 * @return {@code true} if at least one reftable in {@code reftablesBefore}
 *         has a pack description whose source is {@code GC}.
 */
private boolean hasGcReftable() {
	boolean found = false;
	for (DfsReftable candidate : reftablesBefore) {
		if (GC == candidate.getPackDescription().getPackSource()) {
			found = true;
			break;
		}
	}
	return found;
}
/**
 * Write {@code refs} as a new reftable file belonging to {@code pack},
 * stamped with the configured initial min/max update indexes, and record
 * the file extension and writer statistics on the description.
 *
 * @param pack
 *            description that receives the reftable.
 * @param refs
 *            references to store; they are sorted by the writer.
 * @throws IOException
 *             if the reftable cannot be written.
 */
private void writeReftable(DfsPackDescription pack, Collection<Ref> refs)
		throws IOException {
	try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
		ReftableWriter writer = new ReftableWriter(
				configureReftable(reftableConfig, out));
		writer.setMinUpdateIndex(reftableInitialMinUpdateIndex);
		writer.setMaxUpdateIndex(reftableInitialMaxUpdateIndex);
		writer.begin(out);
		writer.sortAndWriteRefs(refs);
		writer.finish();

		pack.addFileExt(REFTABLE);
		pack.setReftableStats(writer.getStats());
	}
}
/**
 * Run {@code compact} into a new reftable file belonging to {@code pack},
 * then record the file extension and compactor statistics on the
 * description.
 *
 * @param pack
 *            description that receives the compacted reftable.
 * @param compact
 *            compactor already loaded with the tables to merge.
 * @throws IOException
 *             if the reftable cannot be written.
 */
private void compactReftable(DfsPackDescription pack,
		ReftableCompactor compact) throws IOException {
	try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
		ReftableConfig cfg = configureReftable(reftableConfig, out);
		compact.setConfig(cfg);
		compact.compact(out);

		pack.addFileExt(REFTABLE);
		pack.setReftableStats(compact.getStats());
	}
}
}

Loading…
Cancel
Save