Browse Source

Prefer smaller GC files during DFS garbage collection

In 8ac65d33ed PackWriter changed its
behavior to always prefer the last object representation presented
to it by the ObjectReuseAsIs implementation. This was a fix to avoid
delta chain cycles.

Unfortunately it can lead to suboptimal compression when concurrent
GCs are run on the same repository. One case is automatic GC running
(with default settings) in parallel to a manual GC that has disabled
delta reuse in order to generate new smaller deltas for the entire
history of the repository.

Running GC with no-reuse generally requires more CPU time, which
also translates to a longer running time.  This can lead to a race
where the automatic GC completes before the no-reuse GC, leaving
the repository in a state such as:

  no-reuse GC:   size 1 GiB, mtime = 18:45
  auto GC:       size 8 GiB, mtime = 17:30

With the default sort ordering, the smaller no-reuse GC pack is
sorted earlier in the pack list, due to its more recent mtime.

During object reuse in a future GC, these smaller representations
are considered first by PackWriter, but are all discarded when the
auto GC file from 17:30 is examined second (due to its older mtime).

Work around this in two ways.

Well formed DFS repositories should have at most 1 GC pack. If
2 or more GC packs exist, break the sorting tie by selecting the
smaller file earlier in the pack list. This allows all normal read
code paths to favor the smaller file, which places less pressure
on the DfsBlockCache. If any GC race happens, readers serving clone
requests will prefer the file that is smaller.

During object reuse, flip this ordering so that the smaller file is
last. This allows PackWriter to see smaller deltas last, replacing
larger representations that were previously considered from other
pack files.

Change-Id: I0b7dc8bb9711c82abd6bd16643f518cfccc6d31a
stable-4.7
Shawn Pearce 8 years ago
parent
commit
d67b183537
  1. 68
      org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
  2. 22
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackDescription.java
  3. 34
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java

68
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java

@ -9,10 +9,12 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
@ -21,8 +23,10 @@ import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevBlob;
import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.util.SystemReader; import org.eclipse.jgit.util.SystemReader;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -74,6 +78,70 @@ public class DfsGarbageCollectorTest {
assertTrue("commit1 in pack", isObjectInPack(commit1, pack)); assertTrue("commit1 in pack", isObjectInPack(commit1, pack));
} }
@Test
public void testRacyNoReusePrefersSmaller() throws Exception {
StringBuilder msg = new StringBuilder();
for (int i = 0; i < 100; i++) {
msg.append(i).append(": i am a teapot\n");
}
RevBlob a = git.blob(msg.toString());
RevCommit c0 = git.commit()
.add("tea", a)
.message("0")
.create();
msg.append("short and stout\n");
RevBlob b = git.blob(msg.toString());
RevCommit c1 = git.commit().parent(c0).tick(1)
.add("tea", b)
.message("1")
.create();
git.update("master", c1);
PackConfig cfg = new PackConfig();
cfg.setReuseObjects(false);
cfg.setReuseDeltas(false);
cfg.setDeltaCompress(false);
cfg.setThreads(1);
DfsGarbageCollector gc = new DfsGarbageCollector(repo);
gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL
gc.setPackConfig(cfg);
run(gc);
assertEquals(1, odb.getPacks().length);
DfsPackDescription large = odb.getPacks()[0].getPackDescription();
assertSame(PackSource.GC, large.getPackSource());
cfg.setDeltaCompress(true);
gc = new DfsGarbageCollector(repo);
gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL
gc.setPackConfig(cfg);
run(gc);
assertEquals(1, odb.getPacks().length);
DfsPackDescription small = odb.getPacks()[0].getPackDescription();
assertSame(PackSource.GC, small.getPackSource());
assertTrue(
"delta compression pack is smaller",
small.getFileSize(PACK) < large.getFileSize(PACK));
assertTrue(
"large pack is older",
large.getLastModified() < small.getLastModified());
// Forcefully reinsert the older larger GC pack.
odb.commitPack(Collections.singleton(large), null);
odb.clearCache();
assertEquals(2, odb.getPacks().length);
gc = new DfsGarbageCollector(repo);
gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL
run(gc);
assertEquals(1, odb.getPacks().length);
DfsPackDescription rebuilt = odb.getPacks()[0].getPackDescription();
assertEquals(small.getFileSize(PACK), rebuilt.getFileSize(PACK));
}
@Test @Test
public void testCollectionWithGarbage() throws Exception { public void testCollectionWithGarbage() throws Exception {
RevCommit commit0 = commit().message("0").create(); RevCommit commit0 = commit().message("0").create();

22
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackDescription.java

@ -319,6 +319,17 @@ public class DfsPackDescription implements Comparable<DfsPackDescription> {
return cmp; return cmp;
} }
// Tie break GC type packs by smallest first. There should be at most
// one of each source, but when multiple exist concurrent GCs may have
// run. Preferring the smaller file selects higher quality delta
// compression, placing less demand on the DfsBlockCache.
if (as != null && as == bs && isGC(as)) {
int cmp = Long.signum(getFileSize(PACK) - b.getFileSize(PACK));
if (cmp != 0) {
return cmp;
}
}
// Newer packs should sort first. // Newer packs should sort first.
int cmp = Long.signum(b.getLastModified() - getLastModified()); int cmp = Long.signum(b.getLastModified() - getLastModified());
if (cmp != 0) if (cmp != 0)
@ -330,6 +341,17 @@ public class DfsPackDescription implements Comparable<DfsPackDescription> {
return Long.signum(getObjectCount() - b.getObjectCount()); return Long.signum(getObjectCount() - b.getObjectCount());
} }
static boolean isGC(PackSource s) {
switch (s) {
case GC:
case GC_REST:
case GC_TXN:
return true;
default:
return false;
}
}
@Override @Override
public String toString() { public String toString() {
return getFileName(PackExt.PACK); return getFileName(PackExt.PACK);

34
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java

@ -44,10 +44,12 @@
package org.eclipse.jgit.internal.storage.dfs; package org.eclipse.jgit.internal.storage.dfs;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
@ -64,6 +66,7 @@ import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException;
import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackList; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackList;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl; import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl;
import org.eclipse.jgit.internal.storage.file.PackBitmapIndex; import org.eclipse.jgit.internal.storage.file.PackBitmapIndex;
import org.eclipse.jgit.internal.storage.file.PackIndex; import org.eclipse.jgit.internal.storage.file.PackIndex;
@ -537,7 +540,7 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs {
throws IOException, MissingObjectException { throws IOException, MissingObjectException {
// Don't check dirty bit on PackList; assume ObjectToPacks all came from the // Don't check dirty bit on PackList; assume ObjectToPacks all came from the
// current list. // current list.
for (DfsPackFile pack : db.getPacks()) { for (DfsPackFile pack : sortPacksForSelectRepresentation()) {
List<DfsObjectToPack> tmp = findAllFromPack(pack, objects); List<DfsObjectToPack> tmp = findAllFromPack(pack, objects);
if (tmp.isEmpty()) if (tmp.isEmpty())
continue; continue;
@ -556,6 +559,35 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs {
} }
} }
private static final Comparator<DfsPackFile> PACK_SORT_FOR_REUSE = new Comparator<DfsPackFile>() {
public int compare(DfsPackFile af, DfsPackFile bf) {
DfsPackDescription ad = af.getPackDescription();
DfsPackDescription bd = bf.getPackDescription();
PackSource as = ad.getPackSource();
PackSource bs = bd.getPackSource();
if (as != null && as == bs && DfsPackDescription.isGC(as)) {
// Push smaller GC files last; these likely have higher quality
// delta compression and the contained representation should be
// favored over other files.
return Long.signum(bd.getFileSize(PACK) - ad.getFileSize(PACK));
}
// DfsPackDescription.compareTo already did a reasonable sort.
// Rely on Arrays.sort being stable, leaving equal elements.
return 0;
}
};
private DfsPackFile[] sortPacksForSelectRepresentation()
throws IOException {
DfsPackFile[] packs = db.getPacks();
DfsPackFile[] sorted = new DfsPackFile[packs.length];
System.arraycopy(packs, 0, sorted, 0, packs.length);
Arrays.sort(sorted, PACK_SORT_FOR_REUSE);
return sorted;
}
private List<DfsObjectToPack> findAllFromPack(DfsPackFile pack, private List<DfsObjectToPack> findAllFromPack(DfsPackFile pack,
Iterable<ObjectToPack> objects) throws IOException { Iterable<ObjectToPack> objects) throws IOException {
List<DfsObjectToPack> tmp = new BlockList<DfsObjectToPack>(); List<DfsObjectToPack> tmp = new BlockList<DfsObjectToPack>();

Loading…
Cancel
Save