diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java index 771aadc84..c70b6f299 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java @@ -9,10 +9,12 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; +import java.util.Collections; import java.util.concurrent.TimeUnit; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; @@ -21,8 +23,10 @@ import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevBlob; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.storage.pack.PackConfig; import org.eclipse.jgit.util.SystemReader; import org.junit.After; import org.junit.Before; @@ -74,6 +78,70 @@ public class DfsGarbageCollectorTest { assertTrue("commit1 in pack", isObjectInPack(commit1, pack)); } + @Test + public void testRacyNoReusePrefersSmaller() throws Exception { + StringBuilder msg = new StringBuilder(); + for (int i = 0; i < 100; i++) { + msg.append(i).append(": i am a teapot\n"); + } + RevBlob a = git.blob(msg.toString()); + RevCommit c0 = git.commit() + .add("tea", a) + .message("0") + .create(); + + msg.append("short and stout\n"); + RevBlob b = git.blob(msg.toString()); + RevCommit c1 = git.commit().parent(c0).tick(1) + .add("tea", b) + .message("1") + .create(); + git.update("master", c1); + + PackConfig cfg = new PackConfig(); + cfg.setReuseObjects(false); + cfg.setReuseDeltas(false); + cfg.setDeltaCompress(false); + cfg.setThreads(1); + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL + gc.setPackConfig(cfg); + run(gc); + + assertEquals(1, odb.getPacks().length); + DfsPackDescription large = odb.getPacks()[0].getPackDescription(); + assertSame(PackSource.GC, large.getPackSource()); + + cfg.setDeltaCompress(true); + gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL + gc.setPackConfig(cfg); + run(gc); + + assertEquals(1, odb.getPacks().length); + DfsPackDescription small = odb.getPacks()[0].getPackDescription(); + assertSame(PackSource.GC, small.getPackSource()); + assertTrue( + "delta compression pack is smaller", + small.getFileSize(PACK) < large.getFileSize(PACK)); + assertTrue( + "large pack is older", + large.getLastModified() < small.getLastModified()); + + // Forcefully reinsert the older larger GC pack. + odb.commitPack(Collections.singleton(large), null); + odb.clearCache(); + assertEquals(2, odb.getPacks().length); + + gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(0, TimeUnit.MILLISECONDS); // disable TTL + run(gc); + + assertEquals(1, odb.getPacks().length); + DfsPackDescription rebuilt = odb.getPacks()[0].getPackDescription(); + assertEquals(small.getFileSize(PACK), rebuilt.getFileSize(PACK)); + } + @Test public void testCollectionWithGarbage() throws Exception { RevCommit commit0 = commit().message("0").create(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackDescription.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackDescription.java index 0411bbe5e..683094c32 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackDescription.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackDescription.java @@ -319,6 +319,17 @@ public class DfsPackDescription implements Comparable { return cmp; } + // Tie break GC type packs by smallest first. There should be at most + // one of each source, but when multiple exist concurrent GCs may have + // run. Preferring the smaller file selects higher quality delta + // compression, placing less demand on the DfsBlockCache. + if (as != null && as == bs && isGC(as)) { + int cmp = Long.signum(getFileSize(PACK) - b.getFileSize(PACK)); + if (cmp != 0) { + return cmp; + } + } + // Newer packs should sort first. int cmp = Long.signum(b.getLastModified() - getLastModified()); if (cmp != 0) @@ -330,6 +341,17 @@ public class DfsPackDescription implements Comparable { return Long.signum(getObjectCount() - b.getObjectCount()); } + static boolean isGC(PackSource s) { + switch (s) { + case GC: + case GC_REST: + case GC_TXN: + return true; + default: + return false; + } + } + @Override public String toString() { return getFileName(PackExt.PACK); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java index 8c9329503..accab731b 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java @@ -44,10 +44,12 @@ package org.eclipse.jgit.internal.storage.dfs; +import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -64,6 +66,7 @@ import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackList; +import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl; import org.eclipse.jgit.internal.storage.file.PackBitmapIndex; import org.eclipse.jgit.internal.storage.file.PackIndex; @@ -537,7 +540,7 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { throws IOException, MissingObjectException { // Don't check dirty bit on PackList; assume ObjectToPacks all came from the // current list. - for (DfsPackFile pack : db.getPacks()) { + for (DfsPackFile pack : sortPacksForSelectRepresentation()) { List tmp = findAllFromPack(pack, objects); if (tmp.isEmpty()) continue; @@ -556,6 +559,35 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { } } + private static final Comparator PACK_SORT_FOR_REUSE = new Comparator() { + public int compare(DfsPackFile af, DfsPackFile bf) { + DfsPackDescription ad = af.getPackDescription(); + DfsPackDescription bd = bf.getPackDescription(); + PackSource as = ad.getPackSource(); + PackSource bs = bd.getPackSource(); + + if (as != null && as == bs && DfsPackDescription.isGC(as)) { + // Push smaller GC files last; these likely have higher quality + // delta compression and the contained representation should be + // favored over other files. + return Long.signum(bd.getFileSize(PACK) - ad.getFileSize(PACK)); + } + + // DfsPackDescription.compareTo already did a reasonable sort. + // Rely on Arrays.sort being stable, leaving equal elements. + return 0; + } + }; + + private DfsPackFile[] sortPacksForSelectRepresentation() + throws IOException { + DfsPackFile[] packs = db.getPacks(); + DfsPackFile[] sorted = new DfsPackFile[packs.length]; + System.arraycopy(packs, 0, sorted, 0, packs.length); + Arrays.sort(sorted, PACK_SORT_FOR_REUSE); + return sorted; + } + private List findAllFromPack(DfsPackFile pack, Iterable objects) throws IOException { List tmp = new BlockList();