From 006f4d4d29eecc3d357b442cc76b76fdeff51de0 Mon Sep 17 00:00:00 2001 From: Thirumala Reddy Mutchukota Date: Mon, 6 Feb 2017 14:08:27 -0800 Subject: [PATCH] Reintroduce garbage pack coalescing when ttl > 0. Disabling the garbage pack coalescing when garbageTtl > 0 can result in lot of garbage packs if they are created within the garbageTtl time. To avoid a large number of garbage packs, re-introducing garbage pack coalescing for the packs that are created within a single calendar day when the garbageTtl is more than one day or one third of the garbageTtl. Change-Id: If969716aeb55fb4fd0ff71d75f41a07638cd5a69 Signed-off-by: Thirumala Reddy Mutchukota --- .../storage/dfs/DfsGarbageCollectorTest.java | 78 +++++++++++++++--- .../storage/dfs/DfsGarbageCollector.java | 80 ++++++++++++++++--- 2 files changed, 139 insertions(+), 19 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java index 188b723e6..771aadc84 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java @@ -16,12 +16,15 @@ import java.io.IOException; import java.util.concurrent.TimeUnit; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; +import org.eclipse.jgit.junit.MockSystemReader; import org.eclipse.jgit.junit.TestRepository; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.util.SystemReader; +import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -29,6 +32,7 @@ public class DfsGarbageCollectorTest { private TestRepository git; private InMemoryRepository repo; private DfsObjDatabase odb; + private MockSystemReader mockSystemReader; @Before public void setUp() throws IOException { @@ -36,6 +40,13 @@ public class DfsGarbageCollectorTest { git = new TestRepository<>(new InMemoryRepository(desc)); repo = git.getRepository(); odb = repo.getObjectDatabase(); + mockSystemReader = new MockSystemReader(); + SystemReader.setInstance(mockSystemReader); + } + + @After + public void tearDown() { + SystemReader.setInstance(null); } @Test @@ -171,6 +182,58 @@ public class DfsGarbageCollectorTest { } } + @Test + public void testCollectionWithGarbageCoalescenceWithShortTtl() + throws Exception { + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + git.update("master", commit0); + + // Create commits at 1 minute intervals with 1 hour ttl. + for (int i = 0; i < 100; i++) { + mockSystemReader.tick(60); + commit1 = commit().message("g" + i).parent(commit1).create(); + + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(1, TimeUnit.HOURS); + run(gc); + + // Make sure we don't have more than 4 UNREACHABLE_GARBAGE packs + // because all the packs that are created in a 20 minutes interval + // should be coalesced and the packs older than 60 minutes should be + // removed due to ttl. + int count = countPacks(UNREACHABLE_GARBAGE); + assertTrue("Garbage pack count should not exceed 4, but found " + + count, count <= 4); + } + } + + @Test + public void testCollectionWithGarbageCoalescenceWithLongTtl() + throws Exception { + RevCommit commit0 = commit().message("0").create(); + RevCommit commit1 = commit().message("1").parent(commit0).create(); + git.update("master", commit0); + + // Create commits at 1 hour intervals with 2 days ttl. + for (int i = 0; i < 100; i++) { + mockSystemReader.tick(3600); + commit1 = commit().message("g" + i).parent(commit1).create(); + + DfsGarbageCollector gc = new DfsGarbageCollector(repo); + gc.setGarbageTtl(2, TimeUnit.DAYS); + run(gc); + + // Make sure we don't have more than 3 UNREACHABLE_GARBAGE packs + // because all the packs that are created in a single day should + // be coalesced and the packs older than 2 days should be + // removed due to ttl. + int count = countPacks(UNREACHABLE_GARBAGE); + assertTrue("Garbage pack count should not exceed 3, but found " + + count, count <= 3); + } + } + @Test public void testEstimateGcPackSizeInNewRepo() throws Exception { RevCommit commit0 = commit().message("0").create(); @@ -420,20 +483,17 @@ public class DfsGarbageCollectorTest { run(gc); } - private void gcWithTtl() throws InterruptedException, IOException { - // Wait for the system clock to move by at least 1 millisecond. - // This allows the DfsGarbageCollector to recognize the boundary. - long start = System.currentTimeMillis(); - do { - Thread.sleep(10); - } while (System.currentTimeMillis() <= start); - + private void gcWithTtl() throws IOException { + // Move the clock forward by 1 minute and use the same as ttl. + mockSystemReader.tick(60); DfsGarbageCollector gc = new DfsGarbageCollector(repo); - gc.setGarbageTtl(1, TimeUnit.MILLISECONDS); + gc.setGarbageTtl(1, TimeUnit.MINUTES); run(gc); } private void run(DfsGarbageCollector gc) throws IOException { + // adjust the current time that will be used by the gc operation. + mockSystemReader.tick(1); assertTrue("gc repacked", gc.pack(null)); odb.clearCache(); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java index 56ff1b5c2..968313254 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java @@ -56,9 +56,11 @@ import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; import java.io.IOException; import java.util.ArrayList; +import java.util.Calendar; import java.util.Collection; import java.util.Collections; import java.util.EnumSet; +import java.util.GregorianCalendar; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -82,6 +84,7 @@ import org.eclipse.jgit.lib.RefDatabase; import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.pack.PackConfig; import org.eclipse.jgit.storage.pack.PackStatistics; +import org.eclipse.jgit.util.SystemReader; import org.eclipse.jgit.util.io.CountingOutputStream; /** Repack and garbage collect a repository. */ @@ -100,7 +103,8 @@ public class DfsGarbageCollector { private PackConfig packConfig; - // See pack(), below, for how these two variables interact. + // See packIsCoalesceableGarbage(), below, for how these two variables + // interact. private long coalesceGarbageLimit = 50 << 20; private long garbageTtlMillis = TimeUnit.DAYS.toMillis(1); @@ -228,14 +232,8 @@ public class DfsGarbageCollector { if (packConfig.getIndexVersion() != 2) throw new IllegalStateException( JGitText.get().supportOnlyPackIndexVersion2); - if (garbageTtlMillis > 0) { - // We disable coalescing because the coalescing step will keep - // refreshing the UNREACHABLE_GARBAGE pack and we wouldn't - // actually prune anything. - coalesceGarbageLimit = 0; - } - startTimeMillis = System.currentTimeMillis(); + startTimeMillis = SystemReader.getInstance().getCurrentTime(); ctx = (DfsReader) objdb.newReader(); try { refdb.refresh(); @@ -310,14 +308,14 @@ public class DfsGarbageCollector { expiredGarbagePacks = new ArrayList(packs.length); long mostRecentGC = mostRecentGC(packs); - long now = System.currentTimeMillis(); + long now = SystemReader.getInstance().getCurrentTime(); for (DfsPackFile p : packs) { DfsPackDescription d = p.getPackDescription(); if (d.getPackSource() != UNREACHABLE_GARBAGE) { packsBefore.add(p); } else if (packIsExpiredGarbage(d, mostRecentGC, now)) { expiredGarbagePacks.add(p); - } else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit) { + } else if (packIsCoalesceableGarbage(d, now)) { packsBefore.add(p); } } @@ -360,6 +358,68 @@ public class DfsGarbageCollector { && now - d.getLastModified() >= garbageTtlMillis; } + private boolean packIsCoalesceableGarbage(DfsPackDescription d, long now) { + // An UNREACHABLE_GARBAGE pack can be coalesced if its size is less than + // the coalesceGarbageLimit and either garbageTtl is zero or if the pack + // is created in a close time interval (on a single calendar day when + // the garbageTtl is more than one day or one third of the garbageTtl). + // + // When the garbageTtl is more than 24 hours, garbage packs that are + // created within a single calendar day are coalesced together. This + // would make the effective ttl of the garbage pack as garbageTtl+23:59 + // and limit the number of garbage to a maximum number of + // garbageTtl_in_days + 1 (assuming all of them are less than the size + // of coalesceGarbageLimit). + // + // When the garbageTtl is less than or equal to 24 hours, garbage packs + // that are created within a one third of garbageTtl are coalesced + // together. This would make the effective ttl of the garbage packs as + // garbageTtl + (garbageTtl / 3) and would limit the number of garbage + // packs to a maximum number of 4 (assuming all of them are less than + // the size of coalesceGarbageLimit). + + if (d.getPackSource() != UNREACHABLE_GARBAGE + || d.getFileSize(PackExt.PACK) >= coalesceGarbageLimit) { + return false; + } + + if (garbageTtlMillis == 0) { + return true; + } + + long lastModified = d.getLastModified(); + long dayStartLastModified = dayStartInMillis(lastModified); + long dayStartToday = dayStartInMillis(now); + + if (dayStartLastModified != dayStartToday) { + return false; // this pack is not created today. + } + + if (garbageTtlMillis > TimeUnit.DAYS.toMillis(1)) { + return true; // ttl is more than one day and pack is created today. + } + + long timeInterval = garbageTtlMillis / 3; + if (timeInterval == 0) { + return false; // ttl is too small, don't try to coalesce. + } + + long modifiedTimeSlot = (lastModified - dayStartLastModified) / timeInterval; + long presentTimeSlot = (now - dayStartToday) / timeInterval; + return modifiedTimeSlot == presentTimeSlot; + } + + private static long dayStartInMillis(long timeInMillis) { + Calendar cal = new GregorianCalendar( + SystemReader.getInstance().getTimeZone()); + cal.setTimeInMillis(timeInMillis); + cal.set(Calendar.HOUR_OF_DAY, 0); + cal.set(Calendar.MINUTE, 0); + cal.set(Calendar.SECOND, 0); + cal.set(Calendar.MILLISECOND, 0); + return cal.getTimeInMillis(); + } + /** @return all of the source packs that fed into this compaction. */ public List getSourcePacks() { return toPrune();