From 8bec98cec089fa488f3903e97ee6493b727d7d21 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 24 Jan 2017 11:31:22 -0500 Subject: [PATCH] gc: loosen unreferenced objects An unreferenced object might appear in a pack. This could only happen because it was previously referenced, and then later that reference was removed. When we gc, we copy the referenced objects into a new pack, and delete the old pack. This would remove the unreferenced object. Now we first create a loose object from any unreferenced object in the doomed pack. This kicks off the two-week grace period for that object, after which it will be collected if it's not referenced. This matches the behavior of regular git. Change-Id: I59539aca1d0d83622c41aa9bfbdd72fa868ee9fb Signed-off-by: David Turner Signed-off-by: Jonathan Nieder --- .../storage/file/GcBasicPackingTest.java | 23 +++++++++-- .../jgit/internal/storage/file/GC.java | 41 +++++++++++++++++++ .../storage/file/ObjectDirectoryInserter.java | 34 ++++++++++++--- 3 files changed, 88 insertions(+), 10 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcBasicPackingTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcBasicPackingTest.java index 8a9ad8960..c7e5973f3 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcBasicPackingTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcBasicPackingTest.java @@ -55,6 +55,7 @@ import java.util.Date; import java.util.List; import org.eclipse.jgit.junit.TestRepository.BranchBuilder; +import org.eclipse.jgit.lib.RefUpdate; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.storage.pack.PackConfig; import org.junit.Test; @@ -188,16 +189,26 @@ public class GcBasicPackingTest extends GcTestCase { BranchBuilder bb = tr.branch("refs/heads/master"); bb.commit().message("M").add("M", "M").create(); + String tempRef = "refs/heads/soon-to-be-unreferenced"; + BranchBuilder bb2 = tr.branch(tempRef); + bb2.commit().message("M").add("M", "M").create(); + gc.setExpireAgeMillis(0); gc.gc(); stats = gc.getStatistics(); assertEquals(0, stats.numberOfLooseObjects); - assertEquals(3, stats.numberOfPackedObjects); + assertEquals(4, stats.numberOfPackedObjects); assertEquals(1, stats.numberOfPackFiles); File oldPackfile = tr.getRepository().getObjectDatabase().getPacks() .iterator().next().getPackFile(); fsTick(); + + // delete the temp ref, orphaning its commit + RefUpdate update = tr.getRepository().getRefDatabase().newUpdate(tempRef, false); + update.setForceUpdate(true); + update.delete(); + bb.commit().message("B").add("B", "Q").create(); // The old packfile is too young to be deleted. We should end up with @@ -208,7 +219,7 @@ public class GcBasicPackingTest extends GcTestCase { assertEquals(0, stats.numberOfLooseObjects); // if objects exist in multiple packFiles then they are counted multiple // times - assertEquals(9, stats.numberOfPackedObjects); + assertEquals(10, stats.numberOfPackedObjects); assertEquals(2, stats.numberOfPackFiles); // repack again but now without a grace period for loose objects. Since @@ -219,15 +230,19 @@ public class GcBasicPackingTest extends GcTestCase { assertEquals(0, stats.numberOfLooseObjects); // if objects exist in multiple packFiles then they are counted multiple // times - assertEquals(9, stats.numberOfPackedObjects); + assertEquals(10, stats.numberOfPackedObjects); assertEquals(2, stats.numberOfPackFiles); // repack again but now without a grace period for packfiles. We should // end up with one packfile gc.setPackExpireAgeMillis(0); + + // we want to keep newly-loosened objects though + gc.setExpireAgeMillis(-1); + gc.gc(); stats = gc.getStatistics(); - assertEquals(0, stats.numberOfLooseObjects); + assertEquals(1, stats.numberOfLooseObjects); // if objects exist in multiple packFiles then they are counted multiple // times assertEquals(6, stats.numberOfPackedObjects); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java index 990b0be33..32ea382a2 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java @@ -81,6 +81,8 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.NoWorkTreeException; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.file.ObjectDirectory; +import org.eclipse.jgit.internal.storage.file.ObjectDirectoryInserter; import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.internal.storage.pack.PackWriter; import org.eclipse.jgit.internal.storage.reftree.RefTreeNames; @@ -90,6 +92,8 @@ import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectIdSet; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Ref.Storage; @@ -208,6 +212,26 @@ public class GC { return newPacks; } + /** + * Loosen objects in a pack file which are not also in the newly-created + * pack files. + */ + private void loosen(ObjectDirectoryInserter inserter, ObjectReader reader, PackFile pack, HashSet existing) + throws IOException { + for (PackIndex.MutableEntry entry : pack) { + ObjectId oid = entry.toObjectId(); + if (existing.contains(oid)) { + continue; + } + existing.add(oid); + ObjectLoader loader = reader.open(oid); + inserter.insert(loader.getType(), + loader.getSize(), + loader.openStream(), + true /* create this object even though it's a duplicate */); + } + } + /** * Delete old pack files. What is 'old' is defined by specifying a set of * old pack files and a set of new pack files. Each pack file contained in @@ -215,6 +239,9 @@ public class GC { * preserveOldPacks is set, keep a copy of the pack file in the preserve * directory. If an expirationDate is set then pack files which are younger * than the expirationDate will not be deleted nor preserved. + *

+ * If we're not immediately expiring loose objects, loosen any objects + * in the old pack files which aren't in the new pack files. * * @param oldPacks * @param newPacks @@ -223,6 +250,17 @@ public class GC { */ private void deleteOldPacks(Collection oldPacks, Collection newPacks) throws ParseException, IOException { + HashSet ids = new HashSet<>(); + for (PackFile pack : newPacks) { + for (PackIndex.MutableEntry entry : pack) { + ids.add(entry.toObjectId()); + } + } + ObjectReader reader = repo.newObjectReader(); + ObjectDirectory dir = repo.getObjectDatabase(); + ObjectDirectoryInserter inserter = dir.newInserter(); + boolean shouldLoosen = getExpireDate() < Long.MAX_VALUE; + prunePreserved(); long packExpireDate = getPackExpireDate(); oldPackLoop: for (PackFile oldPack : oldPacks) { @@ -237,6 +275,9 @@ public class GC { && repo.getFS().lastModified( oldPack.getPackFile()) < packExpireDate) { oldPack.close(); + if (shouldLoosen) { + loosen(inserter, reader, oldPack, ids); + } prunePack(oldName); } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryInserter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryInserter.java index 9820e0ea3..ca70c4979 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryInserter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryInserter.java @@ -86,34 +86,56 @@ class ObjectDirectoryInserter extends ObjectInserter { @Override public ObjectId insert(int type, byte[] data, int off, int len) throws IOException { + return insert(type, data, off, len, false); + } + + /** + * Insert a loose object into the database. If createDuplicate is + * true, write the loose object even if we already have it in the + * loose or packed ODB. + */ + private ObjectId insert( + int type, byte[] data, int off, int len, boolean createDuplicate) + throws IOException { ObjectId id = idFor(type, data, off, len); - if (db.has(id)) { + if (!createDuplicate && db.has(id)) { return id; } else { File tmp = toTemp(type, data, off, len); - return insertOneObject(tmp, id); + return insertOneObject(tmp, id, createDuplicate); } } @Override public ObjectId insert(final int type, long len, final InputStream is) throws IOException { + return insert(type, len, is, false); + } + + /** + * Insert a loose object into the database. If createDuplicate is + * true, write the loose object even if we already have it in the + * loose or packed ODB. + */ + ObjectId insert(int type, long len, InputStream is, boolean createDuplicate) + throws IOException { if (len <= buffer().length) { byte[] buf = buffer(); int actLen = IO.readFully(is, buf, 0); - return insert(type, buf, 0, actLen); + return insert(type, buf, 0, actLen, createDuplicate); } else { MessageDigest md = digest(); File tmp = toTemp(md, type, len, is); ObjectId id = ObjectId.fromRaw(md.digest()); - return insertOneObject(tmp, id); + return insertOneObject(tmp, id, createDuplicate); } } - private ObjectId insertOneObject(final File tmp, final ObjectId id) + private ObjectId insertOneObject( + File tmp, ObjectId id, boolean createDuplicate) throws IOException, ObjectWritingException { - switch (db.insertUnpackedObject(tmp, id, false /* no duplicate */)) { + switch (db.insertUnpackedObject(tmp, id, createDuplicate)) { case INSERTED: case EXISTS_PACKED: case EXISTS_LOOSE: