Browse Source

gc: loosen unreferenced objects

An unreferenced object might appear in a pack.  This could only happen
because it was previously referenced, and then later that reference
was removed.  When we gc, we copy the referenced objects into a new
pack, and delete the old pack.  This would remove the unreferenced
object.  Now we first create a loose object from any unreferenced
object in the doomed pack.  This kicks off the two-week grace period
for that object, after which it will be collected if it's not
referenced.

This matches the behavior of regular git.

Change-Id: I59539aca1d0d83622c41aa9bfbdd72fa868ee9fb
Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Jonathan Nieder <jrn@google.com>
stable-4.7
David Turner 8 years ago committed by Jonathan Nieder
parent
commit
8bec98cec0
  1. 23
      org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcBasicPackingTest.java
  2. 41
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
  3. 34
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryInserter.java

23
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcBasicPackingTest.java

@ -55,6 +55,7 @@ import java.util.Date;
import java.util.List; import java.util.List;
import org.eclipse.jgit.junit.TestRepository.BranchBuilder; import org.eclipse.jgit.junit.TestRepository.BranchBuilder;
import org.eclipse.jgit.lib.RefUpdate;
import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.storage.pack.PackConfig; import org.eclipse.jgit.storage.pack.PackConfig;
import org.junit.Test; import org.junit.Test;
@ -188,16 +189,26 @@ public class GcBasicPackingTest extends GcTestCase {
BranchBuilder bb = tr.branch("refs/heads/master"); BranchBuilder bb = tr.branch("refs/heads/master");
bb.commit().message("M").add("M", "M").create(); bb.commit().message("M").add("M", "M").create();
String tempRef = "refs/heads/soon-to-be-unreferenced";
BranchBuilder bb2 = tr.branch(tempRef);
bb2.commit().message("M").add("M", "M").create();
gc.setExpireAgeMillis(0); gc.setExpireAgeMillis(0);
gc.gc(); gc.gc();
stats = gc.getStatistics(); stats = gc.getStatistics();
assertEquals(0, stats.numberOfLooseObjects); assertEquals(0, stats.numberOfLooseObjects);
assertEquals(3, stats.numberOfPackedObjects); assertEquals(4, stats.numberOfPackedObjects);
assertEquals(1, stats.numberOfPackFiles); assertEquals(1, stats.numberOfPackFiles);
File oldPackfile = tr.getRepository().getObjectDatabase().getPacks() File oldPackfile = tr.getRepository().getObjectDatabase().getPacks()
.iterator().next().getPackFile(); .iterator().next().getPackFile();
fsTick(); fsTick();
// delete the temp ref, orphaning its commit
RefUpdate update = tr.getRepository().getRefDatabase().newUpdate(tempRef, false);
update.setForceUpdate(true);
update.delete();
bb.commit().message("B").add("B", "Q").create(); bb.commit().message("B").add("B", "Q").create();
// The old packfile is too young to be deleted. We should end up with // The old packfile is too young to be deleted. We should end up with
@ -208,7 +219,7 @@ public class GcBasicPackingTest extends GcTestCase {
assertEquals(0, stats.numberOfLooseObjects); assertEquals(0, stats.numberOfLooseObjects);
// if objects exist in multiple packFiles then they are counted multiple // if objects exist in multiple packFiles then they are counted multiple
// times // times
assertEquals(9, stats.numberOfPackedObjects); assertEquals(10, stats.numberOfPackedObjects);
assertEquals(2, stats.numberOfPackFiles); assertEquals(2, stats.numberOfPackFiles);
// repack again but now without a grace period for loose objects. Since // repack again but now without a grace period for loose objects. Since
@ -219,15 +230,19 @@ public class GcBasicPackingTest extends GcTestCase {
assertEquals(0, stats.numberOfLooseObjects); assertEquals(0, stats.numberOfLooseObjects);
// if objects exist in multiple packFiles then they are counted multiple // if objects exist in multiple packFiles then they are counted multiple
// times // times
assertEquals(9, stats.numberOfPackedObjects); assertEquals(10, stats.numberOfPackedObjects);
assertEquals(2, stats.numberOfPackFiles); assertEquals(2, stats.numberOfPackFiles);
// repack again but now without a grace period for packfiles. We should // repack again but now without a grace period for packfiles. We should
// end up with one packfile // end up with one packfile
gc.setPackExpireAgeMillis(0); gc.setPackExpireAgeMillis(0);
// we want to keep newly-loosened objects though
gc.setExpireAgeMillis(-1);
gc.gc(); gc.gc();
stats = gc.getStatistics(); stats = gc.getStatistics();
assertEquals(0, stats.numberOfLooseObjects); assertEquals(1, stats.numberOfLooseObjects);
// if objects exist in multiple packFiles then they are counted multiple // if objects exist in multiple packFiles then they are counted multiple
// times // times
assertEquals(6, stats.numberOfPackedObjects); assertEquals(6, stats.numberOfPackedObjects);

41
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java

@ -81,6 +81,8 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.NoWorkTreeException; import org.eclipse.jgit.errors.NoWorkTreeException;
import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.ObjectDirectory;
import org.eclipse.jgit.internal.storage.file.ObjectDirectoryInserter;
import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter; import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftree.RefTreeNames; import org.eclipse.jgit.internal.storage.reftree.RefTreeNames;
@ -90,6 +92,8 @@ import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet; import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Ref.Storage; import org.eclipse.jgit.lib.Ref.Storage;
@ -208,6 +212,26 @@ public class GC {
return newPacks; return newPacks;
} }
/**
* Loosen objects in a pack file which are not also in the newly-created
* pack files.
*/
private void loosen(ObjectDirectoryInserter inserter, ObjectReader reader, PackFile pack, HashSet<ObjectId> existing)
throws IOException {
for (PackIndex.MutableEntry entry : pack) {
ObjectId oid = entry.toObjectId();
if (existing.contains(oid)) {
continue;
}
existing.add(oid);
ObjectLoader loader = reader.open(oid);
inserter.insert(loader.getType(),
loader.getSize(),
loader.openStream(),
true /* create this object even though it's a duplicate */);
}
}
/** /**
* Delete old pack files. What is 'old' is defined by specifying a set of * Delete old pack files. What is 'old' is defined by specifying a set of
* old pack files and a set of new pack files. Each pack file contained in * old pack files and a set of new pack files. Each pack file contained in
@ -215,6 +239,9 @@ public class GC {
* preserveOldPacks is set, keep a copy of the pack file in the preserve * preserveOldPacks is set, keep a copy of the pack file in the preserve
* directory. If an expirationDate is set then pack files which are younger * directory. If an expirationDate is set then pack files which are younger
* than the expirationDate will not be deleted nor preserved. * than the expirationDate will not be deleted nor preserved.
* <p>
* If we're not immediately expiring loose objects, loosen any objects
* in the old pack files which aren't in the new pack files.
* *
* @param oldPacks * @param oldPacks
* @param newPacks * @param newPacks
@ -223,6 +250,17 @@ public class GC {
*/ */
private void deleteOldPacks(Collection<PackFile> oldPacks, private void deleteOldPacks(Collection<PackFile> oldPacks,
Collection<PackFile> newPacks) throws ParseException, IOException { Collection<PackFile> newPacks) throws ParseException, IOException {
HashSet<ObjectId> ids = new HashSet<>();
for (PackFile pack : newPacks) {
for (PackIndex.MutableEntry entry : pack) {
ids.add(entry.toObjectId());
}
}
ObjectReader reader = repo.newObjectReader();
ObjectDirectory dir = repo.getObjectDatabase();
ObjectDirectoryInserter inserter = dir.newInserter();
boolean shouldLoosen = getExpireDate() < Long.MAX_VALUE;
prunePreserved(); prunePreserved();
long packExpireDate = getPackExpireDate(); long packExpireDate = getPackExpireDate();
oldPackLoop: for (PackFile oldPack : oldPacks) { oldPackLoop: for (PackFile oldPack : oldPacks) {
@ -237,6 +275,9 @@ public class GC {
&& repo.getFS().lastModified( && repo.getFS().lastModified(
oldPack.getPackFile()) < packExpireDate) { oldPack.getPackFile()) < packExpireDate) {
oldPack.close(); oldPack.close();
if (shouldLoosen) {
loosen(inserter, reader, oldPack, ids);
}
prunePack(oldName); prunePack(oldName);
} }
} }

34
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/ObjectDirectoryInserter.java

@ -86,34 +86,56 @@ class ObjectDirectoryInserter extends ObjectInserter {
@Override @Override
public ObjectId insert(int type, byte[] data, int off, int len) public ObjectId insert(int type, byte[] data, int off, int len)
throws IOException { throws IOException {
return insert(type, data, off, len, false);
}
/**
* Insert a loose object into the database. If createDuplicate is
* true, write the loose object even if we already have it in the
* loose or packed ODB.
*/
private ObjectId insert(
int type, byte[] data, int off, int len, boolean createDuplicate)
throws IOException {
ObjectId id = idFor(type, data, off, len); ObjectId id = idFor(type, data, off, len);
if (db.has(id)) { if (!createDuplicate && db.has(id)) {
return id; return id;
} else { } else {
File tmp = toTemp(type, data, off, len); File tmp = toTemp(type, data, off, len);
return insertOneObject(tmp, id); return insertOneObject(tmp, id, createDuplicate);
} }
} }
@Override @Override
public ObjectId insert(final int type, long len, final InputStream is) public ObjectId insert(final int type, long len, final InputStream is)
throws IOException { throws IOException {
return insert(type, len, is, false);
}
/**
* Insert a loose object into the database. If createDuplicate is
* true, write the loose object even if we already have it in the
* loose or packed ODB.
*/
ObjectId insert(int type, long len, InputStream is, boolean createDuplicate)
throws IOException {
if (len <= buffer().length) { if (len <= buffer().length) {
byte[] buf = buffer(); byte[] buf = buffer();
int actLen = IO.readFully(is, buf, 0); int actLen = IO.readFully(is, buf, 0);
return insert(type, buf, 0, actLen); return insert(type, buf, 0, actLen, createDuplicate);
} else { } else {
MessageDigest md = digest(); MessageDigest md = digest();
File tmp = toTemp(md, type, len, is); File tmp = toTemp(md, type, len, is);
ObjectId id = ObjectId.fromRaw(md.digest()); ObjectId id = ObjectId.fromRaw(md.digest());
return insertOneObject(tmp, id); return insertOneObject(tmp, id, createDuplicate);
} }
} }
private ObjectId insertOneObject(final File tmp, final ObjectId id) private ObjectId insertOneObject(
File tmp, ObjectId id, boolean createDuplicate)
throws IOException, ObjectWritingException { throws IOException, ObjectWritingException {
switch (db.insertUnpackedObject(tmp, id, false /* no duplicate */)) { switch (db.insertUnpackedObject(tmp, id, createDuplicate)) {
case INSERTED: case INSERTED:
case EXISTS_PACKED: case EXISTS_PACKED:
case EXISTS_LOOSE: case EXISTS_LOOSE:

Loading…
Cancel
Save