Browse Source

Update DfsGarbageCollector to not read back a pack index.

Previously, the Dfs GC excluded objects from packs by passing a
previously written index to the PackWriter. Reading back a file on
Dfs is slow. Instead, allow the PackWriter to expose the objects
included in a pack and forward that to invocations of excludeObjects() .

Change-Id: I377cb4ab07f62cf790505e1eeb0b2efe81897c79
stable-2.3
Colby Ranger 12 years ago
parent
commit
7c58f6282a
  1. 18
      org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackWriterTest.java
  2. 1
      org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
  3. 1
      org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
  4. 30
      org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java
  5. 21
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/GC.java
  6. 50
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

18
org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackWriterTest.java

@ -66,6 +66,7 @@ import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.junit.JGitTestUtil;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.junit.TestRepository.BranchBuilder;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
@ -77,6 +78,7 @@ import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.PackIndex.MutableEntry;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.storage.pack.PackWriter.ObjectIdSet;
import org.eclipse.jgit.transport.PackParser;
import org.junit.After;
import org.junit.Before;
@ -463,7 +465,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
RevCommit c1 = bb.commit().add("f", contentA).create();
testRepo.getRevWalk().parseHeaders(c1);
PackIndex pf1 = writePack(repo, Collections.singleton(c1),
Collections.<PackIndex> emptySet());
Collections.<ObjectIdSet> emptySet());
assertContent(
pf1,
Arrays.asList(c1.getId(), c1.getTree().getId(),
@ -472,7 +474,7 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
RevCommit c2 = bb.commit().add("f", contentB).create();
testRepo.getRevWalk().parseHeaders(c2);
PackIndex pf2 = writePack(repo, Collections.singleton(c2),
Collections.singleton(pf1));
Collections.singleton(objectIdSet(pf1)));
assertContent(
pf2,
Arrays.asList(c2.getId(), c2.getTree().getId(),
@ -490,12 +492,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
}
private static PackIndex writePack(FileRepository repo,
Set<? extends ObjectId> want, Set<PackIndex> excludeObjects)
Set<? extends ObjectId> want, Set<ObjectIdSet> excludeObjects)
throws IOException {
PackWriter pw = new PackWriter(repo);
pw.setDeltaBaseAsOffset(true);
pw.setReuseDeltaCommits(false);
for (PackIndex idx : excludeObjects)
for (ObjectIdSet idx : excludeObjects)
pw.excludeObjects(idx);
pw.preparePack(NullProgressMonitor.INSTANCE, want,
Collections.<ObjectId> emptySet());
@ -668,4 +670,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase {
assertEquals(objectsOrder[i++].toObjectId(), me.toObjectId());
}
}
private static ObjectIdSet objectIdSet(final PackIndex idx) {
return new ObjectIdSet() {
public boolean contains(AnyObjectId objectId) {
return idx.hasObject(objectId);
}
};
}
}

1
org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties

@ -30,6 +30,7 @@ blobNotFound=Blob not found: {0}
blobNotFoundForPath=Blob not found: {0} for path: {1}
branchNameInvalid=Branch name {0} is not allowed
cachedPacksPreventsIndexCreation=Using cached packs prevents index creation
cachedPacksPreventsListingObjects=Using cached packs prevents listing objects
cannotBeCombined=Cannot be combined.
cannotBeRecursiveWhenTreesAreIncluded=TreeWalk shouldn't be recursive when tree objects are included.
cannotCombineSquashWithNoff=Cannot combine --squash with --no-ff.

1
org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java

@ -91,6 +91,7 @@ public class JGitText extends TranslationBundle {
/***/ public String blobNotFoundForPath;
/***/ public String branchNameInvalid;
/***/ public String cachedPacksPreventsIndexCreation;
/***/ public String cachedPacksPreventsListingObjects;
/***/ public String cannotBeCombined;
/***/ public String cannotBeRecursiveWhenTreesAreIncluded;
/***/ public String cannotCombineSquashWithNoff;

30
org.eclipse.jgit/src/org/eclipse/jgit/storage/dfs/DfsGarbageCollector.java

@ -82,7 +82,7 @@ public class DfsGarbageCollector {
private final List<PackWriter.Statistics> newPackStats;
private final List<DfsPackFile> newPackList;
private final List<PackWriter.ObjectIdSet> newPackObj;
private DfsReader ctx;
@ -116,7 +116,7 @@ public class DfsGarbageCollector {
objdb = repo.getObjectDatabase();
newPackDesc = new ArrayList<DfsPackDescription>(4);
newPackStats = new ArrayList<PackWriter.Statistics>(4);
newPackList = new ArrayList<DfsPackFile>(4);
newPackObj = new ArrayList<PackWriter.ObjectIdSet>(4);
packConfig = new PackConfig(repo);
packConfig.setIndexVersion(2);
@ -244,8 +244,8 @@ public class DfsGarbageCollector {
PackWriter pw = newPackWriter();
try {
for (DfsPackFile pack : newPackList)
pw.excludeObjects(pack.getPackIndex(ctx));
for (PackWriter.ObjectIdSet packedObjs : newPackObj)
pw.excludeObjects(packedObjs);
pw.preparePack(pm, nonHeads, allHeads);
if (0 < pw.getObjectCount())
writePack(GC, pw, pm);
@ -259,10 +259,6 @@ public class DfsGarbageCollector {
return;
// TODO(sop) This is ugly. The garbage pack needs to be deleted.
List<PackIndex> newIdx = new ArrayList<PackIndex>(newPackList.size());
for (DfsPackFile pack : newPackList)
newIdx.add(pack.getPackIndex(ctx));
PackWriter pw = newPackWriter();
try {
RevWalk pool = new RevWalk(ctx);
@ -272,7 +268,7 @@ public class DfsGarbageCollector {
for (PackIndex.MutableEntry ent : oldIdx) {
pm.update(1);
ObjectId id = ent.toObjectId();
if (pool.lookupOrNull(id) != null || anyIndexHas(newIdx, id))
if (pool.lookupOrNull(id) != null || anyPackHas(id))
continue;
int type = oldPack.getObjectType(ctx, ent.getOffset());
@ -287,9 +283,9 @@ public class DfsGarbageCollector {
}
}
private static boolean anyIndexHas(List<PackIndex> list, AnyObjectId id) {
for (PackIndex idx : list)
if (idx.hasObject(id))
private boolean anyPackHas(AnyObjectId id) {
for (PackWriter.ObjectIdSet packedObjs : newPackObj)
if (packedObjs.contains(id))
return true;
return false;
}
@ -336,6 +332,13 @@ public class DfsGarbageCollector {
out.close();
}
final List<ObjectId> packedObjs = pw.getObjectList();
newPackObj.add(new PackWriter.ObjectIdSet() {
public boolean contains(AnyObjectId objectId) {
return 0 <= Collections.binarySearch(packedObjs, objectId);
}
});
PackWriter.Statistics stats = pw.getStatistics();
pack.setPackStats(stats);
pack.setFileSize(PACK, stats.getTotalBytes());
@ -343,7 +346,8 @@ public class DfsGarbageCollector {
pack.setDeltaCount(stats.getTotalDeltas());
objectsPacked += stats.getTotalObjects();
newPackStats.add(stats);
newPackList.add(DfsBlockCache.getInstance().getOrCreate(pack, null));
DfsBlockCache.getInstance().getOrCreate(pack, null);
return pack;
}
}

21
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/GC.java

@ -70,6 +70,7 @@ import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.errors.NoWorkTreeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
@ -83,6 +84,7 @@ import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.storage.pack.PackWriter.ObjectIdSet;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
import org.eclipse.jgit.util.FileUtils;
@ -498,10 +500,10 @@ public class GC {
tagTargets.add(ref.getPeeledObjectId());
}
List<PackIndex> excluded = new LinkedList<PackIndex>();
for (PackFile f : repo.getObjectDatabase().getPacks())
List<ObjectIdSet> excluded = new LinkedList<ObjectIdSet>();
for (final PackFile f : repo.getObjectDatabase().getPacks())
if (f.shouldBeKept())
excluded.add(f.getIndex());
excluded.add(objectIdSet(f.getIndex()));
tagTargets.addAll(allHeads);
nonHeads.addAll(indexObjects);
@ -513,7 +515,7 @@ public class GC {
tagTargets, excluded);
if (heads != null) {
ret.add(heads);
excluded.add(0, heads.getIndex());
excluded.add(0, objectIdSet(heads.getIndex()));
}
}
if (!nonHeads.isEmpty()) {
@ -632,7 +634,7 @@ public class GC {
private PackFile writePack(Set<? extends ObjectId> want,
Set<? extends ObjectId> have, Set<ObjectId> tagTargets,
List<PackIndex> excludeObjects) throws IOException {
List<ObjectIdSet> excludeObjects) throws IOException {
File tmpPack = null;
File tmpIdx = null;
PackWriter pw = new PackWriter(repo);
@ -643,7 +645,7 @@ public class GC {
if (tagTargets != null)
pw.setTagTargets(tagTargets);
if (excludeObjects != null)
for (PackIndex idx : excludeObjects)
for (ObjectIdSet idx : excludeObjects)
pw.excludeObjects(idx);
pw.preparePack(pm, want, have);
if (pw.getObjectCount() == 0)
@ -855,4 +857,11 @@ public class GC {
expireAgeMillis = -1;
}
private static ObjectIdSet objectIdSet(final PackIndex idx) {
return new ObjectIdSet() {
public boolean contains(AnyObjectId objectId) {
return idx.hasObject(objectId);
}
};
}
}

50
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

@ -103,7 +103,6 @@ import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevTag;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.file.PackIndexWriter;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.TemporaryBuffer;
@ -144,6 +143,18 @@ import org.eclipse.jgit.util.TemporaryBuffer;
public class PackWriter {
private static final int PACK_VERSION_GENERATED = 2;
/** A collection of object ids. */
public interface ObjectIdSet {
/**
* Returns true if the objectId is contained within the collection.
*
* @param objectId
* the objectId to find
* @return whether the collection contains the objectId.
*/
boolean contains(AnyObjectId objectId);
}
private static final Map<WeakReference<PackWriter>, Boolean> instances =
new ConcurrentHashMap<WeakReference<PackWriter>, Boolean>();
@ -206,9 +217,9 @@ public class PackWriter {
private Set<ObjectId> tagTargets = Collections.emptySet();
private PackIndex[] excludeInPacks;
private ObjectIdSet[] excludeInPacks;
private PackIndex excludeInPackLast;
private ObjectIdSet excludeInPackLast;
private Deflater myDeflater;
@ -507,19 +518,40 @@ public class PackWriter {
return stats.totalObjects;
}
/**
* Returns the object ids in the pack file that was created by this writer,
* sorted by name.
*
* This method can only be invoked after
* {@link #writePack(ProgressMonitor, ProgressMonitor, OutputStream)} has
* been invoked and completed successfully.
*
* @return number of objects in pack.
* @throws IOException
* a cached pack cannot supply its object ids.
*/
public List<ObjectId> getObjectList() throws IOException {
if (!cachedPacks.isEmpty())
throw new IOException(
JGitText.get().cachedPacksPreventsListingObjects);
return Collections.unmodifiableList(
(List<? extends ObjectId>) sortByName());
}
/**
* Add a pack index whose contents should be excluded from the result.
*
* @param idx
* objects in this index will not be in the output pack.
*/
public void excludeObjects(PackIndex idx) {
public void excludeObjects(ObjectIdSet idx) {
if (excludeInPacks == null) {
excludeInPacks = new PackIndex[] { idx };
excludeInPacks = new ObjectIdSet[] { idx };
excludeInPackLast = idx;
} else {
int cnt = excludeInPacks.length;
PackIndex[] newList = new PackIndex[cnt + 1];
ObjectIdSet[] newList = new ObjectIdSet[cnt + 1];
System.arraycopy(excludeInPacks, 0, newList, 0, cnt);
newList[cnt] = idx;
excludeInPacks = newList;
@ -1798,10 +1830,10 @@ public class PackWriter {
private boolean exclude(AnyObjectId objectId) {
if (excludeInPacks == null)
return false;
if (excludeInPackLast.hasObject(objectId))
if (excludeInPackLast.contains(objectId))
return true;
for (PackIndex idx : excludeInPacks) {
if (idx.hasObject(objectId)) {
for (ObjectIdSet idx : excludeInPacks) {
if (idx.contains(objectId)) {
excludeInPackLast = idx;
return true;
}

Loading…
Cancel
Save