From d72416afbb1736d537197c68462a574e86155cbe Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Thu, 4 Apr 2013 12:18:22 -0700 Subject: [PATCH] Optimize DFS object reuse selection code Rewrite this complicated logic to examine each pack file exactly once. This reduces thrashing when there are many large pack files present and the reader needs to locate each object's header. The intermediate temporary list is now smaller, it is bounded to the same length as the input object list. In the prior version of this code the list contained one entry for every representation of every object being packed. Only one representation object is allocated, reducing the overall memory footprint to be approximately one reference per object found in the current pack file (the pointer in the BlockList). This saves considerable working set memory compared to the prior version that made and held onto a new representation for every ObjectToPack. Change-Id: I2c1f18cd6755643ac4c2cf1f23b5464ca9d91b22 --- .../storage/dfs/DfsObjectRepresentation.java | 33 ++----- .../internal/storage/dfs/DfsObjectToPack.java | 10 +++ .../internal/storage/dfs/DfsPackFile.java | 36 ++------ .../jgit/internal/storage/dfs/DfsReader.java | 88 ++++++++----------- .../storage/dfs/LargePackedWholeObject.java | 2 +- 5 files changed, 66 insertions(+), 103 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectRepresentation.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectRepresentation.java index f9efbaeb7..073d94299 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectRepresentation.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectRepresentation.java @@ -47,34 +47,20 @@ import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE; import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource; -import org.eclipse.jgit.internal.storage.pack.ObjectToPack; import org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation; import org.eclipse.jgit.lib.ObjectId; class DfsObjectRepresentation extends StoredObjectRepresentation { - final ObjectToPack object; - - DfsPackFile pack; - - /** - * Position of {@link #pack} in the reader's pack list. Lower numbers are - * newer/more recent packs and less likely to contain the best format for a - * base object. Higher numbered packs are bigger, more stable, and favored - * by PackWriter when selecting representations... but only if they come - * last in the representation ordering. - */ - int packIndex; - - long offset; - + final DfsPackFile pack; + final int packIndex; int format; - + long offset; long length; - ObjectId baseId; - DfsObjectRepresentation(ObjectToPack object) { - this.object = object; + DfsObjectRepresentation(DfsPackFile pack, int packIndex) { + this.pack = pack; + this.packIndex = packIndex; } @Override @@ -94,10 +80,7 @@ class DfsObjectRepresentation extends StoredObjectRepresentation { @Override public boolean wasDeltaAttempted() { - if (pack != null) { - PackSource source = pack.getPackDescription().getPackSource(); - return source == GC || source == UNREACHABLE_GARBAGE; - } - return false; + PackSource source = pack.getPackDescription().getPackSource(); + return source == GC || source == UNREACHABLE_GARBAGE; } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectToPack.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectToPack.java index 5dc3be2f4..469f9a773 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectToPack.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsObjectToPack.java @@ -49,6 +49,8 @@ import org.eclipse.jgit.lib.AnyObjectId; /** {@link ObjectToPack} for {@link DfsObjDatabase}. */ class DfsObjectToPack extends ObjectToPack { + private static final int FLAG_FOUND = 1 << 0; + /** Pack to reuse compressed data from, otherwise null. */ DfsPackFile pack; @@ -65,6 +67,14 @@ class DfsObjectToPack extends ObjectToPack { super(src, type); } + final boolean isFound() { + return isExtendedFlag(FLAG_FOUND); + } + + final void setFound() { + setExtendedFlag(FLAG_FOUND); + } + @Override protected void clearReuseAsIs() { super.clearReuseAsIs(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java index e59eda2e3..70d1af2ea 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackFile.java @@ -341,7 +341,7 @@ public final class DfsPackFile { } } - private PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException { + PackReverseIndex getReverseIdx(DfsReader ctx) throws IOException { DfsBlockCache.Ref revref = reverseIndex; if (revref != null) { PackReverseIndex revidx = revref.get(); @@ -431,22 +431,6 @@ public final class DfsPackFile { return idx(ctx).getObjectCount(); } - /** - * Search for object id with the specified start offset in associated pack - * (reverse) index. - * - * @param ctx - * current reader for the calling thread. - * @param offset - * start offset of object to find - * @return object id for this offset, or null if no object was found - * @throws IOException - * the index file cannot be loaded into memory. - */ - ObjectId findObjectForOffset(DfsReader ctx, long offset) throws IOException { - return getReverseIdx(ctx).findObject(offset); - } - private byte[] decompress(long position, int sz, DfsReader ctx) throws IOException, DataFormatException { byte[] dstbuf; @@ -1053,9 +1037,10 @@ public final class DfsPackFile { } } - void representation(DfsReader ctx, DfsObjectRepresentation r) + void representation(DfsObjectRepresentation r, final long pos, + DfsReader ctx, PackReverseIndex rev) throws IOException { - final long pos = r.offset; + r.offset = pos; final byte[] ib = ctx.tempId; readFully(pos, ib, 0, 20, ctx); int c = ib[0] & 0xff; @@ -1064,13 +1049,14 @@ public final class DfsPackFile { while ((c & 0x80) != 0) c = ib[p++] & 0xff; - long len = (getReverseIdx(ctx).findNextOffset(pos, length - 20) - pos); + long len = rev.findNextOffset(pos, length - 20) - pos; switch (typeCode) { case Constants.OBJ_COMMIT: case Constants.OBJ_TREE: case Constants.OBJ_BLOB: case Constants.OBJ_TAG: r.format = StoredObjectRepresentation.PACK_WHOLE; + r.baseId = null; r.length = len - p; return; @@ -1083,21 +1069,17 @@ public final class DfsPackFile { ofs <<= 7; ofs += (c & 127); } - ofs = pos - ofs; r.format = StoredObjectRepresentation.PACK_DELTA; - r.baseId = findObjectForOffset(ctx, ofs); + r.baseId = rev.findObject(pos - ofs); r.length = len - p; return; } case Constants.OBJ_REF_DELTA: { - len -= p; - len -= Constants.OBJECT_ID_LENGTH; readFully(pos + p, ib, 0, 20, ctx); - ObjectId id = ObjectId.fromRaw(ib); r.format = StoredObjectRepresentation.PACK_DELTA; - r.baseId = id; - r.length = len; + r.baseId = ObjectId.fromRaw(ib); + r.length = len - p - 20; return; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java index 3ec5e9360..ca88b5b85 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsReader.java @@ -73,6 +73,8 @@ import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.file.BitmapIndexImpl; import org.eclipse.jgit.internal.storage.file.PackBitmapIndex; +import org.eclipse.jgit.internal.storage.file.PackIndex; +import org.eclipse.jgit.internal.storage.file.PackReverseIndex; import org.eclipse.jgit.internal.storage.pack.CachedPack; import org.eclipse.jgit.internal.storage.pack.ObjectReuseAsIs; import org.eclipse.jgit.internal.storage.pack.ObjectToPack; @@ -476,12 +478,9 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { return new DfsObjectToPack(objectId, type); } - private static final Comparator REPRESENTATION_SORT = new Comparator() { - public int compare(DfsObjectRepresentation a, DfsObjectRepresentation b) { - int cmp = a.packIndex - b.packIndex; - if (cmp == 0) - cmp = Long.signum(a.offset - b.offset); - return cmp; + private static final Comparator OFFSET_SORT = new Comparator() { + public int compare(DfsObjectToPack a, DfsObjectToPack b) { + return Long.signum(a.getOffset() - b.getOffset()); } }; @@ -489,56 +488,45 @@ public final class DfsReader extends ObjectReader implements ObjectReuseAsIs { ProgressMonitor monitor, Iterable objects) throws IOException, MissingObjectException { DfsPackFile[] packList = db.getPacks(); - if (packList.length == 0) { - Iterator itr = objects.iterator(); - if (itr.hasNext()) - throw new MissingObjectException(itr.next(), "unknown"); - return; - } - - int objectCount = 0; - int updated = 0; - int posted = 0; - List all = new BlockList(); - for (ObjectToPack otp : objects) { - boolean found = false; - for (int packIndex = 0; packIndex < packList.length; packIndex++) { - DfsPackFile pack = packList[packIndex]; - long p = pack.findOffset(this, otp); - if (0 < p) { - DfsObjectRepresentation r = new DfsObjectRepresentation(otp); - r.pack = pack; - r.packIndex = packIndex; - r.offset = p; - all.add(r); - found = true; + for (int packIndex = 0; packIndex < packList.length; packIndex++) { + DfsPackFile pack = packList[packIndex]; + List tmp = findAllFromPack(pack, objects); + if (tmp.isEmpty()) + continue; + Collections.sort(tmp, OFFSET_SORT); + try { + wantReadAhead = true; + PackReverseIndex rev = pack.getReverseIdx(this); + DfsObjectRepresentation rep = new DfsObjectRepresentation( + pack, + packIndex); + for (DfsObjectToPack otp : tmp) { + pack.representation(rep, otp.getOffset(), this, rev); + otp.setOffset(0); + packer.select(otp, rep); + if (!otp.isFound()) { + otp.setFound(); + monitor.update(1); + } } + } finally { + cancelReadAhead(); } - if (!found) - throw new MissingObjectException(otp, otp.getType()); - if ((++updated & 1) == 1) { - monitor.update(1); // Update by 50%, the other 50% is below. - posted++; - } - objectCount++; } - Collections.sort(all, REPRESENTATION_SORT); + } - try { - wantReadAhead = true; - for (DfsObjectRepresentation r : all) { - r.pack.representation(this, r); - packer.select(r.object, r); - if ((++updated & 1) == 1 && posted < objectCount) { - monitor.update(1); - posted++; - } + private List findAllFromPack(DfsPackFile pack, + Iterable objects) throws IOException { + List tmp = new BlockList(); + PackIndex idx = pack.getPackIndex(this); + for (ObjectToPack otp : objects) { + long p = idx.findOffset(otp); + if (0 < p) { + otp.setOffset(p); + tmp.add((DfsObjectToPack) otp); } - } finally { - cancelReadAhead(); } - if (posted < objectCount) - monitor.update(objectCount - posted); + return tmp; } public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp, diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/LargePackedWholeObject.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/LargePackedWholeObject.java index b4f191bb1..4b050c566 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/LargePackedWholeObject.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/LargePackedWholeObject.java @@ -109,7 +109,7 @@ final class LargePackedWholeObject extends ObjectLoader { // again and open the stream from that location instead. // try { - ObjectId obj = pack.findObjectForOffset(ctx, objectOffset); + ObjectId obj = pack.getReverseIdx(ctx).findObject(objectOffset); return ctx.open(obj, type).openStream(); } finally { ctx.release();