diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java new file mode 100644 index 000000000..4ae4eb8fa --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindow.java @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.storage.pack; + +import java.io.IOException; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.util.TemporaryBuffer; + +class DeltaWindow { + private static final int NEXT_RES = 0; + + private static final int NEXT_SRC = 1; + + private final PackWriter writer; + + private final ObjectReader reader; + + private final DeltaWindowEntry[] window; + + /** Maximum depth we should create for any delta chain. */ + private final int maxDepth; + + // The object we are currently considering needs a lot of state: + + /** Position of {@link #res} within {@link #window} array. */ + private int resSlot; + + /** + * Maximum delta chain depth the current object can have. + *
+ * This can be smaller than {@link #maxDepth}. + */ + private int resMaxDepth; + + /** Window entry of the object we are currently considering. */ + private DeltaWindowEntry res; + + /** If we have a delta for {@link #res}, this is the shortest found yet. */ + private TemporaryBuffer.Heap bestDelta; + + /** If we have {@link #bestDelta}, the window position it was created by. */ + private int bestSlot; + + DeltaWindow(PackWriter pw, ObjectReader or) { + writer = pw; + reader = or; + + // C Git increases the window size supplied by the user by 1. + // We don't know why it does this, but if the user asks for + // window=10, it actually processes with window=11. Because + // the window size has the largest direct impact on the final + // pack file size, we match this odd behavior here to give us + // a better chance of producing a similar sized pack as C Git. + // + // We would prefer to directly honor the user's request since + // PackWriter has a minimum of 2 for the window size, but then + // users might complain that JGit is creating a bigger pack file. + // + window = new DeltaWindowEntry[pw.getDeltaSearchWindowSize() + 1]; + for (int i = 0; i < window.length; i++) + window[i] = new DeltaWindowEntry(); + + maxDepth = pw.getMaxDeltaDepth(); + } + + void search(ProgressMonitor monitor, ObjectToPack[] toSearch, int off, + int cnt) throws IOException { + for (int end = off + cnt; off < end; off++) { + monitor.update(1); + + res = window[resSlot]; + res.set(toSearch[off]); + + if (res.object.isDoNotDelta()) { + // PackWriter marked edge objects with the do-not-delta flag. + // They are the only ones that appear in toSearch with it set, + // but we don't actually want to make a delta for them, just + // need to push them into the window so they can be read by + // other objects coming through. + // + keepInWindow(); + } else { + // Search for a delta for the current window slot. 
+ // + search(); + } + } + } + + private void search() throws IOException { + // TODO(spearce) If the object is used as a base for other + // objects in this pack we should limit the depth we create + // for ourselves to be the remainder of our longest dependent + // chain and the configured maximum depth. This can happen + // when the dependents are being reused out of a pack, but we + // cannot be because we are near the edge of a thin pack. + // + resMaxDepth = maxDepth; + + // Loop through the window backwards, considering every entry. + // This lets us look at the bigger objects that came before. + // + for (int srcSlot = prior(resSlot); srcSlot != resSlot; srcSlot = prior(srcSlot)) { + DeltaWindowEntry src = window[srcSlot]; + if (src.empty()) + break; + if (delta(src, srcSlot) == NEXT_RES) { + bestDelta = null; + return; + } + } + + // We couldn't find a suitable delta for this object, but it may + // still be able to act as a base for another one. + // + if (bestDelta == null) { + keepInWindow(); + return; + } + + // Select this best matching delta as the base for the object. + // + ObjectToPack srcObj = window[bestSlot].object; + ObjectToPack resObj = res.object; + if (srcObj.isDoNotDelta()) { + // The source (the delta base) is an edge object outside of the + // pack. It's part of the common base set that the peer already + // has on hand, so we don't want to send it. We have to store + // an ObjectId and *NOT* an ObjectToPack for the base to ensure + // the base isn't included in the outgoing pack file. + // + resObj.setDeltaBase(srcObj.copy()); + } else { + // The base is part of the pack we are sending, so it should be + // a direct pointer to the base. + // + resObj.setDeltaBase(srcObj); + } + resObj.setDeltaDepth(srcObj.getDeltaDepth() + 1); + resObj.clearReuseAsIs(); + + // Discard the cached best result, otherwise it leaks. + // + bestDelta = null; + + // If this should be the end of a chain, don't keep + // it in the window.
Just move on to the next object. + // + if (resObj.getDeltaDepth() == maxDepth) + return; + + shuffleBaseUpInPriority(); + keepInWindow(); + } + + private int delta(final DeltaWindowEntry src, final int srcSlot) + throws IOException { + // Objects must use only the same type as their delta base. + // If we are looking at something where that isn't true we + // have exhausted everything of the correct type and should + // move on to the next thing to examine. + // + if (src.type() != res.type()) { + keepInWindow(); + return NEXT_RES; + } + + // Only consider a source with a short enough delta chain. + if (src.depth() > resMaxDepth) + return NEXT_SRC; + + // Estimate a reasonable upper limit on delta size. + int msz = deltaSizeLimit(res, resMaxDepth, src); + if (msz <= 8) + return NEXT_SRC; + + // If we have to insert a lot to make this work, find another. + if (res.size() - src.size() > msz) + return NEXT_SRC; + + // If the sizes are radically different, this is a bad pairing. + if (res.size() < src.size() / 16) + return NEXT_SRC; + + DeltaIndex srcIndex; + try { + srcIndex = index(src); + } catch (LargeObjectException tooBig) { + // If the source is too big to work on, skip it. + dropFromWindow(srcSlot); + return NEXT_SRC; + } catch (IOException notAvailable) { + if (src.object.isDoNotDelta()) { + // This is an edge that is suddenly not available. + dropFromWindow(srcSlot); + return NEXT_SRC; + } else { + throw notAvailable; + } + } + + byte[] resBuf; + try { + resBuf = buffer(res); + } catch (LargeObjectException tooBig) { + // If it's too big, move on to another item. + return NEXT_RES; + } + + // If we already have a delta for the current object, abort + // encoding early if this new pairing produces a larger delta.
+ if (bestDelta != null && bestDelta.length() < msz) + msz = (int) bestDelta.length(); + + TemporaryBuffer.Heap delta = new TemporaryBuffer.Heap(msz); + try { + if (!srcIndex.encode(delta, resBuf, msz)) + return NEXT_SRC; + } catch (IOException deltaTooBig) { + // This only happens when the heap overflows our limit. + return NEXT_SRC; + } + + if (isBetterDelta(src, delta)) { + bestDelta = delta; + bestSlot = srcSlot; + } + + return NEXT_SRC; + } + + private void shuffleBaseUpInPriority() { + // Shuffle the entire window so that the best match we just used + // is at our current index, and our current object is at the index + // before it. Slide any entries in between to make space. + // + window[resSlot] = window[bestSlot]; + + DeltaWindowEntry next = res; + int slot = prior(resSlot); + for (; slot != bestSlot; slot = prior(slot)) { + DeltaWindowEntry e = window[slot]; + window[slot] = next; + next = e; + } + window[slot] = next; + } + + private void keepInWindow() { + if (++resSlot == window.length) + resSlot = 0; + } + + private int prior(int slot) { + if (slot == 0) + return window.length - 1; + return slot - 1; + } + + private void dropFromWindow(@SuppressWarnings("unused") int srcSlot) { + // TODO: drop the given source entry from the window, as it is + // somehow invalid for us to work with. Currently a no-op. + } + + private boolean isBetterDelta(DeltaWindowEntry src, + TemporaryBuffer.Heap resDelta) { + if (bestDelta == null) + return true; + + // If both delta sequences are the same length, use the one + // that has a shorter delta chain since it would be faster + // to access during reads.
+ // + if (resDelta.length() == bestDelta.length()) + return src.depth() < window[bestSlot].depth(); + + return resDelta.length() < bestDelta.length(); + } + + private static int deltaSizeLimit(DeltaWindowEntry res, int maxDepth, + DeltaWindowEntry src) { + // Ideally the delta is at least 50% of the original size, + // but we also want to account for delta header overhead in + // the pack file (to point to the delta base) so subtract off + // some of those header bytes from the limit. + // + final int limit = res.size() / 2 - 20; + + // Distribute the delta limit over the entire chain length. + // This is weighted such that deeper items in the chain must + // be even smaller than if they were earlier in the chain, as + // they cost significantly more to unpack due to the increased + // number of recursive unpack calls. + // The product is computed as a long so it cannot overflow int. + final int remainingDepth = maxDepth - src.depth(); + return (int) (((long) limit * remainingDepth) / maxDepth); + } + + private DeltaIndex index(DeltaWindowEntry ent) + throws MissingObjectException, IncorrectObjectTypeException, + IOException, LargeObjectException { + DeltaIndex idx = ent.index; + if (idx == null) { + try { + idx = new DeltaIndex(buffer(ent)); + } catch (OutOfMemoryError noMemory) { + LargeObjectException e = new LargeObjectException(ent.object); + e.initCause(noMemory); + throw e; + } + ent.index = idx; + } + return idx; + } + + private byte[] buffer(DeltaWindowEntry ent) throws MissingObjectException, + IncorrectObjectTypeException, IOException, LargeObjectException { + byte[] buf = ent.buffer; + if (buf == null) + ent.buffer = buf = writer.buffer(reader, ent.object); + return buf; + } +} \ No newline at end of file diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindowEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindowEntry.java new file mode 100644 index 000000000..0f1e6329f --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaWindowEntry.java @@ -0,0 +1,80 @@ +/* + *
Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.storage.pack; + +class DeltaWindowEntry { + ObjectToPack object; + + /** Complete contents of this object. Lazily loaded. */ + byte[] buffer; + + /** Index of this object's content, to encode other deltas. Lazily loaded. */ + DeltaIndex index; + + void set(ObjectToPack object) { + this.object = object; + this.index = null; + this.buffer = null; + } + + /** @return current delta chain depth of this object. */ + int depth() { + return object.getDeltaDepth(); + } + + /** @return type of the object in this window entry. */ + int type() { + return object.getType(); + } + + /** @return estimated unpacked size of the object, in bytes. */ + int size() { + return object.getWeight(); + } + + /** @return true if there is no object stored in this entry. */ + boolean empty() { + return object == null; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java index d96d5ddfd..f88f2635e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java @@ -48,11 +48,14 @@ import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_DELT import static org.eclipse.jgit.storage.pack.StoredObjectRepresentation.PACK_WHOLE; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.security.MessageDigest; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.zip.Deflater; @@ -61,6 +64,7 @@ import java.util.zip.DeflaterOutputStream; import org.eclipse.jgit.JGitText; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.LargeObjectException; import
org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.StoredObjectRepresentationNotAvailableException; import org.eclipse.jgit.lib.AnyObjectId; @@ -78,6 +82,8 @@ import org.eclipse.jgit.revwalk.RevFlag; import org.eclipse.jgit.revwalk.RevObject; import org.eclipse.jgit.revwalk.RevSort; import org.eclipse.jgit.storage.file.PackIndexWriter; +import org.eclipse.jgit.util.IO; +import org.eclipse.jgit.util.TemporaryBuffer; /** *
@@ -716,6 +722,8 @@ public class PackWriter {
if ((reuseDeltas || reuseObjects) && reuseSupport != null)
searchForReuse();
+ if (deltaCompress)
+ searchForDeltas(compressMonitor);
final PackOutputStream out = new PackOutputStream(writeMonitor,
packStream, this);
@@ -745,6 +753,103 @@ public class PackWriter {
}
}
+ private void searchForDeltas(ProgressMonitor monitor)
+ throws MissingObjectException, IncorrectObjectTypeException,
+ IOException {
+ // Commits and annotated tags tend to have too many differences to
+ // really benefit from delta compression. Consequently just don't
+ // bother examining those types here.
+ //
+ ObjectToPack[] list = new ObjectToPack[
+ objectsLists[Constants.OBJ_TREE].size()
+ + objectsLists[Constants.OBJ_BLOB].size()
+ + edgeObjects.size()];
+ int cnt = 0;
+ cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_TREE);
+ cnt = findObjectsNeedingDelta(list, cnt, Constants.OBJ_BLOB);
+ if (cnt == 0)
+ return;
+
+ // Queue up any edge objects that we might delta against. We won't
+ // be sending these as we assume the other side has them, but we need
+ // them in the search phase below.
+ //
+ for (ObjectToPack eo : edgeObjects) {
+ try {
+ if (loadSize(eo))
+ list[cnt++] = eo;
+ } catch (IOException notAvailable) {
+ // Skip this object. Since we aren't going to write it out
+ // the only consequence of it being unavailable to us is we
+ // may produce a larger data stream than we could have.
+ //
+ if (!ignoreMissingUninteresting)
+ throw notAvailable;
+ }
+ }
+
+ monitor.beginTask(COMPRESSING_OBJECTS_PROGRESS, cnt);
+
+ // Sort the objects by path hash so like files are near each other,
+ // and then by size descending so that bigger files are first. This
+ // applies "Linus' Law" which states that newer files tend to be the
+ // bigger ones, because source files grow and hardly ever shrink.
+ //
+ Arrays.sort(list, 0, cnt, new Comparator