diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/IntSetTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/IntSetTest.java new file mode 100644 index 000000000..f8838f8b5 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/IntSetTest.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2011, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.storage.pack; + +import static org.junit.Assert.*; + +import org.junit.Test; + +public class IntSetTest { + @Test + public void testAdd() { + IntSet s = new IntSet(); + + assertTrue(s.add(1)); + assertFalse(s.add(1)); + + for (int i = 2; i < 64; i++) + assertTrue(s.add(i)); + for (int i = 2; i < 64; i++) + assertFalse(s.add(i)); + + assertTrue(s.add(-1)); + assertFalse(s.add(-1)); + + assertTrue(s.add(-2)); + assertFalse(s.add(-2)); + + assertTrue(s.add(128)); + assertFalse(s.add(128)); + + assertFalse(s.add(1)); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java index aeba3160d..5d93126a9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java @@ -85,6 +85,8 @@ public class ObjectWalk extends RevWalk { */ private static final int IN_PENDING = RevWalk.REWRITE; + private static final byte[] EMPTY_PATH = {}; + private CanonicalTreeParser treeWalk; private List rootObjects; @@ -238,10 +240,8 @@ public class ObjectWalk extends RevWalk { return null; if ((r.flags & UNINTERESTING) != 0) { markTreeUninteresting(r.getTree()); - if (hasRevSort(RevSort.BOUNDARY)) { - pendingObjects.add(r.getTree()); + if (hasRevSort(RevSort.BOUNDARY)) return r; - } continue; } if (firstCommit == null) @@ -416,6 +416,16 @@ public class ObjectWalk 
extends RevWalk { return last != null ? treeWalk.getEntryPathHashCode() : 0; } + /** @return the internal buffer holding the current path. */ + public byte[] getPathBuffer() { + return last != null ? treeWalk.getEntryPathBuffer() : EMPTY_PATH; + } + + /** @return length of the path in {@link #getPathBuffer()}. */ + public int getPathLength() { + return last != null ? treeWalk.getEntryPathLength() : 0; + } + @Override public void dispose() { super.dispose(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BaseSearch.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BaseSearch.java new file mode 100644 index 000000000..785b8eb69 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BaseSearch.java @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2011, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.storage.pack; + +import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; +import static org.eclipse.jgit.lib.Constants.OBJ_TREE; + +import java.io.IOException; +import java.util.Set; + +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.MutableObjectId; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectIdSubclassMap; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.revwalk.RevTree; +import org.eclipse.jgit.treewalk.CanonicalTreeParser; + +class BaseSearch { + private static final int M_BLOB = FileMode.REGULAR_FILE.getBits(); + + private static final int M_TREE = FileMode.TREE.getBits(); + + private final ProgressMonitor progress; + + private final ObjectReader reader; + + private final ObjectId[] baseTrees; + + private final ObjectIdSubclassMap edgeObjects; + + private final IntSet alreadyProcessed; + + private final ObjectIdSubclassMap 
treeCache; + + private final CanonicalTreeParser parser; + + private final MutableObjectId idBuf; + + BaseSearch(ProgressMonitor countingMonitor, Set bases, + ObjectIdSubclassMap edges, ObjectReader or) { + progress = countingMonitor; + reader = or; + baseTrees = bases.toArray(new ObjectId[bases.size()]); + edgeObjects = edges; + + alreadyProcessed = new IntSet(); + treeCache = new ObjectIdSubclassMap(); + parser = new CanonicalTreeParser(); + idBuf = new MutableObjectId(); + } + + void addBase(int objectType, byte[] pathBuf, int pathLen, int pathHash) + throws IOException { + final int tailMode = modeForType(objectType); + if (tailMode == 0) + return; + + if (!alreadyProcessed.add(pathHash)) + return; + + if (pathLen == 0) { + for (ObjectId root : baseTrees) + add(root, OBJ_TREE, pathHash); + return; + } + + final int firstSlash = nextSlash(pathBuf, 0, pathLen); + + CHECK_BASE: for (ObjectId root : baseTrees) { + int ptr = 0; + int end = firstSlash; + int mode = end != pathLen ? M_TREE : tailMode; + + parser.reset(readTree(root)); + while (!parser.eof()) { + int cmp = parser.pathCompare(pathBuf, ptr, end, mode); + + if (cmp < 0) { + parser.next(); + continue; + } + + if (cmp > 0) + continue CHECK_BASE; + + if (end == pathLen) { + if (parser.getEntryFileMode().getObjectType() == objectType) { + idBuf.fromRaw(parser.idBuffer(), parser.idOffset()); + add(idBuf, objectType, pathHash); + } + continue CHECK_BASE; + } + + if (!FileMode.TREE.equals(parser.getEntryRawMode())) + continue CHECK_BASE; + + ptr = end + 1; + end = nextSlash(pathBuf, ptr, pathLen); + mode = end != pathLen ? 
M_TREE : tailMode; + + idBuf.fromRaw(parser.idBuffer(), parser.idOffset()); + parser.reset(readTree(idBuf)); + } + } + } + + private static int modeForType(int typeCode) { + switch (typeCode) { + case OBJ_TREE: + return M_TREE; + + case OBJ_BLOB: + return M_BLOB; + + default: + return 0; + } + } + + private static int nextSlash(byte[] pathBuf, int ptr, int end) { + while (ptr < end && pathBuf[ptr] != '/') + ptr++; + return ptr; + } + + private void add(AnyObjectId id, int objectType, int pathHash) { + ObjectToPack obj = new ObjectToPack(id, objectType); + obj.setEdge(); + obj.setPathHash(pathHash); + + if (edgeObjects.addIfAbsent(obj) == obj) + progress.update(1); + } + + private byte[] readTree(AnyObjectId id) throws MissingObjectException, + IncorrectObjectTypeException, IOException { + TreeWithData tree = treeCache.get(id); + if (tree != null) + return tree.buf; + + ObjectLoader ldr = reader.open(id, OBJ_TREE); + byte[] buf = ldr.getCachedBytes(Integer.MAX_VALUE); + treeCache.add(new TreeWithData(id, buf)); + return buf; + } + + private static class TreeWithData extends ObjectId { + final byte[] buf; + + TreeWithData(AnyObjectId id, byte[] buf) { + super(id); + this.buf = buf; + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/IntSet.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/IntSet.java new file mode 100644 index 000000000..edb4c3d71 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/IntSet.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2011, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/**
 * A set of primitive {@code int} values kept in a sorted array that doubles
 * in size whenever it fills up.
 */
class IntSet {
    /** Members occupy {@code set[0..cnt)} in ascending order. */
    private int[] set;

    /** Number of members currently stored. */
    private int cnt;

    IntSet() {
        set = new int[64];
    }

    /**
     * Insert a value unless it is already a member.
     *
     * @param key
     *            the value to insert.
     * @return true if the value was inserted; false if it was already
     *         present.
     */
    boolean add(int key) {
        // Binary search over the occupied prefix; on a miss "lo" ends up at
        // the index where key has to be inserted.
        int lo = 0;
        int hi = cnt;
        while (lo < hi) {
            final int mid = (lo + hi) >>> 1;
            final int probe = set[mid];
            if (key == probe)
                return false;
            if (key < probe)
                hi = mid;
            else
                lo = mid + 1;
        }

        final int at = lo;
        final int tail = cnt - at;
        if (cnt == set.length) {
            // Full: copy into a table twice the size, leaving a gap at the
            // insertion point.
            final int[] grown = new int[set.length * 2];
            System.arraycopy(set, 0, grown, 0, at);
            System.arraycopy(set, at, grown, at + 1, tail);
            set = grown;
        } else if (tail > 0) {
            // Shift the larger members up by one to open the gap.
            System.arraycopy(set, at, set, at + 1, tail);
        }

        set[at] = key;
        cnt++;
        return true;
    }
}
0 : 1) - (b.isEdge() ? 0 : 1); + if (cmp != 0) + return cmp; + return b.getWeight() - a.getWeight(); } }); @@ -1020,14 +1028,31 @@ public class PackWriter { q.release(); } + final int maxBases = config.getDeltaSearchWindowSize(); + Set baseTrees = new HashSet(); RevObject o; - while ((o = walker.next()) != null) { + if (o.has(RevFlag.UNINTERESTING)) { + if (baseTrees.size() <= maxBases) + baseTrees.add(((RevCommit) o).getTree()); + continue; + } addObject(o, 0); countingMonitor.update(1); } + + BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, // + edgeObjects, reader); while ((o = walker.nextObject()) != null) { - addObject(o, walker.getPathHashCode()); + if (o.has(RevFlag.UNINTERESTING)) + continue; + + int pathHash = walker.getPathHashCode(); + byte[] pathBuf = walker.getPathBuffer(); + int pathLen = walker.getPathLength(); + + bases.addBase(o.getType(), pathBuf, pathLen, pathHash); + addObject(o, pathHash); countingMonitor.update(1); } countingMonitor.endTask(); @@ -1047,25 +1072,25 @@ public class PackWriter { */ public void addObject(final RevObject object) throws IncorrectObjectTypeException { - addObject(object, 0); - } - - private void addObject(final RevObject object, final int pathHashCode) - throws IncorrectObjectTypeException { if (object.has(RevFlag.UNINTERESTING)) { switch (object.getType()) { case Constants.OBJ_TREE: case Constants.OBJ_BLOB: ObjectToPack otp = new ObjectToPack(object); - otp.setPathHash(pathHashCode); + otp.setPathHash(0); otp.setEdge(); - edgeObjects.add(otp); + edgeObjects.addIfAbsent(otp); thin = true; break; } return; } + addObject(object, 0); + } + + private void addObject(final RevObject object, final int pathHashCode) + throws IncorrectObjectTypeException { final ObjectToPack otp; if (reuseSupport != null) otp = reuseSupport.newObjectToPack(object); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java index 
/**
 * Compare the path of this current entry to the current entry of another
 * iterator.
 *
 * @param p
 *            the iterator whose {@code path} and {@code pathLen} are
 *            compared against.
 * @param pMode
 *            the mode to use for {@code p}'s entry.
 * @return a negative value if this entry sorts first; 0 if the entries are
 *         equal; a positive value if p's entry sorts first.
 */
int pathCompare(final AbstractTreeIterator p, final int pMode) {
    // It's common when we are a subtree for both parents to match;
    // when this happens everything in path[0..cPos] is known to
    // be equal and does not require evaluation again.
    //
    int cPos = alreadyMatch(this, p);
    return pathCompare(p.path, cPos, p.pathLen, pMode, cPos);
}

/**
 * Compare the path of this current entry to a raw buffer.
 * <p>
 * This entry's path is always read from its first byte, while the buffer
 * is read from {@code pos}.
 *
 * @param buf
 *            the raw path buffer.
 * @param pos
 *            position to start reading the raw buffer.
 * @param end
 *            one past the end of the raw buffer (length is end - pos).
 * @param mode
 *            the mode of the path held in the raw buffer.
 * @return a negative value if this entry sorts first; 0 if the entries are
 *         equal; a positive value if the buffer's path sorts first. The
 *         magnitude is a byte difference, not necessarily 1.
 */
public int pathCompare(byte[] buf, int pos, int end, int mode) {
    return pathCompare(buf, pos, end, mode, 0);
}

/**
 * Shared comparison loop: this entry's path from {@code aPos} against
 * {@code b[bPos..bEnd)}, byte by byte as unsigned values.
 */
private int pathCompare(byte[] b, int bPos, int bEnd, int bMode, int aPos) {
    final byte[] a = path;
    final int aEnd = pathLen;

    for (; aPos < aEnd && bPos < bEnd; aPos++, bPos++) {
        final int cmp = (a[aPos] & 0xff) - (b[bPos] & 0xff);
        if (cmp != 0)
            return cmp;
    }

    // One path is a prefix of the other (or they are equal): the exhausted
    // side contributes lastPathChar(its mode) instead of a path byte.
    // NOTE(review): presumably that is '/' for trees so a tree sorts as if
    // its name ended with a slash; confirm against lastPathChar.
    if (aPos < aEnd)
        return (a[aPos] & 0xff) - lastPathChar(bMode);
    if (bPos < bEnd)
        return lastPathChar(mode) - (b[bPos] & 0xff);
    return lastPathChar(mode) - lastPathChar(bMode);
}

/**
 * @return the internal buffer holding the current path. This is the
 *         iterator's own array, not a copy.
 */
public byte[] getEntryPathBuffer() {
    return path;
}

/** @return length of the path in {@link #getEntryPathBuffer()}. */
public int getEntryPathLength() {
    return pathLen;
}