Browse Source

PackWriter: Make thin packs more efficient

There is no point in pushing all of the files within the edge
commits into the delta search when making a thin pack.  This floods
the delta search window with objects that are unlikely to be useful
bases for the objects that will be written out, resulting in lower
data compression and higher transfer sizes.

Instead observe the path of a tree or blob that is being pushed
into the outgoing set, and use that path to locate up to WINDOW
ancestor versions from the edge commits.  Push only those objects
into the edgeObjects set, reducing the number of objects seen by the
search window.  This allows PackWriter to only look at ancestors
for the modified files, rather than all files in the project.
Limiting the search to WINDOW size makes sense, because more than
WINDOW edge objects will just skip through the window search as
none of them need to be delta compressed.

To further improve compression, sort edge objects into the front
of the window list, rather than randomly throughout.  This puts
non-edges later in the window and gives them a better chance at
finding their base, since they search backwards through the window.

These changes make a significant difference in the thin-pack:

  Before:
    remote: Counting objects: 144190, done
    remote: Finding sources: 100% (50275/50275)
    remote: Getting sizes: 100% (101405/101405)
    remote: Compressing objects: 100% (7587/7587)
    Receiving objects: 100% (50275/50275), 24.67 MiB | 9.90 MiB/s, done.
    Resolving deltas: 100% (40339/40339), completed with 2218 local objects.

    real    0m30.267s

  After:
    remote: Counting objects: 61549, done
    remote: Finding sources: 100% (50275/50275)
    remote: Getting sizes: 100% (18862/18862)
    remote: Compressing objects: 100% (7588/7588)
    Receiving objects: 100% (50275/50275), 11.04 MiB | 3.51 MiB/s, done.
    Resolving deltas: 100% (43160/43160), completed with 5014 local objects.

    real    0m22.170s

The resulting pack is 13.63 MiB smaller, even though it contains the
same exact objects.  82,543 fewer objects had to have their sizes
looked up, which saved about 8s of server CPU time.  2,796 more
objects from the client were used as part of the base object set,
which contributed to the smaller transfer size.

Change-Id: Id01271950432c6960897495b09deab70e33993a9
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Chris Aniszczyk <caniszczyk@gmail.com>
stable-0.11
Shawn O. Pearce 14 years ago committed by Chris Aniszczyk
parent
commit
13bcf05a9e
  1. 74
      org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/IntSetTest.java
  2. 16
      org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java
  3. 202
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BaseSearch.java
  4. 87
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/IntSet.java
  5. 43
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java
  6. 56
      org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java

74
org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/IntSetTest.java

@ -0,0 +1,74 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.pack;
import static org.junit.Assert.*;
import org.junit.Test;
/** Unit tests for {@link IntSet}. */
public class IntSetTest {
	/**
	 * Each distinct value must be accepted by {@code add} exactly once and
	 * rejected on every later attempt.
	 */
	@Test
	public void testAdd() {
		final IntSet ints = new IntSet();

		assertAddedOnce(ints, 1);

		// Fill the remaining small values first, then retry all of them.
		int v = 2;
		while (v < 64) {
			assertTrue(ints.add(v));
			v++;
		}
		v = 2;
		while (v < 64) {
			assertFalse(ints.add(v));
			v++;
		}

		// Negative values, and a value above everything stored so far.
		assertAddedOnce(ints, -1);
		assertAddedOnce(ints, -2);
		assertAddedOnce(ints, 128);

		assertFalse(ints.add(1));
	}

	/** The first add of {@code key} must succeed, the second must not. */
	private static void assertAddedOnce(IntSet ints, int key) {
		assertTrue(ints.add(key));
		assertFalse(ints.add(key));
	}
}

16
org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java

@ -85,6 +85,8 @@ public class ObjectWalk extends RevWalk {
*/
private static final int IN_PENDING = RevWalk.REWRITE;
private static final byte[] EMPTY_PATH = {};
private CanonicalTreeParser treeWalk;
private List<RevObject> rootObjects;
@ -238,10 +240,8 @@ public class ObjectWalk extends RevWalk {
return null;
if ((r.flags & UNINTERESTING) != 0) {
markTreeUninteresting(r.getTree());
if (hasRevSort(RevSort.BOUNDARY)) {
pendingObjects.add(r.getTree());
if (hasRevSort(RevSort.BOUNDARY))
return r;
}
continue;
}
if (firstCommit == null)
@ -416,6 +416,16 @@ public class ObjectWalk extends RevWalk {
return last != null ? treeWalk.getEntryPathHashCode() : 0;
}
/**
 * Get the raw bytes of the current path.
 *
 * @return the tree walker's internal path buffer, or an empty array when
 *         {@code last} is null. Use {@link #getPathLength()} for the
 *         length of the path within the buffer.
 */
public byte[] getPathBuffer() {
	if (last == null)
		return EMPTY_PATH;
	return treeWalk.getEntryPathBuffer();
}
/**
 * Get the length of the path held in {@link #getPathBuffer()}.
 *
 * @return length of the current path, or 0 when {@code last} is null.
 */
public int getPathLength() {
	if (last == null)
		return 0;
	return treeWalk.getEntryPathLength();
}
@Override
public void dispose() {
super.dispose();

202
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BaseSearch.java

@ -0,0 +1,202 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.pack;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import java.io.IOException;
import java.util.Set;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.MutableObjectId;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSubclassMap;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.treewalk.CanonicalTreeParser;
/**
 * Looks a path up in a fixed set of base trees and records what it finds.
 * <p>
 * Every object found at the requested path, with the requested type, is
 * registered in the caller supplied edge object map, flagged as an edge and
 * tagged with the path hash passed in by the caller.
 */
class BaseSearch {
// Raw mode bits handed to pathCompare() as the mode of the path component.
private static final int M_BLOB = FileMode.REGULAR_FILE.getBits();
private static final int M_TREE = FileMode.TREE.getBits();
// Receives update(1) from add() when addIfAbsent() returns the new object.
private final ProgressMonitor progress;
// Source of tree data for readTree().
private final ObjectReader reader;
// Root trees searched by addBase(), in array order.
private final ObjectId[] baseTrees;
// Map supplied by the caller; add() inserts the matches into it.
private final ObjectIdSubclassMap<ObjectToPack> edgeObjects;
// Path hashes already passed to addBase(); a repeated hash is not searched.
private final IntSet alreadyProcessed;
// Raw bytes of trees already loaded, so readTree() opens each tree only once.
private final ObjectIdSubclassMap<TreeWithData> treeCache;
// Single parser instance, reset onto each tree that gets scanned.
private final CanonicalTreeParser parser;
// Scratch id reused when copying an entry's id out of the parser.
private final MutableObjectId idBuf;
/**
 * Create a search over a set of base trees.
 *
 * @param countingMonitor
 *            monitor updated as edge objects are added.
 * @param bases
 *            root trees to search; copied into an array here.
 * @param edges
 *            map that receives the edge objects; used directly, not copied.
 * @param or
 *            reader used to load tree contents.
 */
BaseSearch(ProgressMonitor countingMonitor, Set<RevTree> bases,
ObjectIdSubclassMap<ObjectToPack> edges, ObjectReader or) {
progress = countingMonitor;
reader = or;
baseTrees = bases.toArray(new ObjectId[bases.size()]);
edgeObjects = edges;
alreadyProcessed = new IntSet();
treeCache = new ObjectIdSubclassMap<TreeWithData>();
parser = new CanonicalTreeParser();
idBuf = new MutableObjectId();
}
/**
 * Search every base tree for one path and add the matches as edge objects.
 * <p>
 * Nothing is done when the type is neither tree nor blob, or when the same
 * path hash was passed to this method before. Repeats are detected by hash
 * alone, so a different path with an identical hash is skipped as well.
 *
 * @param objectType
 *            type code of the object being looked for; only matching
 *            entries of this type are added.
 * @param pathBuf
 *            buffer holding the path, with '/' between components.
 * @param pathLen
 *            number of bytes of {@code pathBuf} that make up the path.
 * @param pathHash
 *            hash of the path; used to skip repeats and stored on every
 *            edge object that is added.
 * @throws IOException
 *             a tree could not be read.
 */
void addBase(int objectType, byte[] pathBuf, int pathLen, int pathHash)
throws IOException {
final int tailMode = modeForType(objectType);
// Only trees and blobs are searched for.
if (tailMode == 0)
return;
if (!alreadyProcessed.add(pathHash))
return;
// Empty path: the base root trees themselves are the candidates.
if (pathLen == 0) {
for (ObjectId root : baseTrees)
add(root, OBJ_TREE, pathHash);
return;
}
// End of the first path component; identical for every base tree.
final int firstSlash = nextSlash(pathBuf, 0, pathLen);
CHECK_BASE: for (ObjectId root : baseTrees) {
// [ptr, end) bounds the component currently being matched.
int ptr = 0;
int end = firstSlash;
// Intermediate components are compared as trees, the last one as tailMode.
int mode = end != pathLen ? M_TREE : tailMode;
parser.reset(readTree(root));
while (!parser.eof()) {
int cmp = parser.pathCompare(pathBuf, ptr, end, mode);
// Current entry sorts before the component; keep scanning this tree.
if (cmp < 0) {
parser.next();
continue;
}
// Current entry sorts after the component; give up on this base.
if (cmp > 0)
continue CHECK_BASE;
// Last component matched: accept it only if the object type agrees.
if (end == pathLen) {
if (parser.getEntryFileMode().getObjectType() == objectType) {
idBuf.fromRaw(parser.idBuffer(), parser.idOffset());
add(idBuf, objectType, pathHash);
}
continue CHECK_BASE;
}
// More components remain, but this entry is not a tree to descend into.
if (!FileMode.TREE.equals(parser.getEntryRawMode()))
continue CHECK_BASE;
// Step past the '/' to the next component and continue in the subtree.
ptr = end + 1;
end = nextSlash(pathBuf, ptr, pathLen);
mode = end != pathLen ? M_TREE : tailMode;
idBuf.fromRaw(parser.idBuffer(), parser.idOffset());
parser.reset(readTree(idBuf));
}
}
}
/**
 * Map an object type code to the mode bits used for path comparison.
 *
 * @param typeCode
 *            object type code.
 * @return {@code M_TREE} for trees, {@code M_BLOB} for blobs, 0 for any
 *         other type.
 */
private static int modeForType(int typeCode) {
switch (typeCode) {
case OBJ_TREE:
return M_TREE;
case OBJ_BLOB:
return M_BLOB;
default:
return 0;
}
}
/**
 * Find the next '/' in a path buffer.
 *
 * @param pathBuf
 *            buffer to scan.
 * @param ptr
 *            first position to examine.
 * @param end
 *            position to stop at (exclusive).
 * @return index of the first '/' at or after {@code ptr}, or {@code end}
 *         if there is none before {@code end}.
 */
private static int nextSlash(byte[] pathBuf, int ptr, int end) {
while (ptr < end && pathBuf[ptr] != '/')
ptr++;
return ptr;
}
/**
 * Register an object as an edge object carrying the given path hash.
 *
 * @param id
 *            id of the object; a new ObjectToPack is created from it.
 * @param objectType
 *            type code of the object.
 * @param pathHash
 *            path hash stored on the new ObjectToPack.
 */
private void add(AnyObjectId id, int objectType, int pathHash) {
ObjectToPack obj = new ObjectToPack(id, objectType);
obj.setEdge();
obj.setPathHash(pathHash);
// Progress advances only when addIfAbsent() hands back the object just
// created - presumably meaning it was newly inserted; confirm against
// ObjectIdSubclassMap.
if (edgeObjects.addIfAbsent(obj) == obj)
progress.update(1);
}
/**
 * Get the raw content of a tree, loading it through the reader on first use.
 *
 * @param id
 *            id of the tree.
 * @return the tree's raw bytes; the same array is returned on later calls
 *         for the same id.
 * @throws MissingObjectException
 *             declared by this method; presumably raised by the reader when
 *             the object does not exist.
 * @throws IncorrectObjectTypeException
 *             declared by this method; presumably raised by the reader when
 *             the object is not a tree.
 * @throws IOException
 *             the object could not be read.
 */
private byte[] readTree(AnyObjectId id) throws MissingObjectException,
IncorrectObjectTypeException, IOException {
TreeWithData tree = treeCache.get(id);
if (tree != null)
return tree.buf;
ObjectLoader ldr = reader.open(id, OBJ_TREE);
byte[] buf = ldr.getCachedBytes(Integer.MAX_VALUE);
treeCache.add(new TreeWithData(id, buf));
return buf;
}
/** An object id paired with the raw bytes of that tree, for treeCache. */
private static class TreeWithData extends ObjectId {
final byte[] buf;
TreeWithData(AnyObjectId id, byte[] buf) {
super(id);
this.buf = buf;
}
}
}

87
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/IntSet.java

@ -0,0 +1,87 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.pack;
/**
 * Minimal set of primitive {@code int} values.
 * <p>
 * Members are kept in ascending order inside an array that starts with room
 * for 64 values and doubles in size whenever it is full.
 */
class IntSet {
	/** Sorted members; only the first {@link #size} slots are in use. */
	private int[] values;

	/** Number of members currently stored. */
	private int size;

	IntSet() {
		values = new int[64];
	}

	/**
	 * Insert a value into the set.
	 *
	 * @param key
	 *            the value to insert.
	 * @return true if the value was inserted; false if the set already
	 *         contained it.
	 */
	boolean add(int key) {
		// Binary search for the key, or for the slot where it belongs.
		int lo = 0;
		int hi = size;
		while (lo < hi) {
			final int mid = (lo + hi) >>> 1;
			final int cur = values[mid];
			if (key == cur)
				return false;
			if (key < cur)
				hi = mid;
			else
				lo = mid + 1;
		}

		// Out of room: move everything into an array twice as large.
		if (size == values.length) {
			final int[] grown = new int[values.length * 2];
			System.arraycopy(values, 0, grown, 0, size);
			values = grown;
		}

		// Open a gap at the insertion point unless appending at the end.
		final int tail = size - lo;
		if (tail > 0)
			System.arraycopy(values, lo, values, lo + 1, tail);
		values[lo] = key;
		size++;
		return true;
	}
}

43
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

@ -55,8 +55,10 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
@ -85,9 +87,11 @@ import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.ThreadSafeProgressMonitor;
import org.eclipse.jgit.revwalk.AsyncRevObjectQueue;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.storage.file.PackIndexWriter;
import org.eclipse.jgit.util.TemporaryBuffer;
@ -628,6 +632,10 @@ public class PackWriter {
if (cmp != 0)
return cmp;
cmp = (a.isEdge() ? 0 : 1) - (b.isEdge() ? 0 : 1);
if (cmp != 0)
return cmp;
return b.getWeight() - a.getWeight();
}
});
@ -1020,14 +1028,31 @@ public class PackWriter {
q.release();
}
final int maxBases = config.getDeltaSearchWindowSize();
Set<RevTree> baseTrees = new HashSet<RevTree>();
RevObject o;
while ((o = walker.next()) != null) {
if (o.has(RevFlag.UNINTERESTING)) {
if (baseTrees.size() <= maxBases)
baseTrees.add(((RevCommit) o).getTree());
continue;
}
addObject(o, 0);
countingMonitor.update(1);
}
BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, //
edgeObjects, reader);
while ((o = walker.nextObject()) != null) {
addObject(o, walker.getPathHashCode());
if (o.has(RevFlag.UNINTERESTING))
continue;
int pathHash = walker.getPathHashCode();
byte[] pathBuf = walker.getPathBuffer();
int pathLen = walker.getPathLength();
bases.addBase(o.getType(), pathBuf, pathLen, pathHash);
addObject(o, pathHash);
countingMonitor.update(1);
}
countingMonitor.endTask();
@ -1047,25 +1072,25 @@ public class PackWriter {
*/
public void addObject(final RevObject object)
throws IncorrectObjectTypeException {
addObject(object, 0);
}
private void addObject(final RevObject object, final int pathHashCode)
throws IncorrectObjectTypeException {
if (object.has(RevFlag.UNINTERESTING)) {
switch (object.getType()) {
case Constants.OBJ_TREE:
case Constants.OBJ_BLOB:
ObjectToPack otp = new ObjectToPack(object);
otp.setPathHash(pathHashCode);
otp.setPathHash(0);
otp.setEdge();
edgeObjects.add(otp);
edgeObjects.addIfAbsent(otp);
thin = true;
break;
}
return;
}
addObject(object, 0);
}
private void addObject(final RevObject object, final int pathHashCode)
throws IncorrectObjectTypeException {
final ObjectToPack otp;
if (reuseSupport != null)
otp = reuseSupport.newObjectToPack(object);

56
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java

@ -310,29 +310,47 @@ public abstract class AbstractTreeIterator {
}
int pathCompare(final AbstractTreeIterator p, final int pMode) {
final byte[] a = path;
final byte[] b = p.path;
final int aLen = pathLen;
final int bLen = p.pathLen;
int cPos;
// Its common when we are a subtree for both parents to match;
// when this happens everything in path[0..cPos] is known to
// be equal and does not require evaluation again.
//
cPos = alreadyMatch(this, p);
int cPos = alreadyMatch(this, p);
return pathCompare(p.path, cPos, p.pathLen, pMode, cPos);
}
/**
 * Compare the path of this current entry to a raw buffer.
 *
 * @param buf
 *            the raw path buffer.
 * @param pos
 *            position to start reading the raw buffer.
 * @param end
 *            one past the end of the raw buffer (length is end - pos).
 * @param mode
 *            the mode of the path.
 * @return a negative value if this entry sorts first; 0 if the entries are
 *         equal; a positive value if the buffer's path sorts first. The
 *         magnitude carries no meaning, so callers should test only the
 *         sign.
 */
public int pathCompare(byte[] buf, int pos, int end, int mode) {
return pathCompare(buf, pos, end, mode, 0);
}
private int pathCompare(byte[] b, int bPos, int bEnd, int bMode, int aPos) {
final byte[] a = path;
final int aEnd = pathLen;
for (; cPos < aLen && cPos < bLen; cPos++) {
final int cmp = (a[cPos] & 0xff) - (b[cPos] & 0xff);
for (; aPos < aEnd && bPos < bEnd; aPos++, bPos++) {
final int cmp = (a[aPos] & 0xff) - (b[bPos] & 0xff);
if (cmp != 0)
return cmp;
}
if (cPos < aLen)
return (a[cPos] & 0xff) - lastPathChar(pMode);
if (cPos < bLen)
return lastPathChar(mode) - (b[cPos] & 0xff);
return lastPathChar(mode) - lastPathChar(pMode);
if (aPos < aEnd)
return (a[aPos] & 0xff) - lastPathChar(bMode);
if (bPos < bEnd)
return lastPathChar(mode) - (b[bPos] & 0xff);
return lastPathChar(mode) - lastPathChar(bMode);
}
private static int alreadyMatch(AbstractTreeIterator a,
@ -406,6 +424,16 @@ public abstract class AbstractTreeIterator {
return TreeWalk.pathOf(this);
}
/**
 * Get the internal buffer holding the current path.
 * <p>
 * The array returned is this iterator's own {@code path} field, not a copy.
 *
 * @return the internal buffer holding the current path; see
 *         {@link #getEntryPathLength()} for the length of the path in it.
 */
public byte[] getEntryPathBuffer() {
return path;
}
/**
 * Get the length of the current path.
 *
 * @return length of the path in {@link #getEntryPathBuffer()}; this is the
 *         iterator's {@code pathLen} field.
 */
public int getEntryPathLength() {
return pathLen;
}
/**
* Get the current entry's path hash code.
* <p>

Loading…
Cancel
Save