Browse Source
There is no point in pushing all of the files within the edge commits into the delta search when making a thin pack. This floods the delta search window with objects that are unlikely to be useful bases for the objects that will be written out, resulting in lower data compression and higher transfer sizes. Instead observe the path of a tree or blob that is being pushed into the outgoing set, and use that path to locate up to WINDOW ancestor versions from the edge commits. Push only those objects into the edgeObjects set, reducing the number of objects seen by the search window. This allows PackWriter to only look at ancestors for the modified files, rather than all files in the project. Limiting the search to WINDOW size makes sense, because more than WINDOW edge objects will just skip through the window search as none of them need to be delta compressed. To further improve compression, sort edge objects into the front of the window list, rather than randomly throughout. This puts non-edges later in the window and gives them a better chance at finding their base, since they search backwards through the window. These changes make a significant difference in the thin-pack: Before: remote: Counting objects: 144190, done remote: Finding sources: 100% (50275/50275) remote: Getting sizes: 100% (101405/101405) remote: Compressing objects: 100% (7587/7587) Receiving objects: 100% (50275/50275), 24.67 MiB | 9.90 MiB/s, done. Resolving deltas: 100% (40339/40339), completed with 2218 local objects. real 0m30.267s After: remote: Counting objects: 61549, done remote: Finding sources: 100% (50275/50275) remote: Getting sizes: 100% (18862/18862) remote: Compressing objects: 100% (7588/7588) Receiving objects: 100% (50275/50275), 11.04 MiB | 3.51 MiB/s, done. Resolving deltas: 100% (43160/43160), completed with 5014 local objects. real 0m22.170s The resulting pack is 13.63 MiB smaller, even though it contains the same exact objects. 
82,543 fewer objects had to have their sizes looked up, which saved about 8s of server CPU time. 2,796 more objects from the client were used as part of the base object set, which contributed to the smaller transfer size. Change-Id: Id01271950432c6960897495b09deab70e33993a9 Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Chris Aniszczyk <caniszczyk@gmail.com> stable-0.11
Shawn O. Pearce
14 years ago
committed by
Chris Aniszczyk
6 changed files with 452 additions and 26 deletions
@ -0,0 +1,74 @@
|
||||
/* |
||||
* Copyright (C) 2011, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.storage.pack; |
||||
|
||||
import static org.junit.Assert.*; |
||||
|
||||
import org.junit.Test; |
||||
|
||||
public class IntSetTest { |
||||
@Test |
||||
public void testAdd() { |
||||
IntSet s = new IntSet(); |
||||
|
||||
assertTrue(s.add(1)); |
||||
assertFalse(s.add(1)); |
||||
|
||||
for (int i = 2; i < 64; i++) |
||||
assertTrue(s.add(i)); |
||||
for (int i = 2; i < 64; i++) |
||||
assertFalse(s.add(i)); |
||||
|
||||
assertTrue(s.add(-1)); |
||||
assertFalse(s.add(-1)); |
||||
|
||||
assertTrue(s.add(-2)); |
||||
assertFalse(s.add(-2)); |
||||
|
||||
assertTrue(s.add(128)); |
||||
assertFalse(s.add(128)); |
||||
|
||||
assertFalse(s.add(1)); |
||||
} |
||||
} |
@ -0,0 +1,202 @@
|
||||
/* |
||||
* Copyright (C) 2011, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.storage.pack; |
||||
|
||||
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB; |
||||
import static org.eclipse.jgit.lib.Constants.OBJ_TREE; |
||||
|
||||
import java.io.IOException; |
||||
import java.util.Set; |
||||
|
||||
import org.eclipse.jgit.errors.IncorrectObjectTypeException; |
||||
import org.eclipse.jgit.errors.MissingObjectException; |
||||
import org.eclipse.jgit.lib.AnyObjectId; |
||||
import org.eclipse.jgit.lib.FileMode; |
||||
import org.eclipse.jgit.lib.MutableObjectId; |
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
import org.eclipse.jgit.lib.ObjectIdSubclassMap; |
||||
import org.eclipse.jgit.lib.ObjectLoader; |
||||
import org.eclipse.jgit.lib.ObjectReader; |
||||
import org.eclipse.jgit.lib.ProgressMonitor; |
||||
import org.eclipse.jgit.revwalk.RevTree; |
||||
import org.eclipse.jgit.treewalk.CanonicalTreeParser; |
||||
|
||||
class BaseSearch { |
||||
private static final int M_BLOB = FileMode.REGULAR_FILE.getBits(); |
||||
|
||||
private static final int M_TREE = FileMode.TREE.getBits(); |
||||
|
||||
private final ProgressMonitor progress; |
||||
|
||||
private final ObjectReader reader; |
||||
|
||||
private final ObjectId[] baseTrees; |
||||
|
||||
private final ObjectIdSubclassMap<ObjectToPack> edgeObjects; |
||||
|
||||
private final IntSet alreadyProcessed; |
||||
|
||||
private final ObjectIdSubclassMap<TreeWithData> treeCache; |
||||
|
||||
private final CanonicalTreeParser parser; |
||||
|
||||
private final MutableObjectId idBuf; |
||||
|
||||
BaseSearch(ProgressMonitor countingMonitor, Set<RevTree> bases, |
||||
ObjectIdSubclassMap<ObjectToPack> edges, ObjectReader or) { |
||||
progress = countingMonitor; |
||||
reader = or; |
||||
baseTrees = bases.toArray(new ObjectId[bases.size()]); |
||||
edgeObjects = edges; |
||||
|
||||
alreadyProcessed = new IntSet(); |
||||
treeCache = new ObjectIdSubclassMap<TreeWithData>(); |
||||
parser = new CanonicalTreeParser(); |
||||
idBuf = new MutableObjectId(); |
||||
} |
||||
|
||||
void addBase(int objectType, byte[] pathBuf, int pathLen, int pathHash) |
||||
throws IOException { |
||||
final int tailMode = modeForType(objectType); |
||||
if (tailMode == 0) |
||||
return; |
||||
|
||||
if (!alreadyProcessed.add(pathHash)) |
||||
return; |
||||
|
||||
if (pathLen == 0) { |
||||
for (ObjectId root : baseTrees) |
||||
add(root, OBJ_TREE, pathHash); |
||||
return; |
||||
} |
||||
|
||||
final int firstSlash = nextSlash(pathBuf, 0, pathLen); |
||||
|
||||
CHECK_BASE: for (ObjectId root : baseTrees) { |
||||
int ptr = 0; |
||||
int end = firstSlash; |
||||
int mode = end != pathLen ? M_TREE : tailMode; |
||||
|
||||
parser.reset(readTree(root)); |
||||
while (!parser.eof()) { |
||||
int cmp = parser.pathCompare(pathBuf, ptr, end, mode); |
||||
|
||||
if (cmp < 0) { |
||||
parser.next(); |
||||
continue; |
||||
} |
||||
|
||||
if (cmp > 0) |
||||
continue CHECK_BASE; |
||||
|
||||
if (end == pathLen) { |
||||
if (parser.getEntryFileMode().getObjectType() == objectType) { |
||||
idBuf.fromRaw(parser.idBuffer(), parser.idOffset()); |
||||
add(idBuf, objectType, pathHash); |
||||
} |
||||
continue CHECK_BASE; |
||||
} |
||||
|
||||
if (!FileMode.TREE.equals(parser.getEntryRawMode())) |
||||
continue CHECK_BASE; |
||||
|
||||
ptr = end + 1; |
||||
end = nextSlash(pathBuf, ptr, pathLen); |
||||
mode = end != pathLen ? M_TREE : tailMode; |
||||
|
||||
idBuf.fromRaw(parser.idBuffer(), parser.idOffset()); |
||||
parser.reset(readTree(idBuf)); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private static int modeForType(int typeCode) { |
||||
switch (typeCode) { |
||||
case OBJ_TREE: |
||||
return M_TREE; |
||||
|
||||
case OBJ_BLOB: |
||||
return M_BLOB; |
||||
|
||||
default: |
||||
return 0; |
||||
} |
||||
} |
||||
|
||||
private static int nextSlash(byte[] pathBuf, int ptr, int end) { |
||||
while (ptr < end && pathBuf[ptr] != '/') |
||||
ptr++; |
||||
return ptr; |
||||
} |
||||
|
||||
private void add(AnyObjectId id, int objectType, int pathHash) { |
||||
ObjectToPack obj = new ObjectToPack(id, objectType); |
||||
obj.setEdge(); |
||||
obj.setPathHash(pathHash); |
||||
|
||||
if (edgeObjects.addIfAbsent(obj) == obj) |
||||
progress.update(1); |
||||
} |
||||
|
||||
private byte[] readTree(AnyObjectId id) throws MissingObjectException, |
||||
IncorrectObjectTypeException, IOException { |
||||
TreeWithData tree = treeCache.get(id); |
||||
if (tree != null) |
||||
return tree.buf; |
||||
|
||||
ObjectLoader ldr = reader.open(id, OBJ_TREE); |
||||
byte[] buf = ldr.getCachedBytes(Integer.MAX_VALUE); |
||||
treeCache.add(new TreeWithData(id, buf)); |
||||
return buf; |
||||
} |
||||
|
||||
private static class TreeWithData extends ObjectId { |
||||
final byte[] buf; |
||||
|
||||
TreeWithData(AnyObjectId id, byte[] buf) { |
||||
super(id); |
||||
this.buf = buf; |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,87 @@
|
||||
/* |
||||
* Copyright (C) 2011, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.storage.pack; |
||||
|
||||
/**
 * Growable set of primitive {@code int} keys, stored as a sorted array and
 * searched by binary search.
 */
class IntSet {
	/** Keys in ascending order; only the first {@link #size} slots are used. */
	private int[] keys;

	/** Number of keys currently stored. */
	private int size;

	/** Create an empty set with room for 64 keys before the first growth. */
	IntSet() {
		keys = new int[64];
	}

	/**
	 * Insert a key if it is not already present.
	 *
	 * @param key
	 *            the value to insert.
	 * @return {@code true} if the key was inserted; {@code false} if it was
	 *         already in the set.
	 */
	boolean add(int key) {
		// Binary search over [lo, hi); on a miss lo ends at the insert slot.
		// With an empty set the loop is skipped and the slot is 0.
		int lo = 0;
		int hi = size;
		while (lo < hi) {
			final int mid = (lo + hi) >>> 1;
			final int probe = keys[mid];
			if (probe == key)
				return false;
			if (key < probe)
				hi = mid;
			else
				lo = mid + 1;
		}

		// Double the backing array when every slot is occupied.
		if (size == keys.length) {
			final int[] grown = new int[keys.length * 2];
			System.arraycopy(keys, 0, grown, 0, size);
			keys = grown;
		}

		// Shift the tail right by one to open the insert slot.
		final int tail = size - lo;
		if (tail > 0)
			System.arraycopy(keys, lo, keys, lo + 1, tail);

		keys[lo] = key;
		size++;
		return true;
	}
}
Loading…
Reference in new issue