Browse Source
The most expensive part of packing a repository for transport to another system is enumerating all of the objects in the repository. Once this gets to the size of the linux-2.6 repository (1.8 million objects), enumeration can take several CPU minutes and costs a lot of temporary working set memory.

Teach PackWriter to efficiently reuse an existing "cached pack" by answering a clone request with a thin pack followed by a larger cached pack appended to the end. This requires the repository owner to first construct the cached pack by hand, and record the tip commits inside of $GIT_DIR/objects/info/cached-packs:

  cd $GIT_DIR
  root=$(git rev-parse master)
  tmp=objects/.tmp-$$
  names=$(echo $root | git pack-objects --keep-true-parents --revs $tmp)
  for n in $names; do
    chmod a-w $tmp-$n.pack $tmp-$n.idx
    touch objects/pack/pack-$n.keep
    mv $tmp-$n.pack objects/pack/pack-$n.pack
    mv $tmp-$n.idx objects/pack/pack-$n.idx
  done

  (echo "+ $root";
   for n in $names; do echo "P $n"; done;
   echo) >>objects/info/cached-packs

  git repack -a -d

When a clone request needs to include $root, the corresponding cached pack will be copied as-is, rather than enumerating all of the objects that are reachable from $root.

For a linux-2.6 kernel repository that should be about 376 MiB, the above process creates two packs of 368 MiB and 38 MiB[1]. This is a local disk usage increase of ~26 MiB, due to reduced delta compression between the large cached pack and the smaller recent activity pack. The overhead is similar to 1 full copy of the compressed project sources.

With this cached pack in hand, JGit daemon completes a clone request in 1m17s less time, but with a slightly larger data transfer (+2.39 MiB):

  Before:
    remote: Counting objects: 1861830, done
    remote: Finding sources: 100% (1861830/1861830)
    remote: Getting sizes: 100% (88243/88243)
    remote: Compressing objects: 100% (88184/88184)
    Receiving objects: 100% (1861830/1861830), 376.01 MiB | 19.01 MiB/s, done.
    remote: Total 1861830 (delta 4706), reused 1851053 (delta 1553844)
    Resolving deltas: 100% (1564621/1564621), done.

    real 3m19.005s

  After:
    remote: Counting objects: 1601, done
    remote: Counting objects: 1828460, done
    remote: Finding sources: 100% (50475/50475)
    remote: Getting sizes: 100% (18843/18843)
    remote: Compressing objects: 100% (7585/7585)
    remote: Total 1861830 (delta 2407), reused 1856197 (delta 37510)
    Receiving objects: 100% (1861830/1861830), 378.40 MiB | 31.31 MiB/s, done.
    Resolving deltas: 100% (1559477/1559477), done.

    real 2m2.938s

Repository owners can periodically refresh their cached packs by repacking their repository, folding all newer objects into a larger cached pack. Since repacking is already considered to be a normal Git maintenance activity, this isn't a very big burden.

[1] In this test $root was set back about two weeks.

Change-Id: Ib87131d5c4b5e8c5cacb0f4fe16ff4ece554734b
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>

stable-0.11
Shawn O. Pearce
14 years ago
18 changed files with 661 additions and 58 deletions
@ -0,0 +1,126 @@
|
||||
/* |
||||
* Copyright (C) 2011, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.storage.file; |
||||
|
||||
import java.io.File; |
||||
import java.io.FileNotFoundException; |
||||
import java.io.IOException; |
||||
import java.util.Collections; |
||||
import java.util.HashSet; |
||||
import java.util.List; |
||||
import java.util.Set; |
||||
|
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
import org.eclipse.jgit.storage.pack.CachedPack; |
||||
import org.eclipse.jgit.storage.pack.PackOutputStream; |
||||
|
||||
class LocalCachedPack extends CachedPack { |
||||
private final ObjectDirectory odb; |
||||
|
||||
private final Set<ObjectId> tips; |
||||
|
||||
private final String[] packNames; |
||||
|
||||
LocalCachedPack(ObjectDirectory odb, Set<ObjectId> tips, |
||||
List<String> packNames) { |
||||
this.odb = odb; |
||||
|
||||
if (tips.size() == 1) |
||||
this.tips = Collections.singleton(tips.iterator().next()); |
||||
else |
||||
this.tips = Collections.unmodifiableSet(tips); |
||||
|
||||
this.packNames = packNames.toArray(new String[packNames.size()]); |
||||
} |
||||
|
||||
@Override |
||||
public Set<ObjectId> getTips() { |
||||
return tips; |
||||
} |
||||
|
||||
@Override |
||||
public long getObjectCount() throws IOException { |
||||
long cnt = 0; |
||||
for (String packName : packNames) |
||||
cnt += getPackFile(packName).getObjectCount(); |
||||
return cnt; |
||||
} |
||||
|
||||
void copyAsIs(PackOutputStream out, WindowCursor wc) throws IOException { |
||||
for (String packName : packNames) |
||||
getPackFile(packName).copyPackAsIs(out, wc); |
||||
} |
||||
|
||||
@Override |
||||
public <T extends ObjectId> Set<ObjectId> hasObject(Iterable<T> toFind) |
||||
throws IOException { |
||||
PackFile[] packs = new PackFile[packNames.length]; |
||||
for (int i = 0; i < packNames.length; i++) |
||||
packs[i] = getPackFile(packNames[i]); |
||||
|
||||
Set<ObjectId> have = new HashSet<ObjectId>(); |
||||
for (ObjectId id : toFind) { |
||||
for (PackFile pack : packs) { |
||||
if (pack.hasObject(id)) { |
||||
have.add(id); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
return have; |
||||
} |
||||
|
||||
private PackFile getPackFile(String packName) throws FileNotFoundException { |
||||
for (PackFile pack : odb.getPacks()) { |
||||
if (packName.equals(pack.getPackName())) |
||||
return pack; |
||||
} |
||||
throw new FileNotFoundException(getPackFilePath(packName)); |
||||
} |
||||
|
||||
private String getPackFilePath(String packName) { |
||||
final File packDir = new File(odb.getDirectory(), "pack"); |
||||
return new File(packDir, "pack-" + packName + ".pack").getPath(); |
||||
} |
||||
} |
@ -0,0 +1,87 @@
|
||||
/* |
||||
* Copyright (C) 2011, Google Inc. |
||||
* and other copyright owners as documented in the project's IP log. |
||||
* |
||||
* This program and the accompanying materials are made available |
||||
* under the terms of the Eclipse Distribution License v1.0 which |
||||
* accompanies this distribution, is reproduced below, and is |
||||
* available at http://www.eclipse.org/org/documents/edl-v10.php
|
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* Redistribution and use in source and binary forms, with or |
||||
* without modification, are permitted provided that the following |
||||
* conditions are met: |
||||
* |
||||
* - Redistributions of source code must retain the above copyright |
||||
* notice, this list of conditions and the following disclaimer. |
||||
* |
||||
* - Redistributions in binary form must reproduce the above |
||||
* copyright notice, this list of conditions and the following |
||||
* disclaimer in the documentation and/or other materials provided |
||||
* with the distribution. |
||||
* |
||||
* - Neither the name of the Eclipse Foundation, Inc. nor the |
||||
* names of its contributors may be used to endorse or promote |
||||
* products derived from this software without specific prior |
||||
* written permission. |
||||
* |
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
||||
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||||
*/ |
||||
|
||||
package org.eclipse.jgit.storage.pack; |
||||
|
||||
import java.io.IOException; |
||||
import java.util.Set; |
||||
|
||||
import org.eclipse.jgit.lib.ObjectId; |
||||
|
||||
/** Describes a pack file {@link ObjectReuseAsIs} can append onto a stream. */ |
||||
public abstract class CachedPack { |
||||
/** |
||||
* Objects that start this pack. |
||||
* <p> |
||||
* All objects reachable from the tips are contained within this pack. If |
||||
* {@link PackWriter} is going to include everything reachable from all of |
||||
* these objects, this cached pack is eligible to be appended directly onto |
||||
* the output pack stream. |
||||
* |
||||
* @return the tip objects that describe this pack. |
||||
*/ |
||||
public abstract Set<ObjectId> getTips(); |
||||
|
||||
/** |
||||
* Get the number of objects in this pack. |
||||
* |
||||
* @return the total object count for the pack. |
||||
* @throws IOException |
||||
* if the object count cannot be read. |
||||
*/ |
||||
public abstract long getObjectCount() throws IOException; |
||||
|
||||
/** |
||||
* Determine if the pack contains the requested objects. |
||||
* |
||||
* @param <T> |
||||
* any type of ObjectId to search for. |
||||
* @param toFind |
||||
* the objects to search for. |
||||
* @return the objects contained in the pack. |
||||
* @throws IOException |
||||
* the pack cannot be accessed |
||||
*/ |
||||
public abstract <T extends ObjectId> Set<ObjectId> hasObject( |
||||
Iterable<T> toFind) throws IOException; |
||||
} |
Loading…
Reference in new issue