Browse Source

PackWriter: Support reuse of entire packs

The most expensive part of packing a repository for transport to
another system is enumerating all of the objects in the repository.
Once this gets to the size of the linux-2.6 repository (1.8 million
objects), enumeration can take several CPU minutes and costs a lot
of temporary working set memory.

Teach PackWriter to efficiently reuse an existing "cached pack"
by answering a clone request with a thin pack followed by a larger
cached pack appended to the end.  This requires the repository
owner to first construct the cached pack by hand, and record the
tip commits inside of $GIT_DIR/objects/info/cached-packs:

  cd $GIT_DIR
  root=$(git rev-parse master)
  tmp=objects/.tmp-$$
  names=$(echo $root | git pack-objects --keep-true-parents --revs $tmp)
  for n in $names; do
    chmod a-w $tmp-$n.pack $tmp-$n.idx
    touch objects/pack/pack-$n.keep
    mv $tmp-$n.pack objects/pack/pack-$n.pack
    mv $tmp-$n.idx objects/pack/pack-$n.idx
  done

  (echo "+ $root";
   for n in $names; do echo "P $n"; done;
   echo) >>objects/info/cached-packs

  git repack -a -d

When a clone request needs to include $root, the corresponding
cached pack will be copied as-is, rather than enumerating all of
the objects that are reachable from $root.

For a linux-2.6 kernel repository that should be about 376 MiB,
the above process creates two packs of 368 MiB and 38 MiB[1].
This is a local disk usage increase of ~26 MiB, due to reduced
delta compression between the large cached pack and the smaller
recent activity pack.  The overhead is similar to 1 full copy of
the compressed project sources.

With this cached pack in hand, JGit daemon completes a clone request
in 1m17s less time, but with a slightly larger data transfer (+2.39 MiB):

  Before:
    remote: Counting objects: 1861830, done
    remote: Finding sources: 100% (1861830/1861830)
    remote: Getting sizes: 100% (88243/88243)
    remote: Compressing objects: 100% (88184/88184)
    Receiving objects: 100% (1861830/1861830), 376.01 MiB | 19.01 MiB/s, done.
    remote: Total 1861830 (delta 4706), reused 1851053 (delta 1553844)
    Resolving deltas: 100% (1564621/1564621), done.

    real  3m19.005s

  After:
    remote: Counting objects: 1601, done
    remote: Counting objects: 1828460, done
    remote: Finding sources: 100% (50475/50475)
    remote: Getting sizes: 100% (18843/18843)
    remote: Compressing objects: 100% (7585/7585)
    remote: Total 1861830 (delta 2407), reused 1856197 (delta 37510)
    Receiving objects: 100% (1861830/1861830), 378.40 MiB | 31.31 MiB/s, done.
    Resolving deltas: 100% (1559477/1559477), done.

    real 2m2.938s

Repository owners can periodically refresh their cached packs by
repacking their repository, folding all newer objects into a larger
cached pack.  Since repacking is already considered to be a normal
Git maintenance activity, this isn't a very big burden.

[1] In this test $root was set back about two weeks.

Change-Id: Ib87131d5c4b5e8c5cacb0f4fe16ff4ece554734b
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.11
Shawn O. Pearce 14 years ago
parent
commit
461b012e95
  1. 1
      org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties
  2. 1
      org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java
  3. 6
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteArrayWindow.java
  4. 17
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteBufferWindow.java
  5. 6
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteWindow.java
  6. 7
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/CachedObjectDirectory.java
  7. 10
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/FileObjectDatabase.java
  8. 126
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LocalCachedPack.java
  9. 106
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ObjectDirectory.java
  10. 17
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
  11. 24
      org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java
  12. 13
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BaseSearch.java
  13. 87
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/CachedPack.java
  14. 30
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java
  15. 4
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackOutputStream.java
  16. 262
      org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java
  17. 1
      org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java
  18. 1
      org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java

1
org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties

@ -35,6 +35,7 @@ bareRepositoryNoWorkdirAndIndex=Bare Repository has neither a working tree, nor
blobNotFound=Blob not found: {0}
branchNameInvalid=Branch name {0} is not allowed
blobNotFoundForPath=Blob not found: {0} for path: {1}
cachedPacksPreventsIndexCreation=Using cached packs prevents index creation
cannotBeCombined=Cannot be combined.
cannotCombineTreeFilterWithRevFilter=Cannot combine TreeFilter {0} with RefFilter {1}.
cannotCommitOnARepoWithState=Cannot commit on a repo with state: {0}

1
org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java

@ -95,6 +95,7 @@ public class JGitText extends TranslationBundle {
/***/ public String blobNotFound;
/***/ public String blobNotFoundForPath;
/***/ public String branchNameInvalid;
/***/ public String cachedPacksPreventsIndexCreation;
/***/ public String cannotBeCombined;
/***/ public String cannotCombineTreeFilterWithRevFilter;
/***/ public String cannotCommitOnARepoWithState;

6
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteArrayWindow.java

@ -46,11 +46,12 @@
package org.eclipse.jgit.storage.file;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import org.eclipse.jgit.storage.pack.PackOutputStream;
/**
* A {@link ByteWindow} with an underlying byte array for storage.
*/
@ -81,7 +82,8 @@ final class ByteArrayWindow extends ByteWindow {
out.update(array, (int) (pos - start), cnt);
}
void write(OutputStream out, long pos, int cnt) throws IOException {
@Override
void write(PackOutputStream out, long pos, int cnt) throws IOException {
out.write(array, (int) (pos - start), cnt);
}

17
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteBufferWindow.java

@ -45,10 +45,13 @@
package org.eclipse.jgit.storage.file;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import org.eclipse.jgit.storage.pack.PackOutputStream;
/**
* A window for accessing git packs using a {@link ByteBuffer} for storage.
*
@ -71,6 +74,20 @@ final class ByteBufferWindow extends ByteWindow {
return n;
}
/**
 * Copy a region of this window onto the pack output stream.
 * <p>
 * The bytes are staged through the stream's copy buffer, one chunk of at
 * most that buffer's length at a time.
 *
 * @param out
 *            stream receiving the bytes.
 * @param pos
 *            absolute position of the first byte; {@code start} is
 *            subtracted to obtain the offset inside the buffer.
 * @param cnt
 *            number of bytes to copy.
 * @throws IOException
 *             the stream did not accept a write.
 */
@Override
void write(PackOutputStream out, long pos, int cnt) throws IOException {
	// Work on a slice so the shared buffer's own position is not moved.
	final ByteBuffer src = buffer.slice();
	src.position((int) (pos - start));
	for (int remaining = cnt; remaining > 0;) {
		final byte[] tmp = out.getCopyBuffer();
		final int len = Math.min(remaining, tmp.length);
		src.get(tmp, 0, len);
		out.write(tmp, 0, len);
		remaining -= len;
	}
}
@Override
protected int setInput(final int pos, final Inflater inf)
throws DataFormatException {

6
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteWindow.java

@ -44,9 +44,12 @@
package org.eclipse.jgit.storage.file;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import org.eclipse.jgit.storage.pack.PackOutputStream;
/**
* A window of data currently stored within a cache.
* <p>
@ -117,6 +120,9 @@ abstract class ByteWindow {
*/
protected abstract int copy(int pos, byte[] dstbuf, int dstoff, int cnt);
abstract void write(PackOutputStream out, long pos, int cnt)
throws IOException;
final int setInput(long pos, Inflater inf) throws DataFormatException {
return setInput((int) (pos - start), inf);
}

7
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/CachedObjectDirectory.java

@ -46,6 +46,7 @@ package org.eclipse.jgit.storage.file;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Set;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
@ -56,6 +57,7 @@ import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSubclassMap;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.storage.pack.CachedPack;
import org.eclipse.jgit.storage.pack.ObjectToPack;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.FS;
@ -138,6 +140,11 @@ class CachedObjectDirectory extends FileObjectDatabase {
return wrapped.getFS();
}
/**
 * Obtain the cached packs by delegating to the wrapped database.
 *
 * @return whatever {@code wrapped.getCachedPacks()} returns.
 * @throws IOException
 *             propagated from the wrapped database.
 */
@Override
Collection<? extends CachedPack> getCachedPacks() throws IOException {
return wrapped.getCachedPacks();
}
@Override
AlternateHandle[] myAlternates() {
if (alts == null) {

10
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/FileObjectDatabase.java

@ -45,6 +45,7 @@ package org.eclipse.jgit.storage.file;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Set;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
@ -54,6 +55,7 @@ import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.storage.pack.CachedPack;
import org.eclipse.jgit.storage.pack.ObjectToPack;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.FS;
@ -258,6 +260,9 @@ abstract class FileObjectDatabase extends ObjectDatabase {
abstract File getDirectory();
abstract Collection<? extends CachedPack> getCachedPacks()
throws IOException;
abstract AlternateHandle[] myAlternates();
abstract boolean tryAgain1();
@ -292,6 +297,11 @@ abstract class FileObjectDatabase extends ObjectDatabase {
this.db = db;
}
/**
 * Obtain the cached packs of the database behind this handle.
 * <p>
 * The database declares a wildcard element type; the unchecked cast below
 * narrows it to {@code CachedPack} for the caller's convenience.
 *
 * @return the cached packs reported by {@code db}.
 * @throws IOException
 *             propagated from {@code db.getCachedPacks()}.
 */
@SuppressWarnings("unchecked")
Collection<CachedPack> getCachedPacks() throws IOException {
return (Collection<CachedPack>) db.getCachedPacks();
}
void close() {
db.close();
}

126
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LocalCachedPack.java

@ -0,0 +1,126 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.file;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.storage.pack.CachedPack;
import org.eclipse.jgit.storage.pack.PackOutputStream;
/**
 * A {@link CachedPack} made of one or more named pack files that are looked
 * up in an {@link ObjectDirectory}.
 */
class LocalCachedPack extends CachedPack {
	private final ObjectDirectory odb;

	private final Set<ObjectId> tips;

	private final String[] packNames;

	/**
	 * @param odb
	 *            directory whose pack list is searched for the named packs.
	 * @param tips
	 *            tip objects describing this cached pack.
	 * @param packNames
	 *            pack names, as reported by {@code PackFile.getPackName()}.
	 */
	LocalCachedPack(ObjectDirectory odb, Set<ObjectId> tips,
			List<String> packNames) {
		this.odb = odb;
		// A single tip is stored as a singleton set, otherwise the supplied
		// set is wrapped as read-only.
		this.tips = tips.size() == 1
				? Collections.singleton(tips.iterator().next())
				: Collections.unmodifiableSet(tips);
		this.packNames = packNames.toArray(new String[packNames.size()]);
	}

	@Override
	public Set<ObjectId> getTips() {
		return tips;
	}

	/** Sums the object counts of every pack file named by this cached pack. */
	@Override
	public long getObjectCount() throws IOException {
		long total = 0;
		for (int i = 0; i < packNames.length; i++)
			total += getPackFile(packNames[i]).getObjectCount();
		return total;
	}

	/**
	 * Copy every pack file of this cached pack, in declaration order.
	 *
	 * @param out
	 *            stream to append the packs onto.
	 * @param wc
	 *            cursor used to read the pack files.
	 * @throws IOException
	 *             a pack is missing, unreadable, or the stream failed.
	 */
	void copyAsIs(PackOutputStream out, WindowCursor wc) throws IOException {
		for (int i = 0; i < packNames.length; i++)
			getPackFile(packNames[i]).copyPackAsIs(out, wc);
	}

	/**
	 * Report which of the requested objects appear in any of the packs.
	 * <p>
	 * All pack files are resolved up front, before any object is tested.
	 */
	@Override
	public <T extends ObjectId> Set<ObjectId> hasObject(Iterable<T> toFind)
			throws IOException {
		final PackFile[] packs = new PackFile[packNames.length];
		int idx = 0;
		for (String packName : packNames)
			packs[idx++] = getPackFile(packName);

		final Set<ObjectId> found = new HashSet<ObjectId>();
		for (ObjectId id : toFind) {
			if (isInAny(packs, id))
				found.add(id);
		}
		return found;
	}

	/** @return true as soon as one of {@code packs} reports {@code id}. */
	private static boolean isInAny(PackFile[] packs, ObjectId id)
			throws IOException {
		for (PackFile candidate : packs) {
			if (candidate.hasObject(id))
				return true;
		}
		return false;
	}

	/**
	 * Find the pack with the given name in the directory's pack list.
	 *
	 * @throws FileNotFoundException
	 *             no listed pack has that name; the message is the path the
	 *             pack file would have under the "pack" subdirectory.
	 */
	private PackFile getPackFile(String packName) throws FileNotFoundException {
		for (PackFile candidate : odb.getPacks()) {
			if (packName.equals(candidate.getPackName()))
				return candidate;
		}
		final File packDir = new File(odb.getDirectory(), "pack");
		final File expected = new File(packDir, "pack-" + packName + ".pack");
		throw new FileNotFoundException(expected.getPath());
	}
}

106
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ObjectDirectory.java

@ -72,10 +72,13 @@ import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.RepositoryCache;
import org.eclipse.jgit.lib.RepositoryCache.FileKey;
import org.eclipse.jgit.storage.pack.CachedPack;
import org.eclipse.jgit.storage.pack.ObjectToPack;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.FS;
import org.eclipse.jgit.util.FileUtils;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.RawParseUtils;
/**
* Traditional file system based {@link ObjectDatabase}.
@ -112,8 +115,12 @@ public class ObjectDirectory extends FileObjectDatabase {
private final File alternatesFile;
private final File cachedPacksFile;
private final AtomicReference<PackList> packList;
private final AtomicReference<CachedPackList> cachedPacks;
private final FS fs;
private final AtomicReference<AlternateHandle[]> alternates;
@ -142,7 +149,9 @@ public class ObjectDirectory extends FileObjectDatabase {
infoDirectory = new File(objects, "info");
packDirectory = new File(objects, "pack");
alternatesFile = new File(infoDirectory, "alternates");
cachedPacksFile = new File(infoDirectory, "cached-packs");
packList = new AtomicReference<PackList>(NO_PACKS);
cachedPacks = new AtomicReference<CachedPackList>();
unpackedObjectCache = new UnpackedObjectCache();
this.fs = fs;
@ -226,6 +235,83 @@ public class ObjectDirectory extends FileObjectDatabase {
return Collections.unmodifiableCollection(Arrays.asList(packs));
}
/**
 * List the cached packs of this directory followed by those of its
 * alternates.
 * <p>
 * The parsed list is kept in {@code cachedPacks} and only rebuilt when no
 * list exists yet or the file snapshot reports a modification.
 *
 * @return cached packs of this directory, then of each alternate. The
 *         collection may be a shared read-only list; callers must not
 *         modify it.
 * @throws IOException
 *             the cached-packs file, or an alternate's, cannot be read.
 */
@Override
Collection<? extends CachedPack> getCachedPacks() throws IOException {
	CachedPackList list = cachedPacks.get();
	if (list == null || list.snapshot.isModified(cachedPacksFile)) {
		final CachedPackList scanned = scanCachedPacks(list);
		// Remember the scan result. Without this the reference stays
		// unset and every call re-reads and re-parses the file. Losing a
		// race to another thread is harmless; we still use our own scan.
		if (scanned != list)
			cachedPacks.compareAndSet(list, scanned);
		list = scanned;
	}

	Collection<CachedPack> result = list.getCachedPacks();
	boolean resultIsCopy = false;

	for (AlternateHandle h : myAlternates()) {
		Collection<CachedPack> altPacks = h.getCachedPacks();
		if (altPacks.isEmpty())
			continue;

		if (result.isEmpty()) {
			// Nothing of our own so far, reuse the alternate's collection.
			result = altPacks;
			continue;
		}

		if (!resultIsCopy) {
			// Copy before merging so shared lists are never mutated.
			result = new ArrayList<CachedPack>(result);
			resultIsCopy = true;
		}
		result.addAll(altPacks);
	}
	return result;
}
/**
 * Read and parse the cached-packs file.
 * <p>
 * Recognized lines: {@code #} comments and blank lines are skipped,
 * {@code + <id>} adds a tip, and a run of {@code P <name>} lines names the
 * pack files for the tips collected so far. Lines of any other kind are
 * skipped. A missing file is treated as an empty file.
 *
 * @param old
 *            the previously parsed list, or null. It is returned as-is
 *            (with its snapshot marked clean) when neither the snapshot
 *            nor the raw file content changed.
 * @return the parsed list of cached packs.
 * @throws IOException
 *             the file exists but cannot be read.
 */
private CachedPackList scanCachedPacks(CachedPackList old)
		throws IOException {
	FileSnapshot s = FileSnapshot.save(cachedPacksFile);
	byte[] buf;
	try {
		buf = IO.readFully(cachedPacksFile);
	} catch (FileNotFoundException e) {
		buf = new byte[0];
	}

	if (old != null && old.snapshot.equals(s)
			&& Arrays.equals(old.raw, buf)) {
		old.snapshot.setClean(s);
		return old;
	}

	ArrayList<LocalCachedPack> list = new ArrayList<LocalCachedPack>(4);
	Set<ObjectId> tips = new HashSet<ObjectId>();
	int ptr = 0;
	while (ptr < buf.length) {
		if (buf[ptr] == '#' || buf[ptr] == '\n') {
			ptr = RawParseUtils.nextLF(buf, ptr);
			continue;
		}

		if (buf[ptr] == '+') {
			tips.add(ObjectId.fromString(buf, ptr + 2));
			ptr = RawParseUtils.nextLF(buf, ptr + 2);
			continue;
		}

		if (buf[ptr] != 'P') {
			// Unrecognized line type. It must be consumed here; otherwise
			// ptr never advances and this loop would spin forever.
			ptr = RawParseUtils.nextLF(buf, ptr);
			continue;
		}

		List<String> names = new ArrayList<String>(4);
		while (ptr < buf.length && buf[ptr] == 'P') {
			int end = RawParseUtils.nextLF(buf, ptr);
			// Drop the trailing LF (absent on an unterminated last line).
			if (buf[end - 1] == '\n')
				end--;
			names.add(RawParseUtils.decode(buf, ptr + 2, end));
			ptr = RawParseUtils.nextLF(buf, end);
		}

		// Pack names without any preceding tips are discarded.
		if (!tips.isEmpty() && !names.isEmpty()) {
			list.add(new LocalCachedPack(this, tips, names));
			tips = new HashSet<ObjectId>();
		}
	}
	list.trimToSize();
	return new CachedPackList(s, Collections.unmodifiableList(list), buf);
}
/**
* Add a single existing pack to the list of available pack files.
*
@ -760,6 +846,26 @@ public class ObjectDirectory extends FileObjectDatabase {
}
}
/**
 * Result of one parse of the cached-packs file: the file snapshot taken
 * for it, the packs it described, and the raw bytes that were parsed.
 */
private static final class CachedPackList {
	final FileSnapshot snapshot;

	final Collection<LocalCachedPack> packs;

	final byte[] raw;

	CachedPackList(FileSnapshot sn, List<LocalCachedPack> list, byte[] buf) {
		this.snapshot = sn;
		this.packs = list;
		this.raw = buf;
	}

	/** @return {@link #packs}, viewed through the base element type. */
	@SuppressWarnings("unchecked")
	Collection<CachedPack> getCachedPacks() {
		return (Collection<CachedPack>) (Collection<?>) packs;
	}
}
@Override
public ObjectDatabase newCachedDatabase() {
return newCachedFileObjectDatabase();

17
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java

@ -175,6 +175,16 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
return packFile;
}
/**
 * Get the pack's name without the surrounding file name decoration.
 *
 * @return the file name of {@link #getPackFile()} with a leading
 *         {@code pack-} and a trailing {@code .pack} removed, each only if
 *         present.
 */
public String getPackName() {
	final String prefix = "pack-";
	final String suffix = ".pack";
	String result = getPackFile().getName();
	if (result.startsWith(prefix))
		result = result.substring(prefix.length());
	if (result.endsWith(suffix))
		result = result.substring(0, result.length() - suffix.length());
	return result;
}
/**
* Determine if an object is contained within the pack file.
* <p>
@ -295,6 +305,13 @@ public class PackFile implements Iterable<PackIndex.MutableEntry> {
return dstbuf;
}
/**
 * Copy this pack's content, minus the first 12 and the last 20 bytes, onto
 * the output stream.
 *
 * @param out
 *            stream to append the pack content onto.
 * @param curs
 *            cursor used to read this pack.
 * @throws IOException
 *             the pack cannot be read, or the stream rejected a write.
 */
void copyPackAsIs(PackOutputStream out, WindowCursor curs)
		throws IOException {
	final int leadingSkip = 12;
	final int trailingSkip = 20;

	// Pin the first window, this ensures the length is accurate.
	curs.pin(this, 0);

	final long cnt = length - (leadingSkip + trailingSkip);
	curs.copyPackAsIs(this, out, leadingSkip, cnt);
}
final void copyAsIs(PackOutputStream out, LocalObjectToPack src,
WindowCursor curs) throws IOException,
StoredObjectRepresentationNotAvailableException {

24
org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java

@ -64,6 +64,7 @@ import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.storage.pack.CachedPack;
import org.eclipse.jgit.storage.pack.ObjectReuseAsIs;
import org.eclipse.jgit.storage.pack.ObjectToPack;
import org.eclipse.jgit.storage.pack.PackOutputStream;
@ -154,6 +155,11 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
out.writeObject(otp);
}
/**
 * Obtain the cached packs known to the underlying database.
 * <p>
 * The database declares a wildcard element type; the unchecked cast below
 * narrows it to {@code CachedPack}.
 *
 * @return the cached packs reported by {@code db}.
 * @throws IOException
 *             propagated from {@code db.getCachedPacks()}.
 */
@SuppressWarnings("unchecked")
public Collection<CachedPack> getCachedPacks() throws IOException {
return (Collection<CachedPack>) db.getCachedPacks();
}
/**
* Copy bytes from the window to a caller supplied buffer.
*
@ -190,6 +196,24 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
return cnt - need;
}
/**
 * Append a cached pack onto the output stream.
 *
 * @param out
 *            stream to append the pack onto.
 * @param pack
 *            the pack to send; it is cast to {@link LocalCachedPack}, so
 *            any other implementation fails with a ClassCastException.
 * @throws IOException
 *             the pack cannot be read, or the stream rejected a write.
 */
public void copyPackAsIs(PackOutputStream out, CachedPack pack)
		throws IOException {
	final LocalCachedPack local = (LocalCachedPack) pack;
	local.copyAsIs(out, this);
}
/**
 * Copy a byte range of a pack file onto the output stream, one pinned
 * window at a time.
 *
 * @param pack
 *            the pack file to read.
 * @param out
 *            stream receiving the bytes.
 * @param position
 *            offset of the first byte to copy.
 * @param cnt
 *            number of bytes to copy.
 * @throws IOException
 *             the pack cannot be read, or the stream rejected a write.
 */
void copyPackAsIs(final PackFile pack, final PackOutputStream out,
		long position, long cnt) throws IOException {
	for (long remaining = cnt; remaining > 0;) {
		pin(pack, position);
		final int offsetInWindow = (int) (position - window.start);
		// Never read past the current window, nor past the requested range.
		final int len = (int) Math.min(window.size() - offsetInWindow,
				remaining);
		window.write(out, position, len);
		position += len;
		remaining -= len;
	}
}
/**
* Inflate a region of the pack starting at {@code position}.
*

13
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BaseSearch.java

@ -47,6 +47,7 @@ import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
@ -73,7 +74,9 @@ class BaseSearch {
private final ObjectId[] baseTrees;
private final ObjectIdSubclassMap<ObjectToPack> edgeObjects;
private final ObjectIdSubclassMap<ObjectToPack> objectsMap;
private final List<ObjectToPack> edgeObjects;
private final IntSet alreadyProcessed;
@ -84,10 +87,12 @@ class BaseSearch {
private final MutableObjectId idBuf;
BaseSearch(ProgressMonitor countingMonitor, Set<RevTree> bases,
ObjectIdSubclassMap<ObjectToPack> edges, ObjectReader or) {
ObjectIdSubclassMap<ObjectToPack> objects,
List<ObjectToPack> edges, ObjectReader or) {
progress = countingMonitor;
reader = or;
baseTrees = bases.toArray(new ObjectId[bases.size()]);
objectsMap = objects;
edgeObjects = edges;
alreadyProcessed = new IntSet();
@ -175,8 +180,10 @@ class BaseSearch {
obj.setEdge();
obj.setPathHash(pathHash);
if (edgeObjects.addIfAbsent(obj) == obj)
if (objectsMap.addIfAbsent(obj) == obj) {
edgeObjects.add(obj);
progress.update(1);
}
}
private byte[] readTree(AnyObjectId id) throws MissingObjectException,

87
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/CachedPack.java

@ -0,0 +1,87 @@
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.pack;
import java.io.IOException;
import java.util.Set;
import org.eclipse.jgit.lib.ObjectId;
/**
 * Describes a pack file {@link ObjectReuseAsIs} can append onto a stream.
 * <p>
 * This class only declares the contract; all three operations are abstract
 * and supplied by the storage implementation.
 */
public abstract class CachedPack {
/**
 * Objects that start this pack.
 * <p>
 * All objects reachable from the tips are contained within this pack. If
 * {@link PackWriter} is going to include everything reachable from all of
 * these objects, this cached pack is eligible to be appended directly onto
 * the output pack stream.
 *
 * @return the tip objects that describe this pack.
 */
public abstract Set<ObjectId> getTips();
/**
 * Get the number of objects in this pack.
 *
 * @return the total object count for the pack.
 * @throws IOException
 *             if the object count cannot be read.
 */
public abstract long getObjectCount() throws IOException;
/**
 * Determine if the pack contains the requested objects.
 *
 * @param <T>
 *            any type of ObjectId to search for.
 * @param toFind
 *            the objects to search for.
 * @return the subset of {@code toFind} that is contained in the pack.
 * @throws IOException
 *             the pack cannot be accessed.
 */
public abstract <T extends ObjectId> Set<ObjectId> hasObject(
Iterable<T> toFind) throws IOException;
}

30
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/ObjectReuseAsIs.java

@ -44,6 +44,7 @@
package org.eclipse.jgit.storage.pack;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.eclipse.jgit.errors.MissingObjectException;
@ -180,4 +181,33 @@ public interface ObjectReuseAsIs {
*/
public void copyObjectAsIs(PackOutputStream out, ObjectToPack otp)
throws IOException, StoredObjectRepresentationNotAvailableException;
/**
* Obtain the available cached packs.
* <p>
* A cached pack has known starting points and may be sent entirely as-is,
* with almost no effort on the sender's part.
*
* @return the available cached packs.
* @throws IOException
* the cached packs cannot be listed from the repository.
* Callers may choose to ignore this and continue as-if there
* were no cached packs.
*/
public Collection<CachedPack> getCachedPacks() throws IOException;
/**
* Append an entire pack's contents onto the output stream.
* <p>
* The entire pack, excluding its header and trailing footer is sent.
*
* @param out
* stream to append the pack onto.
* @param pack
* the cached pack to send.
* @throws IOException
* the pack cannot be read, or stream did not accept a write.
*/
public abstract void copyPackAsIs(PackOutputStream out, CachedPack pack)
throws IOException;
}

4
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackOutputStream.java

@ -135,10 +135,10 @@ public final class PackOutputStream extends OutputStream {
out.flush();
}
void writeFileHeader(int version, int objectCount) throws IOException {
void writeFileHeader(int version, long objectCount) throws IOException {
System.arraycopy(Constants.PACK_SIGNATURE, 0, headerBuffer, 0, 4);
NB.encodeInt32(headerBuffer, 4, version);
NB.encodeInt32(headerBuffer, 8, objectCount);
NB.encodeInt32(headerBuffer, 8, (int) objectCount);
write(headerBuffer, 0, 12);
}

262
org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/PackWriter.java

@ -56,9 +56,11 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
@ -90,6 +92,7 @@ import org.eclipse.jgit.revwalk.AsyncRevObjectQueue;
import org.eclipse.jgit.revwalk.ObjectWalk;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevFlagSet;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevSort;
import org.eclipse.jgit.revwalk.RevTree;
@ -145,7 +148,9 @@ public class PackWriter {
private final ObjectIdSubclassMap<ObjectToPack> objectsMap = new ObjectIdSubclassMap<ObjectToPack>();
// edge objects for thin packs
private final ObjectIdSubclassMap<ObjectToPack> edgeObjects = new ObjectIdSubclassMap<ObjectToPack>();
private List<ObjectToPack> edgeObjects = new ArrayList<ObjectToPack>();
private List<CachedPack> cachedPacks = new ArrayList<CachedPack>(2);
private Deflater myDeflater;
@ -168,6 +173,8 @@ public class PackWriter {
private boolean thin;
private boolean useCachedPacks;
private boolean ignoreMissingUninteresting = true;
/**
@ -281,6 +288,24 @@ public class PackWriter {
thin = packthin;
}
/**
 * @return true to reuse cached packs. If true, index creation isn't
 *         available.
 */
public boolean isUseCachedPacks() {
return useCachedPacks;
}
/**
 * @param useCached
 *            if set to true and a cached pack is present, it will be
 *            appended onto the end of a thin-pack, reducing the amount of
 *            working set space and CPU used by PackWriter. Enabling this
 *            feature prevents PackWriter from creating an index for the
 *            newly created pack, so it's only suitable for writing to a
 *            network client, where the client will make the index.
 */
public void setUseCachedPacks(boolean useCached) {
useCachedPacks = useCached;
}
/**
* @return true to ignore objects that are uninteresting and also not found
* on local disk; false to throw a {@link MissingObjectException}
@ -308,8 +333,8 @@ public class PackWriter {
*
* @return number of objects in pack.
*/
public int getObjectsNumber() {
return objectsMap.size();
public long getObjectsNumber() {
return stats.totalObjects;
}
/**
@ -323,26 +348,15 @@ public class PackWriter {
* a caller side. Iterator must return each id of object to write exactly
* once.
* </p>
* <p>
* When iterator returns object that has {@link RevFlag#UNINTERESTING} flag,
* this object won't be included in an output pack. Instead, it is recorded
* as edge-object (known to remote repository) for thin-pack. In such a case
* writer may pack objects with delta base object not within set of objects
* to pack, but belonging to party repository - those marked with
* {@link RevFlag#UNINTERESTING} flag. This type of pack is used only for
* transport.
* </p>
*
* @param objectsSource
* iterator of object to store in a pack; order of objects within
* each type is important, ordering by type is not needed;
* allowed types for objects are {@link Constants#OBJ_COMMIT},
* {@link Constants#OBJ_TREE}, {@link Constants#OBJ_BLOB} and
* {@link Constants#OBJ_TAG}; objects returned by iterator may
* be later reused by caller as object id and type are internally
* copied in each iteration; if object returned by iterator has
* {@link RevFlag#UNINTERESTING} flag set, it won't be included
* in a pack, but is considered as edge-object for thin-pack.
* {@link Constants#OBJ_TAG}; objects returned by iterator may be
* later reused by caller as object id and type are internally
* copied in each iteration.
* @throws IOException
* when some I/O problem occur during reading objects.
*/
@ -392,9 +406,21 @@ public class PackWriter {
* @param id
* the object to test the existence of.
* @return true if the object will appear in the output pack file.
* @throws IOException
* a cached pack cannot be examined.
*/
public boolean willInclude(final AnyObjectId id) {
return get(id) != null;
public boolean willInclude(final AnyObjectId id) throws IOException {
ObjectToPack obj = objectsMap.get(id);
if (obj != null && !obj.isEdge())
return true;
Set<ObjectId> toFind = Collections.singleton(id.toObjectId());
for (CachedPack pack : cachedPacks) {
if (pack.hasObject(toFind).contains(id))
return true;
}
return false;
}
/**
@ -405,7 +431,8 @@ public class PackWriter {
* @return the object we are packing, or null.
*/
public ObjectToPack get(AnyObjectId id) {
return objectsMap.get(id);
ObjectToPack obj = objectsMap.get(id);
return obj != null && !obj.isEdge() ? obj : null;
}
/**
@ -439,6 +466,9 @@ public class PackWriter {
* the index data could not be written to the supplied stream.
*/
public void writeIndex(final OutputStream indexStream) throws IOException {
if (!cachedPacks.isEmpty())
throw new IOException(JGitText.get().cachedPacksPreventsIndexCreation);
final List<ObjectToPack> list = sortByName();
final PackIndexWriter iw;
int indexVersion = config.getIndexVersion();
@ -451,7 +481,10 @@ public class PackWriter {
private List<ObjectToPack> sortByName() {
if (sortedByName == null) {
sortedByName = new ArrayList<ObjectToPack>(objectsMap.size());
int cnt = 0;
for (List<ObjectToPack> list : objectsLists)
cnt += list.size();
sortedByName = new ArrayList<ObjectToPack>(cnt);
for (List<ObjectToPack> list : objectsLists) {
for (ObjectToPack otp : list)
sortedByName.add(otp);
@ -501,12 +534,21 @@ public class PackWriter {
final PackOutputStream out = new PackOutputStream(writeMonitor,
packStream, this);
int objCnt = getObjectsNumber();
long objCnt = 0;
for (List<ObjectToPack> list : objectsLists)
objCnt += list.size();
for (CachedPack pack : cachedPacks)
objCnt += pack.getObjectCount();
stats.totalObjects = objCnt;
writeMonitor.beginTask(JGitText.get().writingObjects, objCnt);
writeMonitor.beginTask(JGitText.get().writingObjects, (int) objCnt);
out.writeFileHeader(PACK_VERSION_GENERATED, objCnt);
out.flush();
writeObjects(out);
for (CachedPack pack : cachedPacks) {
stats.reusedObjects += pack.getObjectCount();
reuseSupport.copyPackAsIs(out, pack);
}
writeChecksum(out);
reader.release();
@ -532,7 +574,10 @@ public class PackWriter {
}
private void searchForReuse(ProgressMonitor monitor) throws IOException {
monitor.beginTask(JGitText.get().searchForReuse, getObjectsNumber());
int cnt = 0;
for (List<ObjectToPack> list : objectsLists)
cnt += list.size();
monitor.beginTask(JGitText.get().searchForReuse, cnt);
for (List<ObjectToPack> list : objectsLists)
reuseSupport.selectObjectRepresentation(this, monitor, list);
monitor.endTask();
@ -591,8 +636,8 @@ public class PackWriter {
continue;
}
otp = edgeObjects.get(notFound.getObjectId());
if (otp != null) {
otp = objectsMap.get(notFound.getObjectId());
if (otp != null && otp.isEdge()) {
otp.setDoNotDelta(true);
continue;
}
@ -601,11 +646,8 @@ public class PackWriter {
}
ObjectToPack otp = sizeQueue.getCurrent();
if (otp == null) {
if (otp == null)
otp = objectsMap.get(sizeQueue.getObjectId());
if (otp == null)
otp = edgeObjects.get(sizeQueue.getObjectId());
}
long sz = sizeQueue.getSize();
if (limit <= sz || Integer.MAX_VALUE <= sz)
@ -1014,16 +1056,38 @@ public class PackWriter {
all.addAll(want);
all.addAll(have);
final Map<ObjectId, CachedPack> tipToPack = new HashMap<ObjectId, CachedPack>();
final ObjectWalk walker = new ObjectWalk(reader);
final RevFlag inCachedPack = walker.newFlag("inCachedPack");
final RevFlag include = walker.newFlag("include");
final RevFlagSet keepOnRestart = new RevFlagSet();
keepOnRestart.add(inCachedPack);
walker.setRetainBody(false);
if (have.isEmpty())
walker.carry(include);
int haveEst = have.size();
if (have.isEmpty()) {
walker.sort(RevSort.COMMIT_TIME_DESC);
else {
if (useCachedPacks && reuseSupport != null) {
for (CachedPack pack : reuseSupport.getCachedPacks()) {
for (ObjectId id : pack.getTips()) {
tipToPack.put(id, pack);
all.add(id);
}
}
haveEst += tipToPack.size();
}
} else {
walker.sort(RevSort.TOPO);
if (thin)
walker.sort(RevSort.BOUNDARY, true);
}
List<RevObject> wantObjs = new ArrayList<RevObject>(want.size());
List<RevObject> haveObjs = new ArrayList<RevObject>(haveEst);
AsyncRevObjectQueue q = walker.parseAny(all, true);
try {
for (;;) {
@ -1031,10 +1095,18 @@ public class PackWriter {
RevObject o = q.next();
if (o == null)
break;
if (have.contains(o))
if (tipToPack.containsKey(o))
o.add(inCachedPack);
if (have.contains(o)) {
haveObjs.add(o);
walker.markUninteresting(o);
else
} else if (want.contains(o)) {
o.add(include);
wantObjs.add(o);
walker.markStart(o);
}
} catch (MissingObjectException e) {
if (ignoreMissingUninteresting
&& have.contains(e.getObjectId()))
@ -1046,21 +1118,45 @@ public class PackWriter {
q.release();
}
int typesToPrune = 0;
final int maxBases = config.getDeltaSearchWindowSize();
Set<RevTree> baseTrees = new HashSet<RevTree>();
RevObject o;
while ((o = walker.next()) != null) {
if (o.has(inCachedPack)) {
CachedPack pack = tipToPack.get(o);
if (includesAllTips(pack, include, walker)) {
useCachedPack(walker, keepOnRestart, //
wantObjs, haveObjs, pack);
countingMonitor.endTask();
countingMonitor.beginTask(JGitText.get().countingObjects,
ProgressMonitor.UNKNOWN);
continue;
}
}
if (o.has(RevFlag.UNINTERESTING)) {
if (baseTrees.size() <= maxBases)
baseTrees.add(((RevCommit) o).getTree());
continue;
}
addObject(o, 0);
countingMonitor.update(1);
}
for (CachedPack p : cachedPacks) {
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_COMMIT])) {
if (baseTrees.size() <= maxBases)
baseTrees.add(walker.lookupCommit(d).getTree());
objectsMap.get(d).setEdge();
typesToPrune |= 1 << Constants.OBJ_COMMIT;
}
}
BaseSearch bases = new BaseSearch(countingMonitor, baseTrees, //
edgeObjects, reader);
objectsMap, edgeObjects, reader);
while ((o = walker.nextObject()) != null) {
if (o.has(RevFlag.UNINTERESTING))
continue;
@ -1073,9 +1169,87 @@ public class PackWriter {
addObject(o, pathHash);
countingMonitor.update(1);
}
for (CachedPack p : cachedPacks) {
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TREE])) {
objectsMap.get(d).setEdge();
typesToPrune |= 1 << Constants.OBJ_TREE;
}
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_BLOB])) {
objectsMap.get(d).setEdge();
typesToPrune |= 1 << Constants.OBJ_BLOB;
}
for (ObjectId d : p.hasObject(objectsLists[Constants.OBJ_TAG])) {
objectsMap.get(d).setEdge();
typesToPrune |= 1 << Constants.OBJ_TAG;
}
}
if (typesToPrune != 0) {
pruneObjectList(typesToPrune, Constants.OBJ_COMMIT);
pruneObjectList(typesToPrune, Constants.OBJ_TREE);
pruneObjectList(typesToPrune, Constants.OBJ_BLOB);
pruneObjectList(typesToPrune, Constants.OBJ_TAG);
}
for (CachedPack pack : cachedPacks)
countingMonitor.update((int) pack.getObjectCount());
countingMonitor.endTask();
}
/**
 * Remove every edge object from one of the per-type object lists.
 * <p>
 * Entries that are kept are compacted towards the front of the list in
 * their original relative order, and the stale tail is then discarded. The
 * list is modified in place.
 *
 * @param typesToPrune
 *            bit mask of object types that need pruning; the list is only
 *            touched when bit {@code 1 << typeCode} is set.
 * @param typeCode
 *            object type code, used as the index into {@code objectsLists}.
 */
private void pruneObjectList(int typesToPrune, int typeCode) {
	if ((typesToPrune & (1 << typeCode)) == 0)
		return;

	final List<ObjectToPack> list = objectsLists[typeCode];
	final int size = list.size();
	int dst = 0;
	for (int src = 0; src < size; src++) {
		final ObjectToPack obj = list.get(src);
		if (obj.isEdge())
			continue;
		if (dst != src)
			list.set(dst, obj);
		dst++;
	}

	// Truncate the tail with a single range removal. Removing the element
	// at index dst one call at a time would shift the remaining tail on
	// every call, which is quadratic in the number of pruned objects.
	if (dst < size)
		list.subList(dst, size).clear();
}
/**
 * Switch the current enumeration over to reusing a cached pack.
 * <p>
 * The pack is recorded in {@code cachedPacks}, its tips are appended to
 * {@code baseObj}, the objects collected so far are discarded, and the
 * walker is reset and re-primed so the walk restarts with the wanted
 * objects as starting points and the (extended) base objects marked
 * uninteresting.
 *
 * @param walker
 *            the walker to reset and re-prime.
 * @param keepOnRestart
 *            flags passed to {@code walker.resetRetain}.
 * @param wantObj
 *            objects re-marked as start points after the reset.
 * @param baseObj
 *            objects re-marked as uninteresting after the reset; this list
 *            is modified, the pack's tips are appended to it.
 * @param pack
 *            the cached pack to reuse.
 */
private void useCachedPack(ObjectWalk walker, RevFlagSet keepOnRestart,
List<RevObject> wantObj, List<RevObject> baseObj, CachedPack pack)
throws MissingObjectException, IncorrectObjectTypeException,
IOException {
cachedPacks.add(pack);
// NOTE(review): lookupOrNull may yield null for an id the walker has not
// seen, which would put a null entry into baseObj -- confirm every tip
// has been parsed by the caller before this point.
for (ObjectId id : pack.getTips())
baseObj.add(walker.lookupOrNull(id));
// Throw away what has been enumerated so far: the object map is emptied
// and the commit list is replaced with a fresh, empty one.
objectsMap.clear();
objectsLists[Constants.OBJ_COMMIT] = new ArrayList<ObjectToPack>();
// Force thin-pack mode on.
setThin(true);
// Reset the walker (passing keepOnRestart) and configure TOPO sorting
// with BOUNDARY enabled before re-marking the inputs.
walker.resetRetain(keepOnRestart);
walker.sort(RevSort.TOPO);
walker.sort(RevSort.BOUNDARY, true);
// Re-mark after the reset: wants as start points, bases (now including
// the cached pack's tips) as uninteresting.
for (RevObject id : wantObj)
walker.markStart(id);
for (RevObject id : baseObj)
walker.markUninteresting(id);
}
/**
 * Test whether every tip of a cached pack carries the given flag.
 *
 * @param pack
 *            the cached pack whose tips are examined.
 * @param include
 *            the flag each tip must have.
 * @param walker
 *            walker used to look up the object for each tip id.
 * @return true if all tips are known to the walker and have
 *         {@code include} set; false as soon as one tip is unknown or
 *         lacks the flag.
 */
private static boolean includesAllTips(CachedPack pack, RevFlag include,
		ObjectWalk walker) {
	for (ObjectId id : pack.getTips()) {
		// lookupOrNull returns null for an id the walker has never seen;
		// such a tip cannot carry the flag, so the pack is not usable.
		final RevObject tip = walker.lookupOrNull(id);
		if (tip == null || !tip.has(include))
			return false;
	}
	return true;
}
/**
* Include one object in the output file.
* <p>
@ -1090,20 +1264,6 @@ public class PackWriter {
*/
public void addObject(final RevObject object)
throws IncorrectObjectTypeException {
if (object.has(RevFlag.UNINTERESTING)) {
switch (object.getType()) {
case Constants.OBJ_TREE:
case Constants.OBJ_BLOB:
ObjectToPack otp = new ObjectToPack(object);
otp.setPathHash(0);
otp.setEdge();
edgeObjects.addIfAbsent(otp);
thin = true;
break;
}
return;
}
addObject(object, 0);
}
@ -1162,11 +1322,11 @@ public class PackWriter {
if (nFmt == PACK_DELTA && reuseDeltas) {
ObjectId baseId = next.getDeltaBase();
ObjectToPack ptr = objectsMap.get(baseId);
if (ptr != null) {
if (ptr != null && !ptr.isEdge()) {
otp.setDeltaBase(ptr);
otp.setReuseAsIs();
otp.setWeight(nWeight);
} else if (thin && edgeObjects.contains(baseId)) {
} else if (thin && ptr != null && ptr.isEdge()) {
otp.setDeltaBase(baseId);
otp.setReuseAsIs();
otp.setWeight(nWeight);

1
org.eclipse.jgit/src/org/eclipse/jgit/transport/BasePackPushConnection.java

@ -259,6 +259,7 @@ public abstract class BasePackPushConnection extends BasePackConnection implemen
newObjects.add(r.getNewObjectId());
}
writer.setUseCachedPacks(true);
writer.setThin(thinPack);
writer.setDeltaBaseAsOffset(capableOfsDelta);
writer.preparePack(monitor, newObjects, remoteObjects);

1
org.eclipse.jgit/src/org/eclipse/jgit/transport/UploadPack.java

@ -636,6 +636,7 @@ public class UploadPack {
cfg = new PackConfig(db);
final PackWriter pw = new PackWriter(cfg, walk.getObjectReader());
try {
pw.setUseCachedPacks(true);
pw.setDeltaBaseAsOffset(options.contains(OPTION_OFS_DELTA));
pw.setThin(options.contains(OPTION_THIN_PACK));
pw.preparePack(pm, wantAll, commonBase);

Loading…
Cancel
Save