From 7ba31474a334cf8f79ecd0c85598550f415b70c5 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 16 Sep 2010 17:02:27 -0700 Subject: [PATCH] Increase core.streamFileThreshold default to 50 MiB Projects like org.eclipse.mdt contain large XML files about 6 MiB in size. So does the Android project platform/frameworks/base. Doing a clone of either project with JGit takes forever to checkout the files into the working directory, because delta decompression tends to be very expensive as we need to constantly reposition the base stream for each copy instruction. This can be made worse by a very bad ordering of offsets, possibly due to an XML editor that doesn't preserve the order of elements in the file very well. Increasing the threshold to the same limit PackWriter uses when doing delta compression (50 MiB) permits a default configured JGit to decompress these XML file objects using the faster random-access arrays, rather than re-seeking through an inflate stream, significantly reducing checkout time after a clone. Since this new limit may be dangerously close to the JVM maximum heap size, every allocation attempt is now wrapped in a try/catch so that JGit can degrade by switching to the large object stream mode when the allocation is refused. It will run slower, but the operation will still complete. The large stream mode will run very well for big objects that aren't delta compressed, and is acceptable for delta compressed objects that are using only forward referencing copy instructions. Copies using prior offsets are still going to be horrible, and there is nothing we can do about it except increase core.streamFileThreshold. We might in the future want to consider changing the way the delta generators work in JGit and native C Git to avoid prior offsets once an object reaches a certain size, even if that causes the delta instruction stream to be slightly larger. Unfortunately native C Git won't want to do that until its also able to stream objects rather than malloc them as contiguous blocks. Change-Id: Ief7a3896afce15073e80d3691bed90c6a3897307 Signed-off-by: Shawn O. Pearce Signed-off-by: Chris Aniszczyk --- .../jgit/storage/file/PackFileTest.java | 70 +++------------ .../jgit/storage/file/UnpackedObjectTest.java | 18 ++-- .../org/eclipse/jgit/JGitText.properties | 1 + .../src/org/eclipse/jgit/JGitText.java | 1 + .../org/eclipse/jgit/lib/ObjectLoader.java | 8 -- .../eclipse/jgit/storage/file/PackFile.java | 86 ++++++++++++------- .../jgit/storage/file/WindowCacheConfig.java | 4 +- .../jgit/storage/pack/BinaryDelta.java | 26 +++++- 8 files changed, 105 insertions(+), 109 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java index 9a4bc0b1a..74c9f0655 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java @@ -70,6 +70,8 @@ import org.eclipse.jgit.util.NB; import org.eclipse.jgit.util.TemporaryBuffer; public class PackFileTest extends LocalDiskRepositoryTestCase { + private int streamThreshold = 16 * 1024; + private TestRng rng; private FileRepository repo; @@ -80,6 +82,11 @@ public class PackFileTest extends LocalDiskRepositoryTestCase { protected void setUp() throws Exception { super.setUp(); + + WindowCacheConfig cfg = new WindowCacheConfig(); + cfg.setStreamFileThreshold(streamThreshold); + WindowCache.reconfigure(cfg); + rng = new TestRng(getName()); repo = createBareRepository(); tr = new TestRepository(repo); @@ -89,6 +96,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase { protected void tearDown() throws Exception { if (wc != null) wc.release(); + WindowCache.reconfigure(new WindowCacheConfig()); super.tearDown(); } @@ -120,7 +128,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase { public void testWhole_LargeObject() throws Exception { final int type = Constants.OBJ_BLOB; - byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); + byte[] data = rng.nextBytes(streamThreshold + 5); RevBlob id = tr.blob(data); tr.branch("master").commit().add("A", id).create(); tr.packAndPrune(); @@ -213,7 +221,7 @@ public class PackFileTest extends LocalDiskRepositoryTestCase { public void testDelta_LargeObjectChain() throws Exception { ObjectInserter.Formatter fmt = new ObjectInserter.Formatter(); - byte[] data0 = new byte[ObjectLoader.STREAM_THRESHOLD + 5]; + byte[] data0 = new byte[streamThreshold + 5]; Arrays.fill(data0, (byte) 0xf3); ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0); @@ -277,64 +285,6 @@ public class PackFileTest extends LocalDiskRepositoryTestCase { in.close(); } - public void testDelta_LargeInstructionStream() throws Exception { - ObjectInserter.Formatter fmt = new ObjectInserter.Formatter(); - byte[] data0 = new byte[32]; - Arrays.fill(data0, (byte) 0xf3); - ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0); - - byte[] data3 = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); - ByteArrayOutputStream tmp = new ByteArrayOutputStream(); - DeltaEncoder de = new DeltaEncoder(tmp, data0.length, data3.length); - de.insert(data3, 0, data3.length); - byte[] delta3 = tmp.toByteArray(); - assertTrue(delta3.length > ObjectLoader.STREAM_THRESHOLD); - - TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap( - ObjectLoader.STREAM_THRESHOLD + 1024); - packHeader(pack, 2); - objectHeader(pack, Constants.OBJ_BLOB, data0.length); - deflate(pack, data0); - - ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3); - objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length); - id0.copyRawTo(pack); - deflate(pack, delta3); - - digest(pack); - final byte[] raw = pack.toByteArray(); - IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw)); - ip.setFixThin(true); - ip.index(NullProgressMonitor.INSTANCE); - ip.renameAndOpenPack(); - - assertTrue("has blob", wc.has(id3)); - - ObjectLoader ol = wc.open(id3); - assertNotNull("created loader", ol); - assertEquals(Constants.OBJ_BLOB, ol.getType()); - assertEquals(data3.length, ol.getSize()); - assertTrue("is large", ol.isLarge()); - try { - ol.getCachedBytes(); - fail("Should have thrown LargeObjectException"); - } catch (LargeObjectException tooBig) { - assertEquals(MessageFormat.format( - JGitText.get().largeObjectException, id3.name()), tooBig - .getMessage()); - } - - ObjectStream in = ol.openStream(); - assertNotNull("have stream", in); - assertEquals(Constants.OBJ_BLOB, in.getType()); - assertEquals(data3.length, in.getSize()); - byte[] act = new byte[data3.length]; - IO.readFully(in, act, 0, data3.length); - assertTrue("same content", Arrays.equals(act, data3)); - assertEquals("stream at EOF", -1, in.read()); - in.close(); - } - private byte[] clone(int first, byte[] base) { byte[] r = new byte[base.length]; System.arraycopy(base, 1, r, 1, r.length - 1); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java index ab7445b17..547565ea8 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java @@ -67,6 +67,8 @@ import org.eclipse.jgit.lib.ObjectStream; import org.eclipse.jgit.util.IO; public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { + private int streamThreshold = 16 * 1024; + private TestRng rng; private FileRepository repo; @@ -75,6 +77,11 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { protected void setUp() throws Exception { super.setUp(); + + WindowCacheConfig cfg = new WindowCacheConfig(); + cfg.setStreamFileThreshold(streamThreshold); + WindowCache.reconfigure(cfg); + rng = new TestRng(getName()); repo = createBareRepository(); wc = (WindowCursor) repo.newObjectReader(); @@ -83,6 +90,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { protected void tearDown() throws Exception { if (wc != null) wc.release(); + WindowCache.reconfigure(new WindowCacheConfig()); super.tearDown(); } @@ -113,7 +121,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { public void testStandardFormat_LargeObject() throws Exception { final int type = Constants.OBJ_BLOB; - byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); + byte[] data = rng.nextBytes(streamThreshold + 5); ObjectId id = new ObjectInserter.Formatter().idFor(type, data); write(id, compressStandardFormat(type, data)); @@ -268,7 +276,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { public void testStandardFormat_LargeObject_CorruptZLibStream() throws Exception { final int type = Constants.OBJ_BLOB; - byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); + byte[] data = rng.nextBytes(streamThreshold + 5); ObjectId id = new ObjectInserter.Formatter().idFor(type, data); byte[] gz = compressStandardFormat(type, data); gz[gz.length - 1] = 0; @@ -305,7 +313,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { public void testStandardFormat_LargeObject_TruncatedZLibStream() throws Exception { final int type = Constants.OBJ_BLOB; - byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); + byte[] data = rng.nextBytes(streamThreshold + 5); ObjectId id = new ObjectInserter.Formatter().idFor(type, data); byte[] gz = compressStandardFormat(type, data); byte[] tr = new byte[gz.length - 1]; @@ -339,7 +347,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { public void testStandardFormat_LargeObject_TrailingGarbage() throws Exception { final int type = Constants.OBJ_BLOB; - byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); + byte[] data = rng.nextBytes(streamThreshold + 5); ObjectId id = new ObjectInserter.Formatter().idFor(type, data); byte[] gz = compressStandardFormat(type, data); byte[] tr = new byte[gz.length + 1]; @@ -396,7 +404,7 @@ public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { public void testPackFormat_LargeObject() throws Exception { final int type = Constants.OBJ_BLOB; - byte[] data = rng.nextBytes(ObjectLoader.STREAM_THRESHOLD + 5); + byte[] data = rng.nextBytes(streamThreshold + 5); ObjectId id = new ObjectInserter.Formatter().idFor(type, data); write(id, compressPackFormat(type, data)); diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties index b6336283c..db7a952b2 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/JGitText.properties @@ -335,6 +335,7 @@ repositoryState_rebaseOrApplyMailbox=Rebase/Apply mailbox repositoryState_rebaseWithMerge=Rebase w/merge requiredHashFunctionNotAvailable=Required hash function {0} not available. resolvingDeltas=Resolving deltas +resultLengthIncorrect=result length incorrect searchForReuse=Finding sources sequenceTooLargeForDiffAlgorithm=Sequence too large for difference algorithm. serviceNotPermitted={0} not permitted diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java index bb6ffe184..dceefa2f4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/JGitText.java @@ -395,6 +395,7 @@ public class JGitText extends TranslationBundle { /***/ public String repositoryState_rebaseWithMerge; /***/ public String requiredHashFunctionNotAvailable; /***/ public String resolvingDeltas; + /***/ public String resultLengthIncorrect; /***/ public String searchForReuse; /***/ public String sequenceTooLargeForDiffAlgorithm; /***/ public String serviceNotPermitted; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java index fe4a7fdc0..b2cc29426 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java @@ -60,14 +60,6 @@ import org.eclipse.jgit.util.IO; * New loaders are constructed for every object. */ public abstract class ObjectLoader { - /** - * Default setting for the large object threshold. - *

- * Objects larger than this size must be accessed as a stream through the - * loader's {@link #openStream()} method. - */ - public static final int STREAM_THRESHOLD = 5 * 1024 * 1024; - /** * @return Git in pack object type, see {@link Constants}. */ diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java index 523911162..3ae0b8f39 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java @@ -63,6 +63,7 @@ import java.util.zip.Inflater; import org.eclipse.jgit.JGitText; import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.LargeObjectException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.errors.PackInvalidException; import org.eclipse.jgit.errors.PackMismatchException; @@ -274,12 +275,11 @@ public class PackFile implements Iterable { return getReverseIdx().findObject(offset); } - private final byte[] decompress(final long position, final long totalSize, - final WindowCursor curs) throws IOException, DataFormatException { - final byte[] dstbuf = new byte[(int) totalSize]; - if (curs.inflate(this, position, dstbuf, 0) != totalSize) + private final void decompress(final long position, final WindowCursor curs, + final byte[] dstbuf, final int dstoff, final int dstsz) + throws IOException, DataFormatException { + if (curs.inflate(this, position, dstbuf, dstoff) != dstsz) throw new EOFException(MessageFormat.format(JGitText.get().shortCompressedStreamAt, position)); - return dstbuf; } final void copyAsIs(PackOutputStream out, LocalObjectToPack src, @@ -630,10 +630,16 @@ public class PackFile implements Iterable { case Constants.OBJ_BLOB: case Constants.OBJ_TAG: { if (sz < curs.getStreamFileThreshold()) { - byte[] data = decompress(pos + p, sz, curs); + byte[] data; + try { + data = new byte[(int) sz]; + } catch (OutOfMemoryError tooBig) { + return largeWhole(curs, pos, type, sz, p); + } + decompress(pos + p, curs, data, 0, data.length); return new ObjectLoader.SmallObject(type, data); } - return new LargePackedWholeObject(type, sz, pos, p, this, curs.db); + return largeWhole(curs, pos, type, sz, p); } case Constants.OBJ_OFS_DELTA: { @@ -680,44 +686,58 @@ public class PackFile implements Iterable { private ObjectLoader loadDelta(long posSelf, int hdrLen, long sz, long posBase, WindowCursor curs) throws IOException, DataFormatException { - if (curs.getStreamFileThreshold() <= sz) { - // The delta instruction stream itself is pretty big, and - // that implies the resulting object is going to be massive. - // Use only the large delta format here. - // - return new LargePackedDeltaObject(posSelf, posBase, hdrLen, // - this, curs.db); - } + if (Integer.MAX_VALUE <= sz) + return largeDelta(posSelf, hdrLen, posBase, curs); - byte[] data; + byte[] base; int type; DeltaBaseCache.Entry e = DeltaBaseCache.get(this, posBase); if (e != null) { - data = e.data; + base = e.data; type = e.type; } else { ObjectLoader p = load(curs, posBase); - if (p.isLarge()) { - // The base itself is large. We have to produce a large - // delta stream as we don't want to build the whole base. - // - return new LargePackedDeltaObject(posSelf, posBase, hdrLen, - this, curs.db); + try { + base = p.getCachedBytes(curs.getStreamFileThreshold()); + } catch (LargeObjectException tooBig) { + return largeDelta(posSelf, hdrLen, posBase, curs); } - data = p.getCachedBytes(); type = p.getType(); - DeltaBaseCache.store(this, posBase, data, type); + DeltaBaseCache.store(this, posBase, base, type); } - // At this point we have the base, and its small, and the delta - // stream also is small, so the result object cannot be more than - // 2x our small size. This occurs if the delta instructions were - // "copy entire base, literal insert entire delta". Go with the - // faster small object style at this point. - // - data = BinaryDelta.apply(data, decompress(posSelf + hdrLen, sz, curs)); - return new ObjectLoader.SmallObject(type, data); + final byte[] delta; + try { + delta = new byte[(int) sz]; + } catch (OutOfMemoryError tooBig) { + return largeDelta(posSelf, hdrLen, posBase, curs); + } + + decompress(posSelf + hdrLen, curs, delta, 0, delta.length); + sz = BinaryDelta.getResultSize(delta); + if (Integer.MAX_VALUE <= sz) + return largeDelta(posSelf, hdrLen, posBase, curs); + + final byte[] result; + try { + result = new byte[(int) sz]; + } catch (OutOfMemoryError tooBig) { + return largeDelta(posSelf, hdrLen, posBase, curs); + } + + BinaryDelta.apply(base, delta, result); + return new ObjectLoader.SmallObject(type, result); + } + + private LargePackedWholeObject largeWhole(final WindowCursor curs, + final long pos, final int type, long sz, int p) { + return new LargePackedWholeObject(type, sz, pos, p, this, curs.db); + } + + private LargePackedDeltaObject largeDelta(long posObj, int hdrLen, + long posBase, WindowCursor wc) { + return new LargePackedDeltaObject(posObj, posBase, hdrLen, this, wc.db); } byte[] getDeltaHeader(WindowCursor wc, long pos) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java index 95ec6f7c7..c8e0d5584 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCacheConfig.java @@ -44,7 +44,7 @@ package org.eclipse.jgit.storage.file; import org.eclipse.jgit.lib.Config; -import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.storage.pack.PackConfig; /** Configuration parameters for {@link WindowCache}. */ public class WindowCacheConfig { @@ -73,7 +73,7 @@ public class WindowCacheConfig { packedGitWindowSize = 8 * KB; packedGitMMAP = false; deltaBaseCacheLimit = 10 * MB; - streamFileThreshold = ObjectLoader.STREAM_THRESHOLD; + streamFileThreshold = PackConfig.DEFAULT_BIG_FILE_THRESHOLD; } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java index 1d433d78a..eead163ee 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java @@ -115,6 +115,25 @@ public class BinaryDelta { * @return patched base */ public static final byte[] apply(final byte[] base, final byte[] delta) { + return apply(base, delta, null); + } + + /** + * Apply the changes defined by delta to the data in base, yielding a new + * array of bytes. + * + * @param base + * some byte representing an object of some kind. + * @param delta + * a git pack delta defining the transform from one version to + * another. + * @param result + * array to store the result into. If null the result will be + * allocated and returned. + * @return either {@code result}, or the result array allocated. + */ + public static final byte[] apply(final byte[] base, final byte[] delta, + byte[] result) { int deltaPtr = 0; // Length of the base object (a variable length int). @@ -140,7 +159,12 @@ public class BinaryDelta { shift += 7; } while ((c & 0x80) != 0); - final byte[] result = new byte[resLen]; + if (result == null) + result = new byte[resLen]; + else if (result.length != resLen) + throw new IllegalArgumentException( + JGitText.get().resultLengthIncorrect); + int resultPtr = 0; while (deltaPtr < delta.length) { final int cmd = delta[deltaPtr++] & 0xff;