From ad68553be4417ec7ac636c3d823fdddced46ecfb Mon Sep 17 00:00:00 2001
From: "Shawn O. Pearce"
Date: Fri, 2 Jul 2010 02:19:12 -0700
Subject: [PATCH] Support large delta packed objects as streams
Very large delta instruction streams, or deltas which use very large
base objects, are now streamed through as large objects rather than
being inflated into a byte array.
This isn't the most efficient way to access delta encoded content, as
we may need to rewind and reprocess the base object when there was a
block moved within the file, but it will at least prevent the JVM from
having its heap explode.
When streaming a delta we have an inflater open for each level in the
delta chain, to inflate the instruction set of the delta, as well as
an inflater for the base level object. The base object is buffered,
as is the top level delta requested by the application, but we do not
buffer the intermediate delta streams. This keeps memory usage lower,
so it's closer to 1024 bytes per level in the chain, without having an
adverse impact on raw throughput as the top-level buffer gets pushed
down to the lowest stream that has the next region.
Delta instructions transparently collapse here: if the top level does
not copy a region from its base, the base won't materialize that part
from its own base, etc. This allows us to avoid copying around a lot
of segments which have been deleted from the final version.
Change-Id: I724d45245cebb4bad2deeae7b896fc55b2dd49b3
Signed-off-by: Shawn O. Pearce
---
.../jgit/storage/file/PackFileTest.java | 250 +++++++++++++
.../jgit/storage/pack/DeltaStreamTest.java | 273 ++++++++++++++
.../jgit/storage/file/ByteArrayWindow.java | 17 +-
.../jgit/storage/file/ByteBufferWindow.java | 20 +-
.../eclipse/jgit/storage/file/ByteWindow.java | 57 +--
.../storage/file/LargePackedDeltaObject.java | 153 ++++++++
.../eclipse/jgit/storage/file/PackFile.java | 97 ++++-
.../jgit/storage/file/WindowCursor.java | 25 +-
.../jgit/storage/pack/BinaryDelta.java | 45 +++
.../jgit/storage/pack/DeltaEncoder.java | 202 +++++++++++
.../jgit/storage/pack/DeltaStream.java | 341 ++++++++++++++++++
11 files changed, 1387 insertions(+), 93 deletions(-)
create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java
create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LargePackedDeltaObject.java
create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaEncoder.java
create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaStream.java
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
index 55459ac26..1b6e3bff9 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/PackFileTest.java
@@ -43,17 +43,29 @@
package org.eclipse.jgit.storage.file;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.security.MessageDigest;
import java.util.Arrays;
+import java.util.zip.Deflater;
import org.eclipse.jgit.errors.LargeObjectException;
import org.eclipse.jgit.junit.LocalDiskRepositoryTestCase;
import org.eclipse.jgit.junit.TestRepository;
import org.eclipse.jgit.junit.TestRng;
import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.NullProgressMonitor;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectStream;
import org.eclipse.jgit.revwalk.RevBlob;
+import org.eclipse.jgit.storage.pack.DeltaEncoder;
+import org.eclipse.jgit.transport.IndexPack;
import org.eclipse.jgit.util.IO;
+import org.eclipse.jgit.util.NB;
+import org.eclipse.jgit.util.TemporaryBuffer;
public class PackFileTest extends LocalDiskRepositoryTestCase {
private TestRng rng;
@@ -134,4 +146,242 @@ public class PackFileTest extends LocalDiskRepositoryTestCase {
assertEquals("stream at EOF", -1, in.read());
in.close();
}
+
+ /**
+ * Packs a 512 byte blob followed by three chained REF_DELTA objects,
+ * indexes the pack, and checks that the tip of the chain is reported as
+ * a small object whose cached bytes and stream both match the expected
+ * content.
+ */
+ public void testDelta_SmallObjectChain() throws Exception {
+ ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
+ byte[] data0 = new byte[512];
+ Arrays.fill(data0, (byte) 0xf3);
+ ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
+
+ // The pack header announces 4 objects: the base blob and 3 deltas.
+ TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(64 * 1024);
+ packHeader(pack, 4);
+ objectHeader(pack, Constants.OBJ_BLOB, data0.length);
+ deflate(pack, data0);
+
+ // Each later version differs from its base only in the first byte,
+ // and is stored as a REF_DELTA naming that base by object id.
+ byte[] data1 = clone(0x01, data0);
+ byte[] delta1 = delta(data0, data1);
+ ObjectId id1 = fmt.idFor(Constants.OBJ_BLOB, data1);
+ objectHeader(pack, Constants.OBJ_REF_DELTA, delta1.length);
+ id0.copyRawTo(pack);
+ deflate(pack, delta1);
+
+ byte[] data2 = clone(0x02, data1);
+ byte[] delta2 = delta(data1, data2);
+ ObjectId id2 = fmt.idFor(Constants.OBJ_BLOB, data2);
+ objectHeader(pack, Constants.OBJ_REF_DELTA, delta2.length);
+ id1.copyRawTo(pack);
+ deflate(pack, delta2);
+
+ byte[] data3 = clone(0x03, data2);
+ byte[] delta3 = delta(data2, data3);
+ ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3);
+ objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length);
+ id2.copyRawTo(pack);
+ deflate(pack, delta3);
+
+ // Append the trailing digest, then index the finished pack into the
+ // test repository so the objects can be opened.
+ digest(pack);
+ final byte[] raw = pack.toByteArray();
+ IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw));
+ ip.setFixThin(true);
+ ip.index(NullProgressMonitor.INSTANCE);
+ ip.renameAndOpenPack();
+
+ assertTrue("has blob", wc.has(id3));
+
+ // The tip of the chain must come back as an ordinary small object.
+ ObjectLoader ol = wc.open(id3);
+ assertNotNull("created loader", ol);
+ assertEquals(Constants.OBJ_BLOB, ol.getType());
+ assertEquals(data3.length, ol.getSize());
+ assertFalse("is large", ol.isLarge());
+ assertNotNull(ol.getCachedBytes());
+ assertTrue(Arrays.equals(data3, ol.getCachedBytes()));
+
+ // Streaming the same loader must yield identical content, then EOF.
+ ObjectStream in = ol.openStream();
+ assertNotNull("have stream", in);
+ assertEquals(Constants.OBJ_BLOB, in.getType());
+ assertEquals(data3.length, in.getSize());
+ byte[] act = new byte[data3.length];
+ IO.readFully(in, act, 0, data3.length);
+ assertTrue("same content", Arrays.equals(act, data3));
+ assertEquals("stream at EOF", -1, in.read());
+ in.close();
+ }
+
+ /**
+ * Same chain layout as the small object test, but the base blob is
+ * UnpackedObject.LARGE_OBJECT + 5 bytes long. The tip of the chain must
+ * be reported as large, refuse getCachedBytes(), and still stream the
+ * expected content.
+ */
+ public void testDelta_LargeObjectChain() throws Exception {
+ ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
+ byte[] data0 = new byte[UnpackedObject.LARGE_OBJECT + 5];
+ Arrays.fill(data0, (byte) 0xf3);
+ ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
+
+ // The pack header announces 4 objects: the base blob and 3 deltas.
+ TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(64 * 1024);
+ packHeader(pack, 4);
+ objectHeader(pack, Constants.OBJ_BLOB, data0.length);
+ deflate(pack, data0);
+
+ // Each later version differs from its base only in the first byte,
+ // and is stored as a REF_DELTA naming that base by object id.
+ byte[] data1 = clone(0x01, data0);
+ byte[] delta1 = delta(data0, data1);
+ ObjectId id1 = fmt.idFor(Constants.OBJ_BLOB, data1);
+ objectHeader(pack, Constants.OBJ_REF_DELTA, delta1.length);
+ id0.copyRawTo(pack);
+ deflate(pack, delta1);
+
+ byte[] data2 = clone(0x02, data1);
+ byte[] delta2 = delta(data1, data2);
+ ObjectId id2 = fmt.idFor(Constants.OBJ_BLOB, data2);
+ objectHeader(pack, Constants.OBJ_REF_DELTA, delta2.length);
+ id1.copyRawTo(pack);
+ deflate(pack, delta2);
+
+ byte[] data3 = clone(0x03, data2);
+ byte[] delta3 = delta(data2, data3);
+ ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3);
+ objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length);
+ id2.copyRawTo(pack);
+ deflate(pack, delta3);
+
+ // Append the trailing digest, then index the finished pack into the
+ // test repository so the objects can be opened.
+ digest(pack);
+ final byte[] raw = pack.toByteArray();
+ IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw));
+ ip.setFixThin(true);
+ ip.index(NullProgressMonitor.INSTANCE);
+ ip.renameAndOpenPack();
+
+ assertTrue("has blob", wc.has(id3));
+
+ // The loader must identify itself as large and refuse to hand out a
+ // byte array; the exception message is expected to be the object name.
+ ObjectLoader ol = wc.open(id3);
+ assertNotNull("created loader", ol);
+ assertEquals(Constants.OBJ_BLOB, ol.getType());
+ assertEquals(data3.length, ol.getSize());
+ assertTrue("is large", ol.isLarge());
+ try {
+ ol.getCachedBytes();
+ fail("Should have thrown LargeObjectException");
+ } catch (LargeObjectException tooBig) {
+ assertEquals(id3.name(), tooBig.getMessage());
+ }
+
+ // Streaming must still reproduce the full content, then report EOF.
+ ObjectStream in = ol.openStream();
+ assertNotNull("have stream", in);
+ assertEquals(Constants.OBJ_BLOB, in.getType());
+ assertEquals(data3.length, in.getSize());
+ byte[] act = new byte[data3.length];
+ IO.readFully(in, act, 0, data3.length);
+ assertTrue("same content", Arrays.equals(act, data3));
+ assertEquals("stream at EOF", -1, in.read());
+ in.close();
+ }
+
+ /**
+ * Uses a tiny 32 byte base with a delta whose instruction stream is
+ * itself larger than UnpackedObject.LARGE_OBJECT, because the whole
+ * result is supplied through a single insert() call. The resulting
+ * object must be reported as large and must stream the expected content.
+ */
+ public void testDelta_LargeInstructionStream() throws Exception {
+ ObjectInserter.Formatter fmt = new ObjectInserter.Formatter();
+ byte[] data0 = new byte[32];
+ Arrays.fill(data0, (byte) 0xf3);
+ ObjectId id0 = fmt.idFor(Constants.OBJ_BLOB, data0);
+
+ // The delta copies nothing from the base; all of data3 is inserted,
+ // so the delta is bigger than the large object threshold.
+ byte[] data3 = rng.nextBytes(UnpackedObject.LARGE_OBJECT + 5);
+ ByteArrayOutputStream tmp = new ByteArrayOutputStream();
+ DeltaEncoder de = new DeltaEncoder(tmp, data0.length, data3.length);
+ de.insert(data3, 0, data3.length);
+ byte[] delta3 = tmp.toByteArray();
+ assertTrue(delta3.length > UnpackedObject.LARGE_OBJECT);
+
+ // The pack header announces 2 objects: the base blob and one delta.
+ TemporaryBuffer.Heap pack = new TemporaryBuffer.Heap(64 * 1024);
+ packHeader(pack, 2);
+ objectHeader(pack, Constants.OBJ_BLOB, data0.length);
+ deflate(pack, data0);
+
+ ObjectId id3 = fmt.idFor(Constants.OBJ_BLOB, data3);
+ objectHeader(pack, Constants.OBJ_REF_DELTA, delta3.length);
+ id0.copyRawTo(pack);
+ deflate(pack, delta3);
+
+ // Append the trailing digest, then index the finished pack into the
+ // test repository so the objects can be opened.
+ digest(pack);
+ final byte[] raw = pack.toByteArray();
+ IndexPack ip = IndexPack.create(repo, new ByteArrayInputStream(raw));
+ ip.setFixThin(true);
+ ip.index(NullProgressMonitor.INSTANCE);
+ ip.renameAndOpenPack();
+
+ assertTrue("has blob", wc.has(id3));
+
+ // The loader must identify itself as large and refuse to hand out a
+ // byte array; the exception message is expected to be the object name.
+ ObjectLoader ol = wc.open(id3);
+ assertNotNull("created loader", ol);
+ assertEquals(Constants.OBJ_BLOB, ol.getType());
+ assertEquals(data3.length, ol.getSize());
+ assertTrue("is large", ol.isLarge());
+ try {
+ ol.getCachedBytes();
+ fail("Should have thrown LargeObjectException");
+ } catch (LargeObjectException tooBig) {
+ assertEquals(id3.name(), tooBig.getMessage());
+ }
+
+ // Streaming must still reproduce the full content, then report EOF.
+ ObjectStream in = ol.openStream();
+ assertNotNull("have stream", in);
+ assertEquals(Constants.OBJ_BLOB, in.getType());
+ assertEquals(data3.length, in.getSize());
+ byte[] act = new byte[data3.length];
+ IO.readFully(in, act, 0, data3.length);
+ assertTrue("same content", Arrays.equals(act, data3));
+ assertEquals("stream at EOF", -1, in.read());
+ in.close();
+ }
+
+	/**
+	 * Produce a same-length copy of an array whose first byte is replaced.
+	 *
+	 * @param first
+	 *            value stored (narrowed to a byte) at index 0 of the copy.
+	 * @param base
+	 *            array supplying every byte after index 0.
+	 * @return a new array; {@code base} itself is not modified.
+	 */
+	private byte[] clone(int first, byte[] base) {
+		final int len = base.length;
+		final byte[] copy = new byte[len];
+		// Index 0 is deliberately left out of the bulk copy.
+		System.arraycopy(base, 1, copy, 1, len - 1);
+		copy[0] = (byte) first;
+		return copy;
+	}
+
+	/**
+	 * Encode a delta that inserts the first byte of {@code dest} literally
+	 * and then copies {@code base.length - 1} bytes from offset 1 of the
+	 * base.
+	 *
+	 * @param base
+	 *            content the delta is applied to.
+	 * @param dest
+	 *            desired result; only its first byte and its length are read.
+	 * @return the encoded delta instruction stream.
+	 * @throws IOException
+	 *             the encoder could not write to its buffer.
+	 */
+	private byte[] delta(byte[] base, byte[] dest) throws IOException {
+		final ByteArrayOutputStream encoded = new ByteArrayOutputStream();
+		final DeltaEncoder enc = new DeltaEncoder(encoded, base.length,
+				dest.length);
+		enc.insert(dest, 0, 1);
+		final int tail = base.length - 1;
+		enc.copy(1, tail);
+		return encoded.toByteArray();
+	}
+
+	/**
+	 * Append a pack file header: the pack signature, then two 32 bit
+	 * integers holding the value 2 and the object count.
+	 *
+	 * @param pack
+	 *            buffer receiving the header.
+	 * @param cnt
+	 *            number of objects the pack is declared to contain.
+	 * @throws IOException
+	 *             the buffer could not be written to.
+	 */
+	private void packHeader(TemporaryBuffer.Heap pack, int cnt)
+			throws IOException {
+		pack.write(Constants.PACK_SIGNATURE);
+
+		final byte[] versionAndCount = new byte[8];
+		NB.encodeInt32(versionAndCount, 0, 2);
+		NB.encodeInt32(versionAndCount, 4, cnt);
+		pack.write(versionAndCount, 0, versionAndCount.length);
+	}
+
+	/**
+	 * Append an object header encoding the type and a variable length size.
+	 *
+	 * The first byte carries the type shifted into bits 4 and up together
+	 * with the low 4 bits of the size. Each following byte carries the next
+	 * 7 size bits, least significant group first. Bit 7 (0x80) is set on
+	 * every byte that is followed by another one.
+	 *
+	 * @param pack
+	 *            buffer receiving the header.
+	 * @param type
+	 *            object type code.
+	 * @param sz
+	 *            size value to encode.
+	 * @throws IOException
+	 *             the buffer could not be written to.
+	 */
+	private void objectHeader(TemporaryBuffer.Heap pack, int type, int sz)
+			throws IOException {
+		final byte[] hdr = new byte[8];
+		int len = 0;
+		int cur = (type << 4) | (sz & 0x0F);
+		int rest = sz >>> 4;
+		while (rest > 0) {
+			// More size bits remain, so flag the pending byte as continued.
+			hdr[len++] = (byte) (0x80 | cur);
+			cur = rest & 0x7F;
+			rest >>>= 7;
+		}
+		hdr[len++] = (byte) cur;
+		pack.write(hdr, 0, len);
+	}
+
+	/**
+	 * Compress {@code content} with a default {@link Deflater} and append
+	 * the compressed bytes to the pack buffer.
+	 *
+	 * @param pack
+	 *            buffer receiving the deflated bytes.
+	 * @param content
+	 *            raw bytes to compress; the whole array is used.
+	 * @throws IOException
+	 *             the buffer could not be written to.
+	 */
+	private void deflate(TemporaryBuffer.Heap pack, final byte[] content)
+			throws IOException {
+		final Deflater deflater = new Deflater();
+		try {
+			final byte[] buf = new byte[128];
+			deflater.setInput(content, 0, content.length);
+			deflater.finish();
+			do {
+				final int n = deflater.deflate(buf, 0, buf.length);
+				if (n > 0)
+					pack.write(buf, 0, n);
+			} while (!deflater.finished());
+		} finally {
+			// Always end the deflater, even if writing to the pack failed.
+			deflater.end();
+		}
+	}
+
+	/**
+	 * Append to the buffer the digest of everything written to it so far.
+	 *
+	 * @param buf
+	 *            buffer whose current content is hashed and which receives
+	 *            the resulting digest bytes.
+	 * @throws IOException
+	 *             the buffer could not be read or written.
+	 */
+	private void digest(TemporaryBuffer.Heap buf) throws IOException {
+		final MessageDigest hash = Constants.newMessageDigest();
+		final byte[] soFar = buf.toByteArray();
+		hash.update(soFar);
+		final byte[] trailer = hash.digest();
+		buf.write(trailer);
+	}
}
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java
new file mode 100644
index 000000000..9b34ad5e0
--- /dev/null
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/pack/DeltaStreamTest.java
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.storage.pack;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.eclipse.jgit.JGitText;
+import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.junit.TestRng;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.util.IO;
+
+/**
+ * Tests for DeltaStream. Each test builds an expected result array and a
+ * matching delta side by side through the copy() and insert() helpers, then
+ * checks that both BinaryDelta.apply() and a DeltaStream reading that delta
+ * reproduce the expected result.
+ */
+public class DeltaStreamTest extends TestCase {
+ private TestRng rng;
+
+ // Receives the encoded delta written by deltaEnc.
+ private ByteArrayOutputStream deltaBuf;
+
+ private DeltaEncoder deltaEnc;
+
+ // Base content the delta is applied to.
+ private byte[] base;
+
+ // Expected result, filled in by copy() and insert() as the delta grows.
+ private byte[] data;
+
+ // Next unfilled position within data.
+ private int dataPtr;
+
+ // Snapshot of deltaBuf, taken by assertValidState().
+ private byte[] delta;
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ rng = new TestRng(getName());
+ deltaBuf = new ByteArrayOutputStream();
+ }
+
+ public void testCopy_SingleOp() throws IOException {
+ init((1 << 16) + 1, (1 << 8) + 1);
+ copy(0, data.length);
+ assertValidState();
+ }
+
+ public void testCopy_MaxSize() throws IOException {
+ int max = (0xff << 16) + (0xff << 8) + 0xff;
+ init(1 + max, max);
+ copy(1, max);
+ assertValidState();
+ }
+
+ public void testCopy_64k() throws IOException {
+ init(0x10000 + 2, 0x10000 + 1);
+ copy(1, 0x10000);
+ copy(0x10001, 1);
+ assertValidState();
+ }
+
+ public void testCopy_Gap() throws IOException {
+ init(256, 8);
+ copy(4, 4);
+ copy(128, 4);
+ assertValidState();
+ }
+
+ public void testCopy_OutOfOrder() throws IOException {
+ // The second copy reads from an earlier base offset than the first.
+ init((1 << 16) + 1, (1 << 16) + 1);
+ copy(1 << 8, 1 << 8);
+ copy(0, data.length - dataPtr);
+ assertValidState();
+ }
+
+ public void testInsert_SingleOp() throws IOException {
+ init((1 << 16) + 1, 2);
+ insert("hi");
+ assertValidState();
+ }
+
+ public void testInsertAndCopy() throws IOException {
+ // 127 + 127 + 127 + 125 inserted bytes plus 6 copied bytes = 512.
+ init(8, 512);
+ insert(new byte[127]);
+ insert(new byte[127]);
+ insert(new byte[127]);
+ insert(new byte[125]);
+ copy(2, 6);
+ assertValidState();
+ }
+
+ public void testSkip() throws IOException {
+ init(32, 15);
+ copy(2, 2);
+ insert("ab");
+ insert("cd");
+ copy(4, 4);
+ copy(0, 2);
+ insert("efg");
+ assertValidState();
+
+ // Skip every possible prefix length, then read the remainder and
+ // compare against the expected result.
+ for (int p = 0; p < data.length; p++) {
+ byte[] act = new byte[data.length];
+ System.arraycopy(data, 0, act, 0, p);
+ DeltaStream in = open();
+ IO.skipFully(in, p);
+ assertEquals(data.length - p, in.read(act, p, data.length - p));
+ assertEquals(-1, in.read());
+ assertTrue("skipping " + p, Arrays.equals(data, act));
+ }
+
+ // Skip all the way to the end should still recognize EOF.
+ DeltaStream in = open();
+ IO.skipFully(in, data.length);
+ assertEquals(-1, in.read());
+ assertEquals(0, in.skip(1));
+
+ // Skip should not open the base as we move past it, but it
+ // will open when we need to start copying data from it.
+ final boolean[] opened = new boolean[1];
+ in = new DeltaStream(new ByteArrayInputStream(delta)) {
+ @Override
+ protected long getBaseSize() throws IOException {
+ return base.length;
+ }
+
+ @Override
+ protected InputStream openBase() throws IOException {
+ opened[0] = true;
+ return new ByteArrayInputStream(base);
+ }
+ };
+ // Result position 7 falls inside the copy(4, 4) region built above.
+ IO.skipFully(in, 7);
+ assertFalse("not yet open", opened[0]);
+ assertEquals(data[7], in.read());
+ assertTrue("now open", opened[0]);
+ }
+
+ public void testIncorrectBaseSize() throws IOException {
+ init(4, 4);
+ copy(0, 4);
+ assertValidState();
+
+ // Case 1: the reported base size (128) disagrees with the 4 bytes
+ // recorded in the delta.
+ DeltaStream in = new DeltaStream(new ByteArrayInputStream(delta)) {
+ @Override
+ protected long getBaseSize() throws IOException {
+ return 128;
+ }
+
+ @Override
+ protected InputStream openBase() throws IOException {
+ return new ByteArrayInputStream(base);
+ }
+ };
+ try {
+ in.read(new byte[4]);
+ fail("did not throw an exception");
+ } catch (CorruptObjectException e) {
+ assertEquals(JGitText.get().baseLengthIncorrect, e.getMessage());
+ }
+
+ // Case 2: the reported size is right, but the base stream is empty.
+ in = new DeltaStream(new ByteArrayInputStream(delta)) {
+ @Override
+ protected long getBaseSize() throws IOException {
+ return 4;
+ }
+
+ @Override
+ protected InputStream openBase() throws IOException {
+ return new ByteArrayInputStream(new byte[0]);
+ }
+ };
+ try {
+ in.read(new byte[4]);
+ fail("did not throw an exception");
+ } catch (CorruptObjectException e) {
+ assertEquals(JGitText.get().baseLengthIncorrect, e.getMessage());
+ }
+ }
+
+ // Create a random base of baseSize bytes, an empty expected result of
+ // dataSize bytes, and an encoder that writes into deltaBuf.
+ private void init(int baseSize, int dataSize) throws IOException {
+ base = rng.nextBytes(baseSize);
+ data = new byte[dataSize];
+ deltaEnc = new DeltaEncoder(deltaBuf, baseSize, dataSize);
+ }
+
+ // Mirror a copy instruction into the expected result and the delta, and
+ // check that the encoder's reported size tracks the bytes written.
+ private void copy(int offset, int len) throws IOException {
+ System.arraycopy(base, offset, data, dataPtr, len);
+ deltaEnc.copy(offset, len);
+ assertEquals(deltaBuf.size(), deltaEnc.getSize());
+ dataPtr += len;
+ }
+
+ private void insert(String text) throws IOException {
+ insert(Constants.encode(text));
+ }
+
+ // Mirror an insert instruction into the expected result and the delta,
+ // and check that the encoder's reported size tracks the bytes written.
+ private void insert(byte[] text) throws IOException {
+ System.arraycopy(text, 0, data, dataPtr, text.length);
+ deltaEnc.insert(text);
+ assertEquals(deltaBuf.size(), deltaEnc.getSize());
+ dataPtr += text.length;
+ }
+
+ // Verify the test filled the whole expected result, snapshot the delta,
+ // and check it with both BinaryDelta and a freshly opened DeltaStream.
+ private void assertValidState() throws IOException {
+ assertEquals("test filled example result", data.length, dataPtr);
+
+ delta = deltaBuf.toByteArray();
+ assertEquals(base.length, BinaryDelta.getBaseSize(delta));
+ assertEquals(data.length, BinaryDelta.getResultSize(delta));
+ assertTrue(Arrays.equals(data, BinaryDelta.apply(base, delta)));
+
+ byte[] act = new byte[data.length];
+ DeltaStream in = open();
+ assertEquals(data.length, in.getSize());
+ assertEquals(data.length, in.read(act));
+ assertEquals(-1, in.read());
+ assertTrue(Arrays.equals(data, act));
+ }
+
+ // Open a DeltaStream over the snapshot in delta, backed by base.
+ private DeltaStream open() throws IOException {
+ return new DeltaStream(new ByteArrayInputStream(delta)) {
+ @Override
+ protected long getBaseSize() throws IOException {
+ return base.length;
+ }
+
+ @Override
+ protected InputStream openBase() throws IOException {
+ return new ByteArrayInputStream(base);
+ }
+ };
+ }
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteArrayWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteArrayWindow.java
index 840244b77..0e85c58dc 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteArrayWindow.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteArrayWindow.java
@@ -70,18 +70,11 @@ final class ByteArrayWindow extends ByteWindow {
}
@Override
- protected int inflate(final int pos, final byte[] b, int o,
- final Inflater inf) throws DataFormatException {
- while (!inf.finished()) {
- if (inf.needsInput()) {
- inf.setInput(array, pos, array.length - pos);
- break;
- }
- o += inf.inflate(b, o, b.length - o);
- }
- while (!inf.finished() && !inf.needsInput())
- o += inf.inflate(b, o, b.length - o);
- return o;
+ protected int inflate(final int pos, final Inflater inf)
+ throws DataFormatException {
+ // Hand the inflater everything from pos to the end of this window's
+ // array as input. No inflation happens here; the return value is the
+ // number of bytes supplied.
+ int n = array.length - pos;
+ inf.setInput(array, pos, n);
+ return n;
}
void crc32(CRC32 out, long pos, int cnt) {
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteBufferWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteBufferWindow.java
index 52bc00f35..c6308cc1d 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteBufferWindow.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteBufferWindow.java
@@ -72,21 +72,13 @@ final class ByteBufferWindow extends ByteWindow {
}
@Override
- protected int inflate(final int pos, final byte[] b, int o,
- final Inflater inf) throws DataFormatException {
- final byte[] tmp = new byte[512];
+ protected int inflate(final int pos, final Inflater inf)
+ throws DataFormatException {
+ // Offer the inflater at most 512 bytes of this window, starting at
+ // pos, staged through a temporary byte array. No inflation happens
+ // here; the return value is the number of bytes offered.
final ByteBuffer s = buffer.slice();
s.position(pos);
- while (s.remaining() > 0 && !inf.finished()) {
- if (inf.needsInput()) {
- final int n = Math.min(s.remaining(), tmp.length);
- s.get(tmp, 0, n);
- inf.setInput(tmp, 0, n);
- }
- o += inf.inflate(b, o, b.length - o);
- }
- while (!inf.finished() && !inf.needsInput())
- o += inf.inflate(b, o, b.length - o);
- return o;
+ final byte[] tmp = new byte[Math.min(s.remaining(), 512)];
+ s.get(tmp, 0, tmp.length);
+ inf.setInput(tmp, 0, tmp.length);
+ return tmp.length;
}
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteWindow.java
index 5c77cff01..e95dfd30f 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteWindow.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ByteWindow.java
@@ -117,59 +117,10 @@ abstract class ByteWindow {
*/
protected abstract int copy(int pos, byte[] dstbuf, int dstoff, int cnt);
- /**
- * Pump bytes into the supplied inflater as input.
- *
- * @param pos
- * offset within the file to start supplying input from.
- * @param dstbuf
- * destination buffer the inflater should output decompressed
- * data to.
- * @param dstoff
- * current offset within <code>dstbuf</code> to inflate into.
- * @param inf
- * the inflater to feed input to. The caller is responsible for
- * initializing the inflater as multiple windows may need to
- * supply data to the same inflater to completely decompress
- * something.
- * @return updated <code>dstoff</code> based on the number of bytes
- * successfully copied into <code>dstbuf</code> by
- * <code>inf</code>. If the inflater is not yet finished then
- * another window's data must still be supplied as input to finish
- * decompression.
- * @throws DataFormatException
- * the inflater encountered an invalid chunk of data. Data
- * stream corruption is likely.
- */
- final int inflate(long pos, byte[] dstbuf, int dstoff, Inflater inf)
- throws DataFormatException {
- return inflate((int) (pos - start), dstbuf, dstoff, inf);
+ /**
+ * Supply bytes from this window to the inflater as input.
+ *
+ * @param pos
+ * offset within the file to start supplying input from; it is
+ * converted to a window-relative offset by subtracting start.
+ * @param inf
+ * the inflater to feed input to.
+ * @return the value returned by the window-relative overload. The two
+ * implementations visible in this change return the number of
+ * bytes handed to the inflater.
+ * @throws DataFormatException
+ * declared by the window-relative overload.
+ */
+ final int inflate(long pos, Inflater inf) throws DataFormatException {
+ return inflate((int) (pos - start), inf);
}
- /**
- * Pump bytes into the supplied inflater as input.
- *
- * @param pos
- * offset within the window to start supplying input from.
- * @param dstbuf
- * destination buffer the inflater should output decompressed
- * data to.
- * @param dstoff
- * current offset within <code>dstbuf</code> to inflate into.
- * @param inf
- * the inflater to feed input to. The caller is responsible for
- * initializing the inflater as multiple windows may need to
- * supply data to the same inflater to completely decompress
- * something.
- * @return updated <code>dstoff</code> based on the number of bytes
- * successfully copied into <code>dstbuf</code> by
- * <code>inf</code>. If the inflater is not yet finished then
- * another window's data must still be supplied as input to finish
- * decompression.
- * @throws DataFormatException
- * the inflater encountered an invalid chunk of data. Data
- * stream corruption is likely.
- */
- protected abstract int inflate(int pos, byte[] dstbuf, int dstoff,
- Inflater inf) throws DataFormatException;
+ /**
+ * Supply bytes from this window to the inflater as input.
+ *
+ * @param pos
+ * offset within the window to start supplying input from.
+ * @param inf
+ * the inflater to feed input to.
+ * @return implementation defined. The two implementations visible in
+ * this change return the number of bytes handed to the inflater.
+ * @throws DataFormatException
+ * declared for implementations; neither implementation visible
+ * in this change inflates data itself.
+ */
+ protected abstract int inflate(int pos, Inflater inf)
+ throws DataFormatException;
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LargePackedDeltaObject.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LargePackedDeltaObject.java
new file mode 100644
index 000000000..f46f988bc
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/LargePackedDeltaObject.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.storage.file;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.InflaterInputStream;
+
+import org.eclipse.jgit.errors.IncorrectObjectTypeException;
+import org.eclipse.jgit.errors.LargeObjectException;
+import org.eclipse.jgit.errors.MissingObjectException;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectStream;
+import org.eclipse.jgit.storage.pack.DeltaStream;
+
+/**
+ * Loader for a delta encoded object in a pack file whose content is only
+ * made available as a stream.
+ *
+ * {@link #getCachedBytes()} always throws {@link LargeObjectException}.
+ * {@link #openStream()} inflates the delta instruction stream from the pack
+ * and applies it through a {@link DeltaStream} on top of the base object,
+ * which is loaded from the same pack at {@code baseOffset}.
+ */
+class LargePackedDeltaObject extends ObjectLoader {
+	private final int type;
+
+	private final long size;
+
+	private final long objectOffset;
+
+	private final long baseOffset;
+
+	private final int headerLength;
+
+	private final PackFile pack;
+
+	private final FileObjectDatabase db;
+
+	/**
+	 * @param type
+	 *            type code reported by {@link #getType()}.
+	 * @param size
+	 *            size reported by {@link #getSize()}.
+	 * @param objectOffset
+	 *            offset of this delta object within the pack.
+	 * @param baseOffset
+	 *            offset of the delta base within the same pack.
+	 * @param headerLength
+	 *            number of bytes between {@code objectOffset} and the start
+	 *            of the compressed delta data.
+	 * @param pack
+	 *            pack file holding both the delta and its base.
+	 * @param db
+	 *            database used to create a cursor when streaming.
+	 */
+	LargePackedDeltaObject(int type, long size, long objectOffset,
+			long baseOffset, int headerLength, PackFile pack,
+			FileObjectDatabase db) {
+		this.type = type;
+		this.size = size;
+		this.objectOffset = objectOffset;
+		this.baseOffset = baseOffset;
+		this.headerLength = headerLength;
+		this.pack = pack;
+		this.db = db;
+	}
+
+	@Override
+	public int getType() {
+		return type;
+	}
+
+	@Override
+	public long getSize() {
+		return size;
+	}
+
+	/**
+	 * Always fails; this loader never materializes the object as an array.
+	 *
+	 * @throws LargeObjectException
+	 *             always. It carries the object id when the id can be
+	 *             looked up from the pack, and no id otherwise.
+	 */
+	@Override
+	public byte[] getCachedBytes() throws LargeObjectException {
+		try {
+			throw new LargeObjectException(getObjectId());
+		} catch (IOException cannotObtainId) {
+			throw new LargeObjectException();
+		}
+	}
+
+	/**
+	 * Open a buffered stream over the fully applied delta.
+	 *
+	 * A new {@link WindowCursor} is created for the stream and released
+	 * when the returned stream is closed. If the stream cannot be opened
+	 * the cursor is released before the failure propagates.
+	 */
+	@Override
+	public ObjectStream openStream() throws MissingObjectException, IOException {
+		final WindowCursor wc = new WindowCursor(db);
+		InputStream in;
+		boolean opened = false;
+		try {
+			in = open(wc);
+			opened = true;
+		} finally {
+			// No stream owns the cursor yet, so nobody else would release
+			// it if opening failed.
+			if (!opened)
+				wc.release();
+		}
+		in = new BufferedInputStream(in, 8192);
+		return new ObjectStream.Filter(type, size, in) {
+			@Override
+			public void close() throws IOException {
+				try {
+					wc.release();
+				} finally {
+					// Close the underlying stream even if release fails.
+					super.close();
+				}
+			}
+		};
+	}
+
+	/**
+	 * Open an unbuffered stream of this object's content using the given
+	 * cursor. Also used for each level of a delta chain, so intermediate
+	 * levels share one cursor and are not buffered individually.
+	 */
+	private InputStream open(final WindowCursor wc)
+			throws MissingObjectException, IOException,
+			IncorrectObjectTypeException {
+		InputStream delta;
+		try {
+			delta = new PackInputStream(pack, objectOffset + headerLength, wc);
+		} catch (IOException packGone) {
+			// If the pack file cannot be pinned into the cursor, it
+			// probably was repacked recently. Go find the object
+			// again and open the stream from that location instead.
+			//
+			return wc.open(getObjectId(), type).openStream();
+		}
+		delta = new InflaterInputStream(delta);
+
+		final ObjectLoader base = pack.load(wc, baseOffset);
+		return new DeltaStream(delta) {
+			@Override
+			protected InputStream openBase() throws IOException {
+				// Reuse the caller's cursor for a base that is itself a
+				// large delta; other loaders open their own stream.
+				if (base instanceof LargePackedDeltaObject)
+					return ((LargePackedDeltaObject) base).open(wc);
+				return base.openStream();
+			}
+
+			@Override
+			protected long getBaseSize() throws IOException {
+				return base.getSize();
+			}
+		};
+	}
+
+	/** Look up this object's id from its offset within the pack. */
+	private ObjectId getObjectId() throws IOException {
+		return pack.findObjectForOffset(objectOffset);
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
index f955c8af9..3fa1c1eeb 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackFile.java
@@ -611,7 +611,7 @@ public class PackFile implements Iterable {
, getPackFile()));
}
- private ObjectLoader load(final WindowCursor curs, final long pos)
+ ObjectLoader load(final WindowCursor curs, final long pos)
throws IOException {
final byte[] ib = curs.tempId;
readFully(pos, ib, 0, 20, curs);
@@ -648,13 +648,13 @@ public class PackFile implements Iterable {
ofs <<= 7;
ofs += (c & 127);
}
- return loadDelta(pos + p, sz, pos - ofs, curs);
+ return loadDelta(pos, p, sz, pos - ofs, curs);
}
case Constants.OBJ_REF_DELTA: {
readFully(pos + p, ib, 0, 20, curs);
long ofs = findDeltaBase(ObjectId.fromRaw(ib));
- return loadDelta(pos + p + 20, sz, ofs, curs);
+ return loadDelta(pos, p + 20, sz, ofs, curs);
}
default:
@@ -680,9 +680,20 @@ public class PackFile implements Iterable {
return ofs;
}
- private ObjectLoader loadDelta(final long posData, long sz,
- final long posBase, final WindowCursor curs) throws IOException,
+ private ObjectLoader loadDelta(long posSelf, int hdrLen, long sz,
+ long posBase, WindowCursor curs) throws IOException,
DataFormatException {
+ if (UnpackedObject.LARGE_OBJECT <= sz) {
+ // The delta instruction stream itself is pretty big, and
+ // that implies the resulting object is going to be massive.
+ // Use only the large delta format here.
+ //
+ byte[] hdr = getDeltaHeader(posSelf + hdrLen, curs);
+ return new LargePackedDeltaObject(getObjectType(curs, posBase), //
+ BinaryDelta.getResultSize(hdr), //
+ posSelf, posBase, hdrLen, this, curs.db);
+ }
+
byte[] data;
int type;
@@ -692,15 +703,89 @@ public class PackFile implements Iterable {
type = e.type;
} else {
ObjectLoader p = load(curs, posBase);
+ if (p.isLarge()) {
+ // The base itself is large. We have to produce a large
+ // delta stream as we don't want to build the whole base.
+ //
+ byte[] hdr = getDeltaHeader(posSelf + hdrLen, curs);
+ return new LargePackedDeltaObject(getObjectType(curs, posBase),
+ BinaryDelta.getResultSize(hdr), //
+ posSelf, posBase, hdrLen, this, curs.db);
+ }
data = p.getCachedBytes();
type = p.getType();
saveCache(posBase, data, type);
}
- data = BinaryDelta.apply(data, decompress(posData, sz, curs));
+ // At this point we have the base, and its small, and the delta
+ // stream also is small, so the result object cannot be more than
+ // 2x our small size. This occurs if the delta instructions were
+ // "copy entire base, literal insert entire delta". Go with the
+ // faster small object style at this point.
+ //
+ data = BinaryDelta.apply(data, decompress(posSelf + hdrLen, sz, curs));
return new ObjectLoader.SmallObject(type, data);
}
+ private byte[] getDeltaHeader(long pos, WindowCursor wc)
+ throws IOException, DataFormatException {
+ // The delta stream starts with two variable length integers, the
+ // base size and the result size. Each byte carries 7 payload bits,
+ // so a non-negative 64 bit value (63 bits) takes at most 9 bytes.
+ // Two of them need 18 bytes, the longest possible delta header.
+ //
+ final byte[] hdr = new byte[18];
+ wc.inflate(this, pos, hdr, 0);
+ return hdr;
+ }
+
+ private int getObjectType(final WindowCursor curs, long pos)
+         throws IOException {
+     // Follow the delta chain starting at pos until a non-delta object
+     // is found, and report the type of that object.
+     //
+     final byte[] ib = curs.tempId;
+     for (;;) {
+         readFully(pos, ib, 0, 20, curs);
+         int c = ib[0] & 0xff;
+         final int type = (c >> 4) & 7;
+         int p = 1;
+         while ((c & 0x80) != 0) // skip continuation bytes, only type is used
+             c = ib[p++] & 0xff;
+
+         switch (type) {
+         case Constants.OBJ_COMMIT:
+         case Constants.OBJ_TREE:
+         case Constants.OBJ_BLOB:
+         case Constants.OBJ_TAG:
+             return type;
+
+         case Constants.OBJ_OFS_DELTA: {
+             c = ib[p++] & 0xff;
+             long ofs = c & 127;
+             while ((c & 128) != 0) {
+                 ofs += 1;
+                 c = ib[p++] & 0xff;
+                 ofs <<= 7;
+                 ofs += (c & 127);
+             }
+             pos = pos - ofs;
+             continue;
+         }
+
+         case Constants.OBJ_REF_DELTA: {
+             readFully(pos + p, ib, 0, 20, curs);
+             pos = findDeltaBase(ObjectId.fromRaw(ib));
+             continue;
+         }
+
+         default:
+             throw new IOException(MessageFormat.format(
+                     JGitText.get().unknownObjectType, type));
+         }
+     }
+ }
+
LocalObjectRepresentation representation(final WindowCursor curs,
final AnyObjectId objectId) throws IOException {
final long pos = idx().findOffset(objectId);
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java
index 7ede751d0..e2fecca78 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java
@@ -147,7 +147,7 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
}
/**
- * Pump bytes into the supplied inflater as input.
+ * Inflate a region of the pack starting at {@code position}.
*
* @param pack
* the file the desired window is stored within.
@@ -170,13 +170,22 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs {
int inflate(final PackFile pack, long position, final byte[] dstbuf,
int dstoff) throws IOException, DataFormatException {
prepareInflater();
- for (;;) {
- pin(pack, position);
- dstoff = window.inflate(position, dstbuf, dstoff, inf);
- if (inf.finished())
- return dstoff;
- position = window.end;
- }
+ pin(pack, position);
+ position += window.inflate(position, inf);
+ do {
+ int n = inf.inflate(dstbuf, dstoff, dstbuf.length - dstoff);
+ if (n == 0) {
+ if (inf.needsInput()) {
+ pin(pack, position);
+ position += window.inflate(position, inf);
+ } else if (inf.finished())
+ return dstoff;
+ else
+ throw new DataFormatException();
+ }
+ dstoff += n;
+ } while (dstoff < dstbuf.length);
+ return dstoff;
}
ByteArrayWindow quickCopy(PackFile p, long pos, long cnt)
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
index 027ffd62a..494623df2 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/BinaryDelta.java
@@ -55,6 +55,51 @@ import org.eclipse.jgit.JGitText;
*
*/
public class BinaryDelta {
+ /**
+  * Length of the base object, as recorded in the delta stream header.
+  *
+  * @param delta
+  *            the delta stream, or at least the header of it.
+  * @return the base object's size.
+  */
+ public static long getBaseSize(final byte[] delta) {
+     int p = 0;
+     long baseLen = 0;
+     int c, shift = 0;
+     do {
+         c = delta[p++] & 0xff;
+         baseLen |= ((long) (c & 0x7f)) << shift; // int shift stops at bit 31
+         shift += 7;
+     } while ((c & 0x80) != 0);
+     return baseLen;
+ }
+
+ /**
+  * Length of the resulting object, as recorded in the delta stream header.
+  *
+  * @param delta
+  *            the delta stream, or at least the header of it.
+  * @return the resulting object's size.
+  */
+ public static long getResultSize(final byte[] delta) {
+     int p = 0;
+
+     // Skip length of the base object.
+     //
+     int c;
+     do {
+         c = delta[p++] & 0xff;
+     } while ((c & 0x80) != 0);
+
+     long resLen = 0;
+     int shift = 0;
+     do {
+         c = delta[p++] & 0xff;
+         resLen |= ((long) (c & 0x7f)) << shift; // int shift stops at bit 31
+         shift += 7;
+     } while ((c & 0x80) != 0);
+     return resLen;
+ }
/**
* Apply the changes defined by delta to the data in base, yielding a new
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaEncoder.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaEncoder.java
new file mode 100644
index 000000000..7d4f62fc1
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaEncoder.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.storage.pack;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.eclipse.jgit.lib.Constants;
+
+/** Encodes an instruction stream for {@link BinaryDelta}. */
+public class DeltaEncoder {
+ private static final int MAX_COPY = (0xff << 16) + (0xff << 8) + 0xff;
+
+ private final OutputStream out;
+
+ private final byte[] buf = new byte[16];
+
+ private int size;
+
+ /**
+ * Create an encoder.
+ *
+ * @param out
+ * buffer to store the instructions written.
+ * @param baseSize
+ * size of the base object, in bytes.
+ * @param resultSize
+ * size of the resulting object, after applying this instruction
+ * stream to the base object, in bytes.
+ * @throws IOException
+ * the output buffer cannot store the instruction stream's
+ * header with the size fields.
+ */
+ public DeltaEncoder(OutputStream out, long baseSize, long resultSize)
+ throws IOException {
+ this.out = out;
+ writeVarint(baseSize);
+ writeVarint(resultSize);
+ }
+
+ private void writeVarint(long sz) throws IOException {
+     // Emit sz as 7 bit groups, lowest group first; every byte except
+     // the last has 0x80 set. A value of exactly 0x80 needs two bytes.
+     int p = 0;
+     for (; sz >= 0x80; sz >>>= 7)
+         buf[p++] = (byte) (0x80 | (((int) sz) & 0x7f));
+     buf[p++] = (byte) (((int) sz) & 0x7f);
+     out.write(buf, 0, p);
+     size += p;
+ }
+
+ /** @return current size of the delta stream, in bytes. */
+ public int getSize() {
+ return size;
+ }
+
+ /**
+ * Insert a literal string of text, in UTF-8 encoding.
+ *
+ * @param text
+ * the string to insert.
+ * @throws IOException
+ * the instruction buffer can't store the instructions.
+ */
+ public void insert(String text) throws IOException {
+ insert(Constants.encode(text));
+ }
+
+ /**
+ * Insert a literal binary sequence.
+ *
+ * @param text
+ * the binary to insert.
+ * @throws IOException
+ * the instruction buffer can't store the instructions.
+ */
+ public void insert(byte[] text) throws IOException {
+ insert(text, 0, text.length);
+ }
+
+ /**
+  * Append literal bytes, split into insert commands of up to 127 bytes.
+  *
+  * @param text
+  *            array holding the bytes to insert.
+  * @param off
+  *            index in {@code text} of the first byte to insert.
+  * @param cnt
+  *            how many bytes to insert, starting at {@code off}.
+  * @throws IOException
+  *             the instruction buffer can't store the instructions.
+  */
+ public void insert(byte[] text, int off, int cnt) throws IOException {
+     for (int left = cnt; 0 < left;) {
+         final int chunk = left < 127 ? left : 127;
+         out.write((byte) chunk);
+         out.write(text, off, chunk);
+         size += chunk + 1;
+         off += chunk;
+         left -= chunk;
+     }
+ }
+
+ /**
+  * Create a copy instruction to copy from the base object.
+  *
+  * @param offset
+  *            absolute position in the base object to copy from.
+  * @param cnt
+  *            number of bytes to copy; 0 emits no instruction.
+  * @throws IOException
+  *             the instruction buffer cannot store the instructions.
+  */
+ public void copy(long offset, int cnt) throws IOException {
+     if (cnt == 0)
+         return; // an encoded size of 0 would be read back as 0x10000
+     while (cnt > MAX_COPY) { // one instruction holds at most MAX_COPY bytes
+         copy(offset, MAX_COPY);
+         offset += MAX_COPY;
+         cnt -= MAX_COPY;
+     }
+
+     int cmd = 0x80;
+     int p = 1;
+     if ((offset & 0xff) != 0) {
+         cmd |= 0x01;
+         buf[p++] = (byte) (offset & 0xff);
+     }
+     if ((offset & (0xff << 8)) != 0) {
+         cmd |= 0x02;
+         buf[p++] = (byte) ((offset >>> 8) & 0xff);
+     }
+     if ((offset & (0xff << 16)) != 0) {
+         cmd |= 0x04;
+         buf[p++] = (byte) ((offset >>> 16) & 0xff);
+     }
+     if ((offset & (0xff << 24)) != 0) {
+         cmd |= 0x08;
+         buf[p++] = (byte) ((offset >>> 24) & 0xff);
+     }
+
+     if (cnt != 0x10000) { // 0x10000 is implied when no size byte is written
+         if ((cnt & 0xff) != 0) {
+             cmd |= 0x10;
+             buf[p++] = (byte) (cnt & 0xff);
+         }
+         if ((cnt & (0xff << 8)) != 0) {
+             cmd |= 0x20;
+             buf[p++] = (byte) ((cnt >>> 8) & 0xff);
+         }
+         if ((cnt & (0xff << 16)) != 0) {
+             cmd |= 0x40;
+             buf[p++] = (byte) ((cnt >>> 16) & 0xff);
+         }
+     }
+
+     buf[0] = (byte) cmd;
+     out.write(buf, 0, p);
+     size += p;
+ }
+}
\ No newline at end of file
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaStream.java
new file mode 100644
index 000000000..6f479eb90
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/pack/DeltaStream.java
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 2007, Robin Rosenberg
+ * Copyright (C) 2006-2007, Shawn O. Pearce
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.storage.pack;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.eclipse.jgit.JGitText;
+import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.util.IO;
+
+/**
+ * Inflates a delta in an incremental way.
+ *
+ * Implementations must provide a means to access a stream for the base object.
+ * This stream may be accessed multiple times, in order to randomly position it
+ * to match the copy instructions. A {@code DeltaStream} performs an efficient
+ * skip by only moving through the delta stream, making restarts of stacked
+ * deltas reasonably efficient.
+ */
+public abstract class DeltaStream extends InputStream {
+ private static final int CMD_COPY = 0;
+
+ private static final int CMD_INSERT = 1;
+
+ private static final int CMD_EOF = 2;
+
+ private final InputStream deltaStream;
+
+ private long baseSize;
+
+ private long resultSize;
+
+ private final byte[] cmdbuf = new byte[512];
+
+ private int cmdptr;
+
+ private int cmdcnt;
+
+ /** Stream to read from the base object. */
+ private InputStream baseStream;
+
+ /** Current position within {@link #baseStream}. */
+ private long baseOffset;
+
+ private int curcmd;
+
+ /** If {@code curcmd == CMD_COPY}, position the base has to be at. */
+ private long copyOffset;
+
+ /** Total number of bytes in this current command. */
+ private int copySize;
+
+ /**
+  * Construct a delta application stream, reading instructions.
+  *
+  * @param deltaStream
+  *            the stream to read delta instructions from.
+  * @throws IOException
+  *             the delta instruction stream cannot be read, or is
+  *             inconsistent with the base object information.
+  */
+ public DeltaStream(final InputStream deltaStream) throws IOException {
+     this.deltaStream = deltaStream;
+     if (!fill(cmdbuf.length))
+         throw new EOFException();
+
+     // Length of the base object. Each 7 bit group is widened to
+     // long first, as an int shift cannot place bits above bit 31.
+     int c, shift = 0;
+     do {
+         c = cmdbuf[cmdptr++] & 0xff;
+         baseSize |= ((long) (c & 0x7f)) << shift;
+         shift += 7;
+     } while ((c & 0x80) != 0);
+
+     // Length of the resulting object.
+     //
+     shift = 0;
+     do {
+         c = cmdbuf[cmdptr++] & 0xff;
+         resultSize |= ((long) (c & 0x7f)) << shift;
+         shift += 7;
+     } while ((c & 0x80) != 0);
+
+     curcmd = next();
+ }
+
+ /**
+ * Open the base stream.
+ *
+ * The {@code DeltaStream} may close and reopen the base stream multiple
+ * times if copy instructions use offsets out of order. This can occur if a
+ * large block in the file was moved from near the top, to near the bottom.
+ * In such cases the reopened stream is skipped to the target offset, so
+ * {@code skip(long)} should be as efficient as possible.
+ *
+ * @return stream to read from the base object. This stream should not be
+ * buffered (or should be only minimally buffered), and does not
+ * need to support mark/reset.
+ * @throws IOException
+ * the base object cannot be opened for reading.
+ */
+ protected abstract InputStream openBase() throws IOException;
+
+ /**
+ * @return length of the base object, in bytes.
+ * @throws IOException
+ * the length of the base cannot be determined.
+ */
+ protected abstract long getBaseSize() throws IOException;
+
+ /** @return total size of this stream, in bytes. */
+ public long getSize() {
+ return resultSize;
+ }
+
+ @Override
+ public int read() throws IOException {
+ byte[] buf = new byte[1];
+ int n = read(buf, 0, 1);
+ return n == 1 ? buf[0] & 0xff : -1;
+ }
+
+ @Override
+ public void close() throws IOException {
+ deltaStream.close();
+ if (baseStream != null)
+ baseStream.close();
+ }
+
+ @Override
+ public long skip(long len) throws IOException {
+ long act = 0;
+ while (0 < len) {
+ long n = Math.min(len, copySize);
+ switch (curcmd) {
+ case CMD_COPY:
+ copyOffset += n;
+ break;
+
+ case CMD_INSERT:
+ cmdptr += n;
+ break;
+
+ case CMD_EOF:
+ return act;
+ default:
+ throw new CorruptObjectException(
+ JGitText.get().unsupportedCommand0);
+ }
+
+ act += n;
+ len -= n;
+ copySize -= n;
+ if (copySize == 0)
+ curcmd = next();
+ }
+ return act;
+ }
+
+ @Override
+ public int read(byte[] buf, int off, int len) throws IOException {
+     // Run commands until len bytes are produced or the delta ends.
+     int act = 0;
+     while (0 < len) {
+         int n = Math.min(len, copySize);
+         switch (curcmd) {
+         case CMD_COPY:
+             seekBase();
+             n = baseStream.read(buf, off, n);
+             if (n < 0)
+                 throw new CorruptObjectException(
+                         JGitText.get().baseLengthIncorrect);
+             // Keep both offsets in step, or seekBase() rewinds the base.
+             copyOffset += n;
+             baseOffset = copyOffset;
+             break;
+         case CMD_INSERT:
+             System.arraycopy(cmdbuf, cmdptr, buf, off, n);
+             cmdptr += n;
+             break;
+         case CMD_EOF:
+             return 0 < act ? act : -1;
+         default:
+             throw new CorruptObjectException(JGitText.get().unsupportedCommand0);
+         }
+
+         act += n;
+         off += n;
+         len -= n;
+         copySize -= n;
+         if (copySize == 0)
+             curcmd = next();
+     }
+     return act;
+ }
+
+ private boolean fill(final int need) throws IOException {
+ int n = have();
+ if (need < n)
+ return true;
+ if (n == 0) {
+ cmdptr = 0;
+ cmdcnt = 0;
+ } else if (cmdbuf.length - cmdptr < need) {
+ // There isn't room for the entire worst-case copy command,
+ // so shift the array down to make sure we can use the entire
+ // command without having it span across the end of the array.
+ //
+ System.arraycopy(cmdbuf, cmdptr, cmdbuf, 0, n);
+ cmdptr = 0;
+ cmdcnt = n;
+ }
+
+ do {
+ n = deltaStream.read(cmdbuf, cmdcnt, cmdbuf.length - cmdcnt);
+ if (n < 0)
+ return 0 < have();
+ cmdcnt += n;
+ } while (cmdcnt < cmdbuf.length);
+ return true;
+ }
+
+ private int next() throws IOException {
+     if (!fill(8))
+         return CMD_EOF;
+
+     final int cmd = cmdbuf[cmdptr++] & 0xff;
+     if ((cmd & 0x80) != 0) {
+         // Copy a segment of the base, given as an offset and a
+         // length. The top offset byte is widened before shifting
+         // so offsets of 2 GiB and more do not turn negative.
+         //
+         copyOffset = 0;
+         if ((cmd & 0x01) != 0)
+             copyOffset = cmdbuf[cmdptr++] & 0xff;
+         if ((cmd & 0x02) != 0)
+             copyOffset |= (cmdbuf[cmdptr++] & 0xff) << 8;
+         if ((cmd & 0x04) != 0)
+             copyOffset |= (cmdbuf[cmdptr++] & 0xff) << 16;
+         if ((cmd & 0x08) != 0)
+             copyOffset |= ((long) (cmdbuf[cmdptr++] & 0xff)) << 24;
+
+         copySize = 0;
+         if ((cmd & 0x10) != 0)
+             copySize = cmdbuf[cmdptr++] & 0xff;
+         if ((cmd & 0x20) != 0)
+             copySize |= (cmdbuf[cmdptr++] & 0xff) << 8;
+         if ((cmd & 0x40) != 0)
+             copySize |= (cmdbuf[cmdptr++] & 0xff) << 16;
+         if (copySize == 0)
+             copySize = 0x10000;
+         return CMD_COPY;
+
+     } else if (cmd != 0) {
+         // Anything else the data is literal within the delta
+         // itself. Page the entire thing into the cmdbuf, if
+         // its not already there.
+         //
+         fill(cmd);
+         copySize = cmd;
+         return CMD_INSERT;
+
+     } else {
+         // cmd == 0 has been reserved for future encoding but
+         // for now its not acceptable.
+         //
+         throw new CorruptObjectException(JGitText.get().unsupportedCommand0);
+     }
+ }
+
+ private int have() {
+ return cmdcnt - cmdptr;
+ }
+
+ private void seekBase() throws IOException {
+     // Position baseStream at copyOffset, reopening it when it is past.
+     if (baseStream != null && baseOffset == copyOffset)
+         return; // already at the next byte to copy
+
+     long skip = copyOffset;
+     if (baseStream == null) {
+         baseStream = openBase();
+         if (getBaseSize() != baseSize)
+             throw new CorruptObjectException(JGitText.get().baseLengthIncorrect);
+     } else if (copyOffset < baseOffset) {
+         baseStream.close(); // too far along: start over from the top
+         baseStream = openBase();
+     } else {
+         skip = copyOffset - baseOffset; // target is ahead, skip the gap
+     }
+
+     IO.skipFully(baseStream, skip);
+     baseOffset = copyOffset;
+ }
+}