From fa23482ca71314ef412ca7b2a4a5ae6886507dcf Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 1 Jul 2010 18:26:17 -0700 Subject: [PATCH] Support large loose objects as streams Big loose objects can now be streamed if they are over the large object size threshold. This prevents the JVM heap from exploding with a very large byte array to hold the slurped file, and then again with its uncompressed copy. We may have slightly slowed down the simple case for small loose objects, as the loader no longer slurps the entire thing and decompresses in memory. To try and keep good performance for the very common small objects that are below 8 KiB in size, buffers are set to 8 KiB, causing the reader to slurp most of the file anyway. However the data has to be copied at least once, from the BufferedInputStream into the InflaterInputStream. New unit tests are supplied to get nearly 100% code coverage on the unpacked code paths, for both standard and pack style loose objects. We tested a fair chunk of the code elsewhere, but these new tests are better isolated to the specific branches in the code path. Change-Id: I87b764ab1b84225e9b5619a2a55fd8eaa640e1fe Signed-off-by: Shawn O. Pearce --- .../jgit/storage/file/UnpackedObjectTest.java | 430 ++++++++++++++++++ .../org/eclipse/jgit/lib/ObjectLoader.java | 51 +++ .../org/eclipse/jgit/lib/ObjectStream.java | 83 ++++ .../jgit/storage/file/ObjectDirectory.java | 9 +- .../jgit/storage/file/UnpackedObject.java | 321 +++++++++++++ .../storage/file/UnpackedObjectLoader.java | 230 ---------- .../jgit/storage/file/WindowCursor.java | 2 +- .../jgit/transport/WalkFetchConnection.java | 7 +- 8 files changed, 898 insertions(+), 235 deletions(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObject.java delete mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java new file mode 100644 index 000000000..4ca64d3eb --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/storage/file/UnpackedObjectTest.java @@ -0,0 +1,430 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.storage.file; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.text.MessageFormat; +import java.util.Arrays; +import java.util.zip.DeflaterOutputStream; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.junit.LocalDiskRepositoryTestCase; +import org.eclipse.jgit.junit.TestRng; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectInserter; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectStream; +import org.eclipse.jgit.util.IO; + +public class UnpackedObjectTest extends LocalDiskRepositoryTestCase { + private TestRng rng; + + private FileRepository repo; + + private WindowCursor wc; + + protected void setUp() throws Exception { + super.setUp(); + rng = new TestRng(getName()); + repo = createBareRepository(); + wc = (WindowCursor) repo.newObjectReader(); + } + + protected void tearDown() throws Exception { + if (wc != null) + wc.release(); + super.tearDown(); + } + + public void testStandardFormat_SmallObject() throws Exception { + final int type = Constants.OBJ_BLOB; + byte[] data = rng.nextBytes(300); + byte[] gz = compressStandardFormat(type, data); + ObjectId id = ObjectId.zeroId(); + + ObjectLoader ol = UnpackedObject.open(new ByteArrayInputStream(gz), + path(id), id, wc); + assertNotNull("created loader", ol); + assertEquals(type, ol.getType()); + assertEquals(data.length, ol.getSize()); + assertFalse("is not large", ol.isLarge()); + assertTrue("same content", Arrays.equals(data, ol.getCachedBytes())); + + ObjectStream in = ol.openStream(); + assertNotNull("have stream", in); + assertEquals(type, in.getType()); + assertEquals(data.length, in.getSize()); + byte[] data2 = new byte[data.length]; + IO.readFully(in, data2, 0, data.length); + assertTrue("same content", Arrays.equals(data2, data)); + assertEquals("stream at EOF", -1, in.read()); + in.close(); + } + + public void testStandardFormat_LargeObject() throws Exception { + final int type = Constants.OBJ_BLOB; + byte[] data = rng.nextBytes(UnpackedObject.LARGE_OBJECT + 5); + ObjectId id = new ObjectInserter.Formatter().idFor(type, data); + write(id, compressStandardFormat(type, data)); + + ObjectLoader ol; + { + FileInputStream fs = new FileInputStream(path(id)); + try { + ol = UnpackedObject.open(fs, path(id), id, wc); + } finally { + fs.close(); + } + } + + assertNotNull("created loader", ol); + assertEquals(type, ol.getType()); + assertEquals(data.length, ol.getSize()); + assertTrue("is large", ol.isLarge()); + try { + ol.getCachedBytes(); + fail("Should have thrown LargeObjectException"); + } catch (LargeObjectException tooBig) { + assertEquals(id.name(), tooBig.getMessage()); + } + + ObjectStream in = ol.openStream(); + assertNotNull("have stream", in); + assertEquals(type, in.getType()); + assertEquals(data.length, in.getSize()); + byte[] data2 = new byte[data.length]; + IO.readFully(in, data2, 0, data.length); + assertTrue("same content", Arrays.equals(data2, data)); + assertEquals("stream at EOF", -1, in.read()); + in.close(); + } + + public void testStandardFormat_NegativeSize() throws Exception { + ObjectId id = ObjectId.zeroId(); + byte[] data = rng.nextBytes(300); + + try { + byte[] gz = compressStandardFormat("blob", "-1", data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectNegativeSize), coe + .getMessage()); + } + } + + public void testStandardFormat_InvalidType() throws Exception { + ObjectId id = ObjectId.zeroId(); + byte[] data = rng.nextBytes(300); + + try { + byte[] gz = compressStandardFormat("not.a.type", "1", data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectInvalidType), coe + .getMessage()); + } + } + + public void testStandardFormat_NoHeader() throws Exception { + ObjectId id = ObjectId.zeroId(); + byte[] data = {}; + + try { + byte[] gz = compressStandardFormat("", "", data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectNoHeader), coe + .getMessage()); + } + } + + public void testStandardFormat_GarbageAfterSize() throws Exception { + ObjectId id = ObjectId.zeroId(); + byte[] data = rng.nextBytes(300); + + try { + byte[] gz = compressStandardFormat("blob", "1foo", data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectGarbageAfterSize), + coe.getMessage()); + } + } + + public void testStandardFormat_SmallObject_CorruptZLibStream() + throws Exception { + ObjectId id = ObjectId.zeroId(); + byte[] data = rng.nextBytes(300); + + try { + byte[] gz = compressStandardFormat(Constants.OBJ_BLOB, data); + for (int i = 5; i < gz.length; i++) + gz[i] = 0; + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectBadStream), coe + .getMessage()); + } + } + + public void testStandardFormat_LargeObject_CorruptZLibStream() + throws Exception { + final int type = Constants.OBJ_BLOB; + byte[] data = rng.nextBytes(UnpackedObject.LARGE_OBJECT + 5); + ObjectId id = new ObjectInserter.Formatter().idFor(type, data); + byte[] gz = compressStandardFormat(type, data); + gz[gz.length - 1] = 0; + gz[gz.length - 2] = 0; + + write(id, gz); + + ObjectLoader ol; + { + FileInputStream fs = new FileInputStream(path(id)); + try { + ol = UnpackedObject.open(fs, path(id), id, wc); + } finally { + fs.close(); + } + } + + try { + byte[] tmp = new byte[data.length]; + InputStream in = ol.openStream(); + try { + IO.readFully(in, tmp, 0, tmp.length); + } finally { + in.close(); + } + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectBadStream), coe + .getMessage()); + } + } + + public void testPackFormat_SmallObject() throws Exception { + final int type = Constants.OBJ_BLOB; + byte[] data = rng.nextBytes(300); + byte[] gz = compressPackFormat(type, data); + ObjectId id = ObjectId.zeroId(); + + ObjectLoader ol = UnpackedObject.open(new ByteArrayInputStream(gz), + path(id), id, wc); + assertNotNull("created loader", ol); + assertEquals(type, ol.getType()); + assertEquals(data.length, ol.getSize()); + assertFalse("is not large", ol.isLarge()); + assertTrue("same content", Arrays.equals(data, ol.getCachedBytes())); + + ObjectStream in = ol.openStream(); + assertNotNull("have stream", in); + assertEquals(type, in.getType()); + assertEquals(data.length, in.getSize()); + byte[] data2 = new byte[data.length]; + IO.readFully(in, data2, 0, data.length); + assertTrue("same content", Arrays.equals(data, ol.getCachedBytes())); + in.close(); + } + + public void testPackFormat_LargeObject() throws Exception { + final int type = Constants.OBJ_BLOB; + byte[] data = rng.nextBytes(UnpackedObject.LARGE_OBJECT + 5); + ObjectId id = new ObjectInserter.Formatter().idFor(type, data); + write(id, compressPackFormat(type, data)); + + ObjectLoader ol; + { + FileInputStream fs = new FileInputStream(path(id)); + try { + ol = UnpackedObject.open(fs, path(id), id, wc); + } finally { + fs.close(); + } + } + + assertNotNull("created loader", ol); + assertEquals(type, ol.getType()); + assertEquals(data.length, ol.getSize()); + assertTrue("is large", ol.isLarge()); + try { + ol.getCachedBytes(); + fail("Should have thrown LargeObjectException"); + } catch (LargeObjectException tooBig) { + assertEquals(id.name(), tooBig.getMessage()); + } + + ObjectStream in = ol.openStream(); + assertNotNull("have stream", in); + assertEquals(type, in.getType()); + assertEquals(data.length, in.getSize()); + byte[] data2 = new byte[data.length]; + IO.readFully(in, data2, 0, data.length); + assertTrue("same content", Arrays.equals(data2, data)); + assertEquals("stream at EOF", -1, in.read()); + in.close(); + } + + public void testPackFormat_DeltaNotAllowed() throws Exception { + ObjectId id = ObjectId.zeroId(); + byte[] data = rng.nextBytes(300); + + try { + byte[] gz = compressPackFormat(Constants.OBJ_OFS_DELTA, data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectInvalidType), coe + .getMessage()); + } + + try { + byte[] gz = compressPackFormat(Constants.OBJ_REF_DELTA, data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectInvalidType), coe + .getMessage()); + } + + try { + byte[] gz = compressPackFormat(Constants.OBJ_TYPE_5, data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectInvalidType), coe + .getMessage()); + } + + try { + byte[] gz = compressPackFormat(Constants.OBJ_EXT, data); + UnpackedObject.open(new ByteArrayInputStream(gz), path(id), id, wc); + fail("Did not throw CorruptObjectException"); + } catch (CorruptObjectException coe) { + assertEquals(MessageFormat.format(JGitText.get().objectIsCorrupt, + id.name(), JGitText.get().corruptObjectInvalidType), coe + .getMessage()); + } + } + + private byte[] compressStandardFormat(int type, byte[] data) + throws IOException { + String typeString = Constants.typeString(type); + String length = String.valueOf(data.length); + return compressStandardFormat(typeString, length, data); + } + + private byte[] compressStandardFormat(String type, String length, + byte[] data) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + DeflaterOutputStream d = new DeflaterOutputStream(out); + d.write(Constants.encodeASCII(type)); + d.write(' '); + d.write(Constants.encodeASCII(length)); + d.write(0); + d.write(data); + d.finish(); + return out.toByteArray(); + } + + private byte[] compressPackFormat(int type, byte[] data) throws IOException { + byte[] hdr = new byte[64]; + int rawLength = data.length; + int nextLength = rawLength >>> 4; + hdr[0] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (type << 4) | (rawLength & 0x0F)); + rawLength = nextLength; + int n = 1; + while (rawLength > 0) { + nextLength >>>= 7; + hdr[n++] = (byte) ((nextLength > 0 ? 0x80 : 0x00) | (rawLength & 0x7F)); + rawLength = nextLength; + } + + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + out.write(hdr, 0, n); + + DeflaterOutputStream d = new DeflaterOutputStream(out); + d.write(data); + d.finish(); + return out.toByteArray(); + } + + private File path(ObjectId id) { + return repo.getObjectDatabase().fileFor(id); + } + + private void write(ObjectId id, byte[] data) throws IOException { + File path = path(id); + path.getParentFile().mkdirs(); + FileOutputStream out = new FileOutputStream(path); + try { + out.write(data); + } finally { + out.close(); + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java index e7be11a13..312fa958c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java @@ -173,4 +173,55 @@ public abstract class ObjectLoader { out.write(getCachedBytes()); } } + + /** + * Simple loader around the cached byte array. + *

+ * ObjectReader implementations can use this stream type when the object's + * content is small enough to be accessed as a single byte array. + */ + public static class SmallObject extends ObjectLoader { + private final int type; + + private final byte[] data; + + /** + * Construct a small object loader. + * + * @param type + * type of the object. + * @param data + * the object's data array. This array will be returned as-is + * for the {@link #getCachedBytes()} method. + */ + public SmallObject(int type, byte[] data) { + this.type = type; + this.data = data; + } + + @Override + public int getType() { + return type; + } + + @Override + public long getSize() { + return getCachedBytes().length; + } + + @Override + public boolean isLarge() { + return false; + } + + @Override + public byte[] getCachedBytes() { + return data; + } + + @Override + public ObjectStream openStream() { + return new ObjectStream.SmallStream(this); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java index ec2e8f099..86d66439d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java @@ -43,6 +43,7 @@ package org.eclipse.jgit.lib; +import java.io.IOException; import java.io.InputStream; /** Stream of data coming from an object loaded by {@link ObjectLoader}. */ @@ -134,4 +135,86 @@ public abstract class ObjectStream extends InputStream { ptr = mark; } } + + /** + * Simple filter stream around another stream. + *

+ * ObjectLoader implementations can use this stream type when the object's + * content is available from a standard InputStream. + */ + public static class Filter extends ObjectStream { + private final int type; + + private final long size; + + private final InputStream in; + + /** + * Create a filter stream for an object. + * + * @param type + * the type of the object. + * @param size + * total size of the object, in bytes. + * @param in + * stream the object's raw data is available from. This + * stream should be buffered with some reasonable amount of + * buffering. + */ + public Filter(int type, long size, InputStream in) { + this.type = type; + this.size = size; + this.in = in; + } + + @Override + public int getType() { + return type; + } + + @Override + public long getSize() { + return size; + } + + @Override + public int available() throws IOException { + return in.available(); + } + + @Override + public long skip(long n) throws IOException { + return in.skip(n); + } + + @Override + public int read() throws IOException { + return in.read(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return in.read(b, off, len); + } + + @Override + public boolean markSupported() { + return in.markSupported(); + } + + @Override + public void mark(int readlimit) { + in.mark(readlimit); + } + + @Override + public void reset() throws IOException { + in.reset(); + } + + @Override + public void close() throws IOException { + in.close(); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ObjectDirectory.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ObjectDirectory.java index 7020b7a67..49a69a536 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ObjectDirectory.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/ObjectDirectory.java @@ -45,6 +45,7 @@ package org.eclipse.jgit.storage.file; import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; @@ -326,7 +327,13 @@ public class ObjectDirectory extends FileObjectDatabase { final String objectName, final AnyObjectId objectId) throws IOException { try { - return new UnpackedObjectLoader(fileFor(objectName), objectId); + File path = fileFor(objectName); + FileInputStream in = new FileInputStream(path); + try { + return UnpackedObject.open(in, path, objectId, curs); + } finally { + in.close(); + } } catch (FileNotFoundException noFile) { return null; } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObject.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObject.java new file mode 100644 index 000000000..9072fcd2d --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObject.java @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2007, Robin Rosenberg + * Copyright (C) 2006-2008, Shawn O. Pearce + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.storage.file; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Inflater; +import java.util.zip.InflaterInputStream; +import java.util.zip.ZipException; + +import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.errors.CorruptObjectException; +import org.eclipse.jgit.errors.LargeObjectException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.AnyObjectId; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.InflaterCache; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectStream; +import org.eclipse.jgit.util.IO; +import org.eclipse.jgit.util.MutableInteger; +import org.eclipse.jgit.util.RawParseUtils; + +/** + * Loose object loader. This class loads an object not stored in a pack. + */ +public class UnpackedObject { + private static final int BUFFER_SIZE = 8192; + + static final int LARGE_OBJECT = 1024 * 1024; + + /** + * Parse an object from the unpacked object format. + * + * @param raw + * complete contents of the compressed object. + * @param id + * expected ObjectId of the object, used only for error reporting + * in exceptions. + * @return loader to read the inflated contents. + * @throws IOException + * the object cannot be parsed. + */ + public static ObjectLoader parse(byte[] raw, AnyObjectId id) + throws IOException { + WindowCursor wc = new WindowCursor(null); + try { + return open(new ByteArrayInputStream(raw), null, id, wc); + } finally { + wc.release(); + } + } + + static ObjectLoader open(InputStream in, File path, AnyObjectId id, + WindowCursor wc) throws IOException { + try { + in = buffer(in); + in.mark(20); + final byte[] hdr = new byte[64]; + IO.readFully(in, hdr, 0, 2); + + if (isStandardFormat(hdr)) { + in.reset(); + in = inflate(in, wc); + int avail = readSome(in, hdr, 0, 64); + if (avail < 5) + throw new CorruptObjectException(id, + JGitText.get().corruptObjectNoHeader); + + final MutableInteger p = new MutableInteger(); + int type = Constants.decodeTypeString(id, hdr, (byte) ' ', p); + long size = RawParseUtils.parseLongBase10(hdr, p.value, p); + if (size < 0) + throw new CorruptObjectException(id, + JGitText.get().corruptObjectNegativeSize); + if (hdr[p.value++] != 0) + throw new CorruptObjectException(id, + JGitText.get().corruptObjectGarbageAfterSize); + if (path == null && Integer.MAX_VALUE < size) + throw new LargeObjectException(id.copy()); + if (size < LARGE_OBJECT || path == null) { + byte[] data = new byte[(int) size]; + int n = avail - p.value; + if (n > 0) + System.arraycopy(hdr, p.value, data, 0, n); + IO.readFully(in, data, n, data.length - n); + return new ObjectLoader.SmallObject(type, data); + } + return new LargeObject(type, size, path, id, wc.db); + + } else { + readSome(in, hdr, 2, 18); + int c = hdr[0] & 0xff; + int type = (c >> 4) & 7; + long size = c & 15; + int shift = 4; + int p = 1; + while ((c & 0x80) != 0) { + c = hdr[p++] & 0xff; + size += (c & 0x7f) << shift; + shift += 7; + } + + switch (type) { + case Constants.OBJ_COMMIT: + case Constants.OBJ_TREE: + case Constants.OBJ_BLOB: + case Constants.OBJ_TAG: + // Acceptable types for a loose object. + break; + default: + throw new CorruptObjectException(id, + JGitText.get().corruptObjectInvalidType); + } + + if (path == null && Integer.MAX_VALUE < size) + throw new LargeObjectException(id.copy()); + if (size < LARGE_OBJECT || path == null) { + in.reset(); + IO.skipFully(in, p); + in = inflate(in, wc); + byte[] data = new byte[(int) size]; + IO.readFully(in, data, 0, data.length); + return new ObjectLoader.SmallObject(type, data); + } + return new LargeObject(type, size, path, id, wc.db); + } + } catch (ZipException badStream) { + throw new CorruptObjectException(id, + JGitText.get().corruptObjectBadStream); + } + } + + private static boolean isStandardFormat(final byte[] hdr) { + // Try to determine if this is a standard format loose object or + // a pack style loose object. The standard format is completely + // compressed with zlib so the first byte must be 0x78 (15-bit + // window size, deflated) and the first 16 bit word must be + // evenly divisible by 31. Otherwise its a pack style object. + // + final int fb = hdr[0] & 0xff; + return fb == 0x78 && (((fb << 8) | hdr[1] & 0xff) % 31) == 0; + } + + private static InputStream inflate(InputStream in, final ObjectId id) { + final Inflater inf = InflaterCache.get(); + return new InflaterInputStream(in, inf) { + @Override + public int read(byte[] b, int off, int cnt) throws IOException { + try { + return super.read(b, off, cnt); + } catch (ZipException badStream) { + throw new CorruptObjectException(id, + JGitText.get().corruptObjectBadStream); + } + } + + @Override + public void close() throws IOException { + super.close(); + InflaterCache.release(inf); + } + }; + } + + private static InputStream inflate(InputStream in, WindowCursor wc) { + return new InflaterInputStream(in, wc.inflater(), BUFFER_SIZE); + } + + private static BufferedInputStream buffer(InputStream in) { + return new BufferedInputStream(in, BUFFER_SIZE); + } + + private static int readSome(InputStream in, final byte[] hdr, int off, + int cnt) throws IOException { + int avail = 0; + while (0 < cnt) { + int n = in.read(hdr, off, cnt); + if (n < 0) + break; + avail += n; + cnt -= n; + } + return avail; + } + + private static final class LargeObject extends ObjectLoader { + private final int type; + + private final long size; + + private final File path; + + private final ObjectId id; + + private final FileObjectDatabase source; + + private LargeObject(int type, long size, File path, AnyObjectId id, + FileObjectDatabase db) { + this.type = type; + this.size = size; + this.path = path; + this.id = id.copy(); + this.source = db; + } + + @Override + public int getType() { + return type; + } + + @Override + public long getSize() { + return size; + } + + @Override + public boolean isLarge() { + return true; + } + + @Override + public byte[] getCachedBytes() throws LargeObjectException { + throw new LargeObjectException(id); + } + + @Override + public ObjectStream openStream() throws MissingObjectException, + IOException { + InputStream in; + try { + in = buffer(new FileInputStream(path)); + } catch (FileNotFoundException gone) { + // If the loose file no longer exists, it may have been + // moved into a pack file in the mean time. Try again + // to locate the object. + // + return source.open(id, type).openStream(); + } + + boolean ok = false; + try { + final byte[] hdr = new byte[64]; + in.mark(20); + IO.readFully(in, hdr, 0, 2); + + if (isStandardFormat(hdr)) { + in.reset(); + in = buffer(inflate(in, id)); + while (0 < in.read()) + continue; + } else { + readSome(in, hdr, 2, 18); + int c = hdr[0] & 0xff; + int p = 1; + while ((c & 0x80) != 0) + c = hdr[p++] & 0xff; + + in.reset(); + IO.skipFully(in, p); + in = buffer(inflate(in, id)); + } + + ok = true; + return new ObjectStream.Filter(type, size, in); + } finally { + if (!ok) + in.close(); + } + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java deleted file mode 100644 index 4a70793d0..000000000 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (C) 2007, Robin Rosenberg - * Copyright (C) 2006-2008, Shawn O. Pearce - * and other copyright owners as documented in the project's IP log. - * - * This program and the accompanying materials are made available - * under the terms of the Eclipse Distribution License v1.0 which - * accompanies this distribution, is reproduced below, and is - * available at http://www.eclipse.org/org/documents/edl-v10.php - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * - Neither the name of the Eclipse Foundation, Inc. nor the - * names of its contributors may be used to endorse or promote - * products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND - * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -package org.eclipse.jgit.storage.file; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.zip.DataFormatException; -import java.util.zip.Inflater; - -import org.eclipse.jgit.JGitText; -import org.eclipse.jgit.errors.CorruptObjectException; -import org.eclipse.jgit.errors.MissingObjectException; -import org.eclipse.jgit.lib.AnyObjectId; -import org.eclipse.jgit.lib.Constants; -import org.eclipse.jgit.lib.InflaterCache; -import org.eclipse.jgit.lib.ObjectLoader; -import org.eclipse.jgit.lib.ObjectStream; -import org.eclipse.jgit.util.IO; -import org.eclipse.jgit.util.MutableInteger; -import org.eclipse.jgit.util.RawParseUtils; - -/** - * Loose object loader. This class loads an object not stored in a pack. - */ -public class UnpackedObjectLoader extends ObjectLoader { - private final int objectType; - - private final int objectSize; - - private final byte[] bytes; - - /** - * Construct an ObjectLoader to read from the file. - * - * @param path - * location of the loose object to read. - * @param id - * expected identity of the object being loaded, if known. - * @throws FileNotFoundException - * the loose object file does not exist. - * @throws IOException - * the loose object file exists, but is corrupt. - */ - public UnpackedObjectLoader(final File path, final AnyObjectId id) - throws IOException { - this(IO.readFully(path), id); - } - - /** - * Construct an ObjectLoader from a loose object's compressed form. - * - * @param compressed - * entire content of the loose object file. - * @throws CorruptObjectException - * The compressed data supplied does not match the format for a - * valid loose object. - */ - public UnpackedObjectLoader(final byte[] compressed) - throws CorruptObjectException { - this(compressed, null); - } - - private UnpackedObjectLoader(final byte[] compressed, final AnyObjectId id) - throws CorruptObjectException { - // Try to determine if this is a legacy format loose object or - // a new style loose object. The legacy format was completely - // compressed with zlib so the first byte must be 0x78 (15-bit - // window size, deflated) and the first 16 bit word must be - // evenly divisible by 31. Otherwise its a new style loose - // object. - // - final Inflater inflater = InflaterCache.get(); - try { - final int fb = compressed[0] & 0xff; - if (fb == 0x78 && (((fb << 8) | compressed[1] & 0xff) % 31) == 0) { - inflater.setInput(compressed); - final byte[] hdr = new byte[64]; - int avail = 0; - while (!inflater.finished() && avail < hdr.length) - try { - int uncompressed = inflater.inflate(hdr, avail, - hdr.length - avail); - if (uncompressed == 0) { - throw new CorruptObjectException(id, - JGitText.get().corruptObjectBadStreamCorruptHeader); - } - avail += uncompressed; - } catch (DataFormatException dfe) { - final CorruptObjectException coe; - coe = new CorruptObjectException(id, JGitText.get().corruptObjectBadStream); - coe.initCause(dfe); - throw coe; - } - if (avail < 5) - throw new CorruptObjectException(id, JGitText.get().corruptObjectNoHeader); - - final MutableInteger p = new MutableInteger(); - objectType = Constants.decodeTypeString(id, hdr, (byte) ' ', p); - objectSize = RawParseUtils.parseBase10(hdr, p.value, p); - if (objectSize < 0) - throw new CorruptObjectException(id, JGitText.get().corruptObjectNegativeSize); - if (hdr[p.value++] != 0) - throw new CorruptObjectException(id, JGitText.get().corruptObjectGarbageAfterSize); - bytes = new byte[objectSize]; - if (p.value < avail) - System.arraycopy(hdr, p.value, bytes, 0, avail - p.value); - decompress(id, inflater, avail - p.value); - } else { - int p = 0; - int c = compressed[p++] & 0xff; - final int typeCode = (c >> 4) & 7; - int size = c & 15; - int shift = 4; - while ((c & 0x80) != 0) { - c = compressed[p++] & 0xff; - size += (c & 0x7f) << shift; - shift += 7; - } - - switch (typeCode) { - case Constants.OBJ_COMMIT: - case Constants.OBJ_TREE: - case Constants.OBJ_BLOB: - case Constants.OBJ_TAG: - objectType = typeCode; - break; - default: - throw new CorruptObjectException(id, JGitText.get().corruptObjectInvalidType); - } - - objectSize = size; - bytes = new byte[objectSize]; - inflater.setInput(compressed, p, compressed.length - p); - decompress(id, inflater, 0); - } - } finally { - InflaterCache.release(inflater); - } - } - - private void decompress(final AnyObjectId id, final Inflater inf, int p) - throws CorruptObjectException { - try { - while (!inf.finished()) { - int uncompressed = inf.inflate(bytes, p, objectSize - p); - p += uncompressed; - if (uncompressed == 0 && !inf.finished()) { - throw new CorruptObjectException(id, - JGitText.get().corruptObjectBadStreamCorruptHeader); - } - } - } catch (DataFormatException dfe) { - final CorruptObjectException coe; - coe = new CorruptObjectException(id, JGitText.get().corruptObjectBadStream); - coe.initCause(dfe); - throw coe; - } - if (p != objectSize) - throw new CorruptObjectException(id, JGitText.get().corruptObjectIncorrectLength); - } - - @Override - public int getType() { - return objectType; - } - - @Override - public long getSize() { - return objectSize; - } - - @Override - public byte[] getCachedBytes() { - return bytes; - } - - @Override - public final boolean isLarge() { - return false; - } - - @Override - public final ObjectStream openStream() throws MissingObjectException, - IOException { - return new ObjectStream.SmallStream(this); - } -} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java index 544498573..3527eb08c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/WindowCursor.java @@ -71,7 +71,7 @@ final class WindowCursor extends ObjectReader implements ObjectReuseAsIs { private ByteWindow window; - private final FileObjectDatabase db; + final FileObjectDatabase db; WindowCursor(FileObjectDatabase db) { this.db = db; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java index faea378e9..237b4314c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/WalkFetchConnection.java @@ -70,6 +70,7 @@ import org.eclipse.jgit.lib.MutableObjectId; import org.eclipse.jgit.lib.ObjectChecker; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ObjectInserter; +import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; @@ -84,7 +85,7 @@ import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.storage.file.ObjectDirectory; import org.eclipse.jgit.storage.file.PackIndex; import org.eclipse.jgit.storage.file.PackLock; -import org.eclipse.jgit.storage.file.UnpackedObjectLoader; +import org.eclipse.jgit.storage.file.UnpackedObject; import org.eclipse.jgit.treewalk.TreeWalk; /** @@ -598,9 +599,9 @@ class WalkFetchConnection extends BaseFetchConnection { private void verifyAndInsertLooseObject(final AnyObjectId id, final byte[] compressed) throws IOException { - final UnpackedObjectLoader uol; + final ObjectLoader uol; try { - uol = new UnpackedObjectLoader(compressed); + uol = UnpackedObject.parse(compressed, id); } catch (CorruptObjectException parsingError) { // Some HTTP servers send back a "200 OK" status with an HTML // page that explains the requested file could not be found.