diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/errors/LargeObjectException.java b/org.eclipse.jgit/src/org/eclipse/jgit/errors/LargeObjectException.java
new file mode 100644
index 000000000..d897c51de
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/errors/LargeObjectException.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.errors;
+
+import org.eclipse.jgit.lib.ObjectId;
+
+/** An object is too big to load into memory as a single byte array. */
+public class LargeObjectException extends RuntimeException {
+	private static final long serialVersionUID = 1L;
+
+	/** Create a large object exception, where the object isn't known. */
+	public LargeObjectException() {
+		// Do nothing.
+	}
+
+	/**
+	 * Create a large object exception, naming the object that is too big.
+	 *
+	 * @param id
+	 *            identity of the object that is too big to be loaded as a byte
+	 *            array in this JVM.
+	 */
+	public LargeObjectException(ObjectId id) {
+		super(id.name());
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
index 1a8d1ba9b..e7be11a13 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectLoader.java
@@ -47,12 +47,20 @@
 
 package org.eclipse.jgit.lib;
 
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.eclipse.jgit.errors.LargeObjectException;
+import org.eclipse.jgit.errors.MissingObjectException;
 
 /**
  * Base class for a set of loaders for different representations of Git objects.
  * New loaders are constructed for every object.
  */
 public abstract class ObjectLoader {
+	private static final int LARGE_OBJECT = 1024 * 1024;
+
 	/**
 	 * @return Git in pack object type, see {@link Constants}.
 	 */
@@ -63,6 +71,15 @@ public abstract class ObjectLoader {
 	 */
 	public abstract long getSize();
 
+	/**
+	 * @return true if this object is too large to obtain as a byte array.
+	 *         Objects over a certain threshold should be accessed only by their
+	 *         {@link #openStream()} to prevent overflowing the JVM heap.
+	 */
+	public boolean isLarge() {
+		return LARGE_OBJECT <= getSize();
+	}
+
 	/**
 	 * Obtain a copy of the bytes of this object.
 	 * <p>
@@ -70,8 +87,12 @@ public abstract class ObjectLoader {
 	 * be modified by the caller.
 	 *
 	 * @return the bytes of this object.
+	 * @throws LargeObjectException
+	 *             if the object won't fit into a byte array, because
+	 *             {@link #isLarge()} returns true. Callers should use
+	 *             {@link #openStream()} instead to access the contents.
 	 */
-	public final byte[] getBytes() {
+	public final byte[] getBytes() throws LargeObjectException {
 		final byte[] data = getCachedBytes();
 		final byte[] copy = new byte[data.length];
 		System.arraycopy(data, 0, copy, 0, data.length);
@@ -87,6 +108,69 @@ public abstract class ObjectLoader {
 	 * Changes (if made) will affect the cache but not the repository itself.
 	 *
 	 * @return the cached bytes of this object. Do not modify it.
+	 * @throws LargeObjectException
+	 *             if the object won't fit into a byte array, because
+	 *             {@link #isLarge()} returns true. Callers should use
+	 *             {@link #openStream()} instead to access the contents.
 	 */
-	public abstract byte[] getCachedBytes();
+	public abstract byte[] getCachedBytes() throws LargeObjectException;
+
+	/**
+	 * Obtain an input stream to read this object's data.
+	 *
+	 * @return a stream of this object's data. Caller must close the stream when
+	 *         through with it. The returned stream is buffered with a
+	 *         reasonable buffer size.
+	 * @throws MissingObjectException
+	 *             the object no longer exists.
+	 * @throws IOException
+	 *             the object store cannot be accessed.
+	 */
+	public abstract ObjectStream openStream() throws MissingObjectException,
+			IOException;
+
+	/**
+	 * Copy this object to the output stream.
+	 * <p>
+	 * For some object store implementations, this method may be more efficient
+	 * than reading from {@link #openStream()} into a temporary byte array, then
+	 * writing to the destination stream.
+	 * <p>
+	 * The default implementation of this method is to copy with a temporary
+	 * byte array for large objects, or to pass through the cached byte array
+	 * for small objects.
+	 *
+	 * @param out
+	 *            stream to receive the complete copy of this object's data.
+	 *            Caller is responsible for flushing or closing this stream
+	 *            after this method returns.
+	 * @throws MissingObjectException
+	 *             the object no longer exists.
+	 * @throws IOException
+	 *             the object store cannot be accessed, or the stream cannot be
+	 *             written to.
+	 */
+	public void copyTo(OutputStream out) throws MissingObjectException,
+			IOException {
+		if (isLarge()) {
+			ObjectStream in = openStream();
+			try {
+				byte[] tmp = new byte[1024];
+				long copied = 0;
+				for (;;) {
+					int n = in.read(tmp);
+					if (n < 0)
+						break;
+					out.write(tmp, 0, n);
+					copied += n;
+				}
+				if (copied != getSize())
+					throw new EOFException();
+			} finally {
+				in.close();
+			}
+		} else {
+			out.write(getCachedBytes());
+		}
+	}
 }
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java
new file mode 100644
index 000000000..ec2e8f099
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectStream.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2010, Google Inc.
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.lib;
+
+import java.io.InputStream;
+
+/** Stream of data coming from an object loaded by {@link ObjectLoader}. */
+public abstract class ObjectStream extends InputStream {
+	/** @return Git object type, see {@link Constants}. */
+	public abstract int getType();
+
+	/** @return total size of object in bytes */
+	public abstract long getSize();
+
+	/**
+	 * Simple stream around the cached byte array created by a loader.
+	 * <p>
+	 * ObjectLoader implementations can use this stream type when the object's
+	 * content is small enough to be accessed as a single byte array, but the
+	 * application has still requested it in stream format.
+	 */
+	public static class SmallStream extends ObjectStream {
+		private final int type;
+
+		private final byte[] data;
+
+		private int ptr;
+
+		private int mark;
+
+		/**
+		 * Create the stream from an existing loader's cached bytes.
+		 *
+		 * @param loader
+		 *            the loader.
+		 */
+		public SmallStream(ObjectLoader loader) {
+			this.type = loader.getType();
+			this.data = loader.getCachedBytes();
+		}
+
+		@Override
+		public int getType() {
+			return type;
+		}
+
+		@Override
+		public long getSize() {
+			return data.length;
+		}
+
+		@Override
+		public int available() {
+			return data.length - ptr;
+		}
+
+		@Override
+		public long skip(long n) {
+			int s = (int) Math.min(available(), Math.max(0, n));
+			ptr += s;
+			return s;
+		}
+
+		@Override
+		public int read() {
+			if (ptr == data.length)
+				return -1;
+			return data[ptr++] & 0xff;
+		}
+
+		@Override
+		public int read(byte[] b, int off, int len) {
+			if (len != 0 && ptr == data.length) // zero-length read yields 0
+				return -1;
+			int n = Math.min(available(), len);
+			System.arraycopy(data, ptr, b, off, n);
+			ptr += n;
+			return n;
+		}
+
+		@Override
+		public boolean markSupported() {
+			return true;
+		}
+
+		@Override
+		public void mark(int readlimit) {
+			mark = ptr;
+		}
+
+		@Override
+		public void reset() {
+			ptr = mark;
+		}
+	}
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java
index ad4042e17..f056c7413 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/PackedObjectLoader.java
@@ -46,7 +46,11 @@
 
 package org.eclipse.jgit.storage.file;
 
+import java.io.IOException;
+
+import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectStream;
 
 /** Object loaded in from a {@link PackFile}. */
 final class PackedObjectLoader extends ObjectLoader {
@@ -71,4 +75,15 @@ final class PackedObjectLoader extends ObjectLoader {
 	public final byte[] getCachedBytes() {
 		return data;
 	}
+
+	@Override
+	public final boolean isLarge() {
+		return false;
+	}
+
+	@Override
+	public final ObjectStream openStream() throws MissingObjectException,
+			IOException {
+		return new ObjectStream.SmallStream(this);
+	}
 }
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java
index b85ec149e..4a70793d0 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/storage/file/UnpackedObjectLoader.java
@@ -52,10 +52,12 @@ import java.util.zip.Inflater;
 
 import org.eclipse.jgit.JGitText;
 import org.eclipse.jgit.errors.CorruptObjectException;
+import org.eclipse.jgit.errors.MissingObjectException;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.Constants;
 import org.eclipse.jgit.lib.InflaterCache;
 import org.eclipse.jgit.lib.ObjectLoader;
+import org.eclipse.jgit.lib.ObjectStream;
 import org.eclipse.jgit.util.IO;
 import org.eclipse.jgit.util.MutableInteger;
 import org.eclipse.jgit.util.RawParseUtils;
@@ -214,4 +216,15 @@ public class UnpackedObjectLoader extends ObjectLoader {
 	public byte[] getCachedBytes() {
 		return bytes;
 	}
+
+	@Override
+	public final boolean isLarge() {
+		return false;
+	}
+
+	@Override
+	public final ObjectStream openStream() throws MissingObjectException,
+			IOException {
+		return new ObjectStream.SmallStream(this);
+	}
 }