diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java index 712698200..8d0b504c3 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java @@ -64,6 +64,7 @@ import org.eclipse.jgit.pgm.CLIText; import org.eclipse.jgit.treewalk.AbstractTreeIterator; import org.eclipse.jgit.treewalk.CanonicalTreeParser; import org.eclipse.jgit.treewalk.FileTreeIterator; +import org.eclipse.jgit.treewalk.WorkingTreeOptions; import org.eclipse.jgit.util.FS; /** @@ -96,7 +97,7 @@ public class AbstractTreeIteratorHandler extends final String name = params.getParameter(0); if (new File(name).isDirectory()) { - setter.addValue(new FileTreeIterator(new File(name), FS.DETECTED)); + setter.addValue(new FileTreeIterator(new File(name), FS.DETECTED, WorkingTreeOptions.createDefaultInstance())); return 1; } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java index 12c11482a..72354e4e9 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java @@ -62,7 +62,7 @@ public class AbstractTreeIteratorTest extends TestCase { public class FakeTreeIterator extends WorkingTreeIterator { public FakeTreeIterator(String pathName, FileMode fileMode) { - super(prefix(pathName)); + super(prefix(pathName), new WorkingTreeOptions(false)); mode = fileMode.getBits(); final int s = pathName.lastIndexOf('/'); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java index f939c90d8..838a56c13 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java @@ -78,7 +78,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase { public void testEmptyIfRootIsFile() throws Exception { final File r = new File(trash, paths[0]); assertTrue(r.isFile()); - final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); + final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(fti.first()); assertTrue(fti.eof()); } @@ -86,7 +87,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase { public void testEmptyIfRootDoesNotExist() throws Exception { final File r = new File(trash, "not-existing-file"); assertFalse(r.exists()); - final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); + final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(fti.first()); assertTrue(fti.eof()); } @@ -97,13 +99,15 @@ public class FileTreeIteratorTest extends RepositoryTestCase { r.mkdir(); assertTrue(r.isDirectory()); - final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); + final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(fti.first()); assertTrue(fti.eof()); } public void testSimpleIterate() throws Exception { - final FileTreeIterator top = new FileTreeIterator(trash, db.getFS()); + final FileTreeIterator top = new FileTreeIterator(trash, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(top.first()); assertFalse(top.eof()); @@ -151,7 +155,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase { } public void testComputeFileObjectId() throws Exception { - final FileTreeIterator top = new FileTreeIterator(trash, db.getFS()); + final FileTreeIterator top = new FileTreeIterator(trash, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); final MessageDigest md = Constants.newMessageDigest(); md.update(Constants.encodeASCII(Constants.TYPE_BLOB)); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java index bb76d0075..58fb5297a 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java @@ -87,7 +87,7 @@ public class FileTreeIteratorWithTimeControl extends FileTreeIterator { public FileTreeIteratorWithTimeControl(File f, FS fs, TreeSet modTimes) { - super(f, fs); + super(f, fs, new WorkingTreeOptions(false)); this.modTimes = modTimes; } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java new file mode 100644 index 000000000..b8061dcf3 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2010, Marc Strapetz + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util.io; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; + +import junit.framework.TestCase; + +public class EolCanonicalizingInputStreamTest extends TestCase { + + public void testLF() throws IOException { + final byte[] bytes = asBytes("1\n2\n3"); + test(bytes, bytes); + } + + public void testCR() throws IOException { + final byte[] bytes = asBytes("1\r2\r3"); + test(bytes, bytes); + } + + public void testCRLF() throws IOException { + test(asBytes("1\r\n2\r\n3"), asBytes("1\n2\n3")); + } + + public void testLFCR() throws IOException { + final byte[] bytes = asBytes("1\n\r2\n\r3"); + test(bytes, bytes); + } + + private void test(byte[] input, byte[] expected) throws IOException { + final InputStream bis1 = new ByteArrayInputStream(input); + final InputStream cis1 = new EolCanonicalizingInputStream(bis1); + int index1 = 0; + for (int b = cis1.read(); b != -1; b = cis1.read()) { + assertEquals(expected[index1], (byte) b); + index1++; + } + + assertEquals(expected.length, index1); + + for (int bufferSize = 1; bufferSize < 10; bufferSize++) { + final byte[] buffer = new byte[bufferSize]; + final InputStream bis2 = new ByteArrayInputStream(input); + final InputStream cis2 = new EolCanonicalizingInputStream(bis2); + + int read = 0; + for (int readNow = cis2.read(buffer, 0, buffer.length); readNow != -1 + && read < expected.length; readNow = cis2.read(buffer, 0, + buffer.length)) { + for (int index2 = 0; index2 < readNow; index2++) { + assertEquals(expected[read + index2], buffer[index2]); + } + read += readNow; + } + + assertEquals(expected.length, read); + } + } + + private static byte[] asBytes(String in) { + try { + return in.getBytes("UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw new AssertionError(); + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 574d2edf9..4befe586c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -46,6 +46,7 @@ package org.eclipse.jgit.diff; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import org.eclipse.jgit.util.IO; @@ -118,7 +119,8 @@ public class RawText implements Sequence { * * @param file * the text file. - * @throws IOException if Exceptions occur while reading the file + * @throws IOException + * if Exceptions occur while reading the file */ public RawText(File file) throws IOException { this(IO.readFully(file)); @@ -234,9 +236,53 @@ public class RawText implements Sequence { * @return true if raw is likely to be a binary file, false otherwise */ public static boolean isBinary(byte[] raw) { + return isBinary(raw, raw.length); + } + + /** + * Determine heuristically whether the bytes contained in a stream + * represents binary (as opposed to text) content. + * + * Note: Do not further use this stream after having called this method! The + * stream may not be fully read and will be left at an unknown position + * after consuming an unknown number of bytes. The caller is responsible for + * closing the stream. + * + * @param raw + * input stream containing the raw file content. + * @return true if raw is likely to be a binary file, false otherwise + * @throws IOException + * if input stream could not be read + */ + public static boolean isBinary(InputStream raw) throws IOException { + final byte[] buffer = new byte[FIRST_FEW_BYTES]; + int cnt = 0; + while (cnt < buffer.length) { + final int n = raw.read(buffer, cnt, buffer.length - cnt); + if (n == -1) + break; + cnt += n; + } + return isBinary(buffer, cnt); + } + + /** + * Determine heuristically whether a byte array represents binary (as + * opposed to text) content. + * + * @param raw + * the raw file content. + * @param length + * number of bytes in {@code raw} to evaluate. This should be + * {@code raw.length} unless {@code raw} was over-allocated by + * the caller. + * @return true if raw is likely to be a binary file, false otherwise + */ + public static boolean isBinary(byte[] raw, int length) { // Same heuristic as C Git - int size = raw.length > FIRST_FEW_BYTES ? FIRST_FEW_BYTES : raw.length; - for (int ptr = 0; ptr < size; ptr++) + if (length > FIRST_FEW_BYTES) + length = FIRST_FEW_BYTES; + for (int ptr = 0; ptr < length; ptr++) if (raw[ptr] == '\0') return true; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java index 5ad7910be..d37973799 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java @@ -70,6 +70,8 @@ public class CoreConfig { private final int streamFileThreshold; + private final boolean autoCRLF; + private CoreConfig(final Config rc) { compression = rc.getInt("core", "compression", DEFAULT_COMPRESSION); packIndexVersion = rc.getInt("pack", "indexversion", 2); @@ -80,6 +82,8 @@ public class CoreConfig { sft = Math.min(sft, maxMem / 4); // don't use more than 1/4 of the heap sft = Math.min(sft, Integer.MAX_VALUE); // cannot exceed array length streamFileThreshold = (int) sft; + + autoCRLF = rc.getBoolean("core", "autocrlf", false); } /** @@ -108,4 +112,11 @@ public class CoreConfig { public int getStreamFileThreshold() { return streamFileThreshold; } + + /** + * @return whether automatic CRLF conversion has been configured + */ + public boolean isAutoCRLF() { + return autoCRLF; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java index 09dd50063..8f53a0df9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java @@ -66,14 +66,14 @@ import org.eclipse.jgit.util.FS; */ public class FileTreeIterator extends WorkingTreeIterator { /** - * the starting directory. This directory should correspond to - * the root of the repository. + * the starting directory. This directory should correspond to the root of + * the repository. */ protected final File directory; /** - * the file system abstraction which will be necessary to - * perform certain file system operations. + * the file system abstraction which will be necessary to perform certain + * file system operations. */ protected final FS fs; @@ -84,7 +84,8 @@ public class FileTreeIterator extends WorkingTreeIterator { * the repository whose working tree will be scanned. */ public FileTreeIterator(Repository repo) { - this(repo.getWorkTree(), repo.getFS()); + this(repo.getWorkTree(), repo.getFS(), WorkingTreeOptions + .createConfigurationInstance(repo.getConfig())); initRootIterator(repo); } @@ -95,10 +96,13 @@ public class FileTreeIterator extends WorkingTreeIterator { * the starting directory. This directory should correspond to * the root of the repository. * @param fs - * the file system abstraction which will be necessary to - * perform certain file system operations. + * the file system abstraction which will be necessary to perform + * certain file system operations. + * @param options + * working tree options to be used */ - public FileTreeIterator(final File root, FS fs) { + public FileTreeIterator(final File root, FS fs, WorkingTreeOptions options) { + super(options); directory = root; this.fs = fs; init(entries()); @@ -110,8 +114,8 @@ public class FileTreeIterator extends WorkingTreeIterator { * @param p * the parent iterator we were created from. * @param fs - * the file system abstraction which will be necessary to - * perform certain file system operations. + * the file system abstraction which will be necessary to perform + * certain file system operations. * @param root * the subdirectory. This should be a directory contained within * the parent directory. @@ -205,8 +209,7 @@ public class FileTreeIterator extends WorkingTreeIterator { } /** - * @return - * The root directory of this iterator + * @return The root directory of this iterator */ public File getDirectory() { return directory; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java index a46f4dfc9..a1a60868a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java @@ -45,6 +45,7 @@ package org.eclipse.jgit.treewalk; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -60,6 +61,7 @@ import java.util.Collections; import java.util.Comparator; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.RawText; import org.eclipse.jgit.dircache.DirCache; import org.eclipse.jgit.dircache.DirCacheEntry; import org.eclipse.jgit.errors.CorruptObjectException; @@ -69,6 +71,8 @@ import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.util.FS; +import org.eclipse.jgit.util.IO; +import org.eclipse.jgit.util.io.EolCanonicalizingInputStream; /** * Walks a working directory tree as part of a {@link TreeWalk}. @@ -88,6 +92,12 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { /** Size we perform file IO in if we have to read and hash a file. */ private static final int BUFFER_SIZE = 2048; + /** + * Maximum size of files which may be read fully into memory for performance + * reasons. + */ + private static final long MAXIMUM_FILE_SIZE_TO_READ_FULLY = 65536; + /** The {@link #idBuffer()} for the current entry. */ private byte[] contentId; @@ -115,10 +125,19 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { /** If there is a .gitignore file present, the parsed rules from it. */ private IgnoreNode ignoreNode; - /** Create a new iterator with no parent. */ - protected WorkingTreeIterator() { + /** Options used to process the working tree. */ + private final WorkingTreeOptions options; + + /** + * Create a new iterator with no parent. + * + * @param options + * working tree options to be used + */ + protected WorkingTreeIterator(WorkingTreeOptions options) { super(); nameEncoder = Constants.CHARSET.newEncoder(); + this.options = options; } /** @@ -135,10 +154,14 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { * may be null or the empty string to indicate the prefix is the * root of the repository. A trailing slash ('/') is * automatically appended if the prefix does not end in '/'. + * @param options + * working tree options to be used */ - protected WorkingTreeIterator(final String prefix) { + protected WorkingTreeIterator(final String prefix, + WorkingTreeOptions options) { super(prefix); nameEncoder = Constants.CHARSET.newEncoder(); + this.options = options; } /** @@ -150,6 +173,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { protected WorkingTreeIterator(final WorkingTreeIterator p) { super(p); nameEncoder = p.nameEncoder; + options = p.options; } /** @@ -191,7 +215,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { return zeroid; } - private void initializeDigest() { + private void initializeDigestAndReadBuffer() { if (contentDigest != null) return; @@ -200,7 +224,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { contentDigest = Constants.newMessageDigest(); } else { final WorkingTreeIterator p = (WorkingTreeIterator) parent; - p.initializeDigest(); + p.initializeDigestAndReadBuffer(); contentReadBuffer = p.contentReadBuffer; contentDigest = p.contentDigest; } @@ -218,53 +242,91 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { if (is == null) return zeroid; try { - initializeDigest(); - - contentDigest.reset(); - contentDigest.update(hblob); - contentDigest.update((byte) ' '); - - final long blobLength = e.getLength(); - long sz = blobLength; - if (sz == 0) { - contentDigest.update((byte) '0'); - } else { - final int bufn = contentReadBuffer.length; - int p = bufn; - do { - contentReadBuffer[--p] = digits[(int) (sz % 10)]; - sz /= 10; - } while (sz > 0); - contentDigest.update(contentReadBuffer, p, bufn - p); + initializeDigestAndReadBuffer(); + + final long len = e.getLength(); + if (!mightNeedCleaning(e)) + return computeHash(is, len); + + if (len <= MAXIMUM_FILE_SIZE_TO_READ_FULLY) { + ByteBuffer rawbuf = IO.readWholeStream(is, (int) len); + byte[] raw = rawbuf.array(); + int n = rawbuf.limit(); + if (!isBinary(e, raw, n)) { + rawbuf = filterClean(e, raw, n); + raw = rawbuf.array(); + n = rawbuf.limit(); + } + return computeHash(new ByteArrayInputStream(raw, 0, n), n); } - contentDigest.update((byte) 0); - - for (;;) { - final int r = is.read(contentReadBuffer); - if (r <= 0) - break; - contentDigest.update(contentReadBuffer, 0, r); - sz += r; - } - if (sz != blobLength) - return zeroid; - return contentDigest.digest(); - } finally { + + if (isBinary(e)) + return computeHash(is, len); + + final long canonLen; + final InputStream lenIs = filterClean(e, e.openInputStream()); try { - is.close(); - } catch (IOException err2) { - // Suppress any error related to closing an input - // stream. We don't care, we should not have any - // outstanding data to flush or anything like that. + canonLen = computeLength(lenIs); + } finally { + safeClose(lenIs); } + + return computeHash(filterClean(e, is), canonLen); + } finally { + safeClose(is); } } catch (IOException err) { // Can't read the file? Don't report the failure either. - // return zeroid; } } + private static void safeClose(final InputStream in) { + try { + in.close(); + } catch (IOException err2) { + // Suppress any error related to closing an input + // stream. We don't care, we should not have any + // outstanding data to flush or anything like that. + } + } + + private boolean mightNeedCleaning(Entry entry) { + return options.isAutoCRLF(); + } + + private boolean isBinary(Entry entry, byte[] content, int sz) { + return RawText.isBinary(content, sz); + } + + private boolean isBinary(Entry entry) throws IOException { + InputStream in = entry.openInputStream(); + try { + return RawText.isBinary(in); + } finally { + safeClose(in); + } + } + + private ByteBuffer filterClean(Entry entry, byte[] src, int n) + throws IOException { + InputStream in = new ByteArrayInputStream(src); + return IO.readWholeStream(filterClean(entry, in), n); + } + + private InputStream filterClean(Entry entry, InputStream in) { + return new EolCanonicalizingInputStream(in); + } + + /** + * Returns the working tree options used by this iterator. + * + * @return working tree options + */ + public WorkingTreeOptions getOptions() { + return options; + } + @Override public int idOffset() { return 0; @@ -557,6 +619,51 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { } } + private long computeLength(InputStream in) throws IOException { + // Since we only care about the length, use skip. The stream + // may be able to more efficiently wade through its data. + // + long length = 0; + for (;;) { + long n = in.skip(1 << 20); + if (n <= 0) + break; + length += n; + } + return length; + } + + private byte[] computeHash(InputStream in, long length) throws IOException { + contentDigest.reset(); + contentDigest.update(hblob); + contentDigest.update((byte) ' '); + + long sz = length; + if (sz == 0) { + contentDigest.update((byte) '0'); + } else { + final int bufn = contentReadBuffer.length; + int p = bufn; + do { + contentReadBuffer[--p] = digits[(int) (sz % 10)]; + sz /= 10; + } while (sz > 0); + contentDigest.update(contentReadBuffer, p, bufn - p); + } + contentDigest.update((byte) 0); + + for (;;) { + final int r = in.read(contentReadBuffer); + if (r <= 0) + break; + contentDigest.update(contentReadBuffer, 0, r); + sz += r; + } + if (sz != length) + return zeroid; + return contentDigest.digest(); + } + /** A single entry within a working directory tree. */ protected static abstract class Entry { byte[] encodedName; @@ -569,7 +676,8 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { b = enc.encode(CharBuffer.wrap(getName())); } catch (CharacterCodingException e) { // This should so never happen. - throw new RuntimeException(MessageFormat.format(JGitText.get().unencodeableFile, getName())); + throw new RuntimeException(MessageFormat.format( + JGitText.get().unencodeableFile, getName())); } encodedNameLen = b.limit(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java new file mode 100644 index 000000000..50da3302d --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2010, Marc Strapetz + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.eclipse.jgit.treewalk; + +import org.eclipse.jgit.lib.Config; +import org.eclipse.jgit.lib.CoreConfig; + +/** + * Contains options used by the WorkingTreeIterator. + */ +public class WorkingTreeOptions { + + /** + * Creates default options which reflect the original configuration of Git + * on Unix systems. + * + * @return created working tree options + */ + public static WorkingTreeOptions createDefaultInstance() { + return new WorkingTreeOptions(false); + } + + /** + * Creates options based on the specified repository configuration. + * + * @param config + * repository configuration to create options for + * + * @return created working tree options + */ + public static WorkingTreeOptions createConfigurationInstance(Config config) { + return new WorkingTreeOptions(config.get(CoreConfig.KEY).isAutoCRLF()); + } + + /** + * Indicates whether EOLs of text files should be converted to '\n' before + * calculating the blob ID. + **/ + private final boolean autoCRLF; + + /** + * Creates new options. + * + * @param autoCRLF + * indicates whether EOLs of text files should be converted to + * '\n' before calculating the blob ID. + */ + public WorkingTreeOptions(boolean autoCRLF) { + this.autoCRLF = autoCRLF; + } + + /** + * Indicates whether EOLs of text files should be converted to '\n' before + * calculating the blob ID. + * + * @return true if EOLs should be canonicalized. + */ + public boolean isAutoCRLF() { + return autoCRLF; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java index 1f2042d4c..a9c3853a9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java @@ -51,6 +51,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import java.text.MessageFormat; import org.eclipse.jgit.JGitText; @@ -96,7 +97,8 @@ public class IO { try { final long sz = in.getChannel().size(); if (sz > max) - throw new IOException(MessageFormat.format(JGitText.get().fileIsTooLarge, path)); + throw new IOException(MessageFormat.format( + JGitText.get().fileIsTooLarge, path)); final byte[] buf = new byte[(int) sz]; IO.readFully(in, buf, 0, buf.length); return buf; @@ -109,6 +111,48 @@ public class IO { } } + /** + * Read an entire input stream into memory as a ByteBuffer. + * + * Note: The stream is read to its end and is not usable after calling this + * method. The caller is responsible for closing the stream. + * + * @param in + * input stream to be read. + * @param sizeHint + * a hint on the approximate number of bytes contained in the + * stream, used to allocate temporary buffers more efficiently + * @return complete contents of the input stream. The ByteBuffer always has + * a writable backing array, with {@code position() == 0} and + * {@code limit()} equal to the actual length read. Callers may rely + * on obtaining the underlying array for efficient data access. If + * {@code sizeHint} was too large, the array may be over-allocated, + * resulting in {@code limit() < array().length}. + * @throws IOException + * there was an error reading from the stream. + */ + public static ByteBuffer readWholeStream(InputStream in, int sizeHint) + throws IOException { + byte[] out = new byte[sizeHint]; + int pos = 0; + while (pos < out.length) { + int read = in.read(out, pos, out.length - pos); + if (read < 0) + return ByteBuffer.wrap(out, 0, pos); + pos += read; + } + + int last = in.read(); + if (last < 0) + return ByteBuffer.wrap(out, 0, pos); + + TemporaryBuffer.Heap tmp = new TemporaryBuffer.Heap(Integer.MAX_VALUE); + tmp.write(out); + tmp.write(last); + tmp.copy(in); + return ByteBuffer.wrap(tmp.toByteArray()); + } + /** * Read the entire byte array into memory, or throw an exception. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java new file mode 100644 index 000000000..4bdd2b3e5 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2010, Marc Strapetz + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util.io; + +import java.io.IOException; +import java.io.InputStream; + +/** + * An input stream which canonicalizes EOLs bytes on the fly to '\n'. + * + * Note: Make sure to apply this InputStream only to text files! + */ +public class EolCanonicalizingInputStream extends InputStream { + private final byte[] single = new byte[1]; + + private final byte[] buf = new byte[8096]; + + private final InputStream in; + + private int cnt; + + private int ptr; + + /** + * Creates a new InputStream, wrapping the specified stream + * + * @param in + * raw input stream + */ + public EolCanonicalizingInputStream(InputStream in) { + this.in = in; + } + + @Override + public int read() throws IOException { + final int read = read(single, 0, 1); + return read == 1 ? single[0] & 0xff : -1; + } + + @Override + public int read(byte[] bs, int off, int len) throws IOException { + if (len == 0) + return 0; + + if (cnt == -1) + return -1; + + final int startOff = off; + final int end = off + len; + + while (off < end) { + if (ptr == cnt && !fillBuffer()) { + break; + } + + byte b = buf[ptr++]; + if (b != '\r') { + bs[off++] = b; + continue; + } + + if (ptr == cnt && !fillBuffer()) { + bs[off++] = '\r'; + break; + } + + if (buf[ptr] == '\n') { + bs[off++] = '\n'; + ptr++; + } else + bs[off++] = '\r'; + } + + return startOff == off ? -1 : off - startOff; + } + + @Override + public void close() throws IOException { + in.close(); + } + + private boolean fillBuffer() throws IOException { + cnt = in.read(buf, 0, buf.length); + if (cnt < 1) + return false; + ptr = 0; + return true; + } +}