From e2e38792b5403da38d5e3ab0e15b626e051107f2 Mon Sep 17 00:00:00 2001 From: Marc Strapetz Date: Thu, 29 Jul 2010 16:21:37 +0200 Subject: [PATCH] Perform automatic CRLF to LF conversion during WorkingTreeIterator WorkingTreeIterator now optionally performs CRLF to LF conversion for text files. A basic framework is left in place to support enabling (or disabling) this feature based on gitattributes, and also to support the more generic smudge/clean filter system. As there is no gitattribute support yet in JGit this is left unimplemented, but the mightNeedCleaning(), isBinary() and filterClean() methods will provide reasonable places to plug that into in the future. [sp: All bugs inside of WorkingTreeIterator are my fault, I wrote most of it while cherry-picking this patch and building it on top of Marc's original work.] CQ: 4419 Bug: 301775 Change-Id: I0ca35cfbfe3f503729cbfc1d5034ad4abcd1097e Signed-off-by: Shawn O. Pearce --- .../pgm/opt/AbstractTreeIteratorHandler.java | 3 +- .../treewalk/AbstractTreeIteratorTest.java | 2 +- .../jgit/treewalk/FileTreeIteratorTest.java | 15 +- .../FileTreeIteratorWithTimeControl.java | 2 +- .../io/EolCanonicalizingInputStreamTest.java | 111 ++++++++++ .../src/org/eclipse/jgit/diff/RawText.java | 52 ++++- .../src/org/eclipse/jgit/lib/CoreConfig.java | 11 + .../jgit/treewalk/FileTreeIterator.java | 27 +-- .../jgit/treewalk/WorkingTreeIterator.java | 194 ++++++++++++++---- .../jgit/treewalk/WorkingTreeOptions.java | 101 +++++++++ .../src/org/eclipse/jgit/util/IO.java | 46 ++++- .../util/io/EolCanonicalizingInputStream.java | 130 ++++++++++++ 12 files changed, 627 insertions(+), 67 deletions(-) create mode 100644 org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java diff --git 
a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java index 712698200..8d0b504c3 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java @@ -64,6 +64,7 @@ import org.eclipse.jgit.pgm.CLIText; import org.eclipse.jgit.treewalk.AbstractTreeIterator; import org.eclipse.jgit.treewalk.CanonicalTreeParser; import org.eclipse.jgit.treewalk.FileTreeIterator; +import org.eclipse.jgit.treewalk.WorkingTreeOptions; import org.eclipse.jgit.util.FS; /** @@ -96,7 +97,7 @@ public class AbstractTreeIteratorHandler extends final String name = params.getParameter(0); if (new File(name).isDirectory()) { - setter.addValue(new FileTreeIterator(new File(name), FS.DETECTED)); + setter.addValue(new FileTreeIterator(new File(name), FS.DETECTED, WorkingTreeOptions.createDefaultInstance())); return 1; } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java index 12c11482a..72354e4e9 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java @@ -62,7 +62,7 @@ public class AbstractTreeIteratorTest extends TestCase { public class FakeTreeIterator extends WorkingTreeIterator { public FakeTreeIterator(String pathName, FileMode fileMode) { - super(prefix(pathName)); + super(prefix(pathName), new WorkingTreeOptions(false)); mode = fileMode.getBits(); final int s = pathName.lastIndexOf('/'); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java index f939c90d8..838a56c13 100644 --- 
a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java @@ -78,7 +78,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase { public void testEmptyIfRootIsFile() throws Exception { final File r = new File(trash, paths[0]); assertTrue(r.isFile()); - final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); + final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(fti.first()); assertTrue(fti.eof()); } @@ -86,7 +87,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase { public void testEmptyIfRootDoesNotExist() throws Exception { final File r = new File(trash, "not-existing-file"); assertFalse(r.exists()); - final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); + final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(fti.first()); assertTrue(fti.eof()); } @@ -97,13 +99,15 @@ public class FileTreeIteratorTest extends RepositoryTestCase { r.mkdir(); assertTrue(r.isDirectory()); - final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); + final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(fti.first()); assertTrue(fti.eof()); } public void testSimpleIterate() throws Exception { - final FileTreeIterator top = new FileTreeIterator(trash, db.getFS()); + final FileTreeIterator top = new FileTreeIterator(trash, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); assertTrue(top.first()); assertFalse(top.eof()); @@ -151,7 +155,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase { } public void testComputeFileObjectId() throws Exception { - final FileTreeIterator top = new FileTreeIterator(trash, db.getFS()); + final 
FileTreeIterator top = new FileTreeIterator(trash, db.getFS(), + WorkingTreeOptions.createConfigurationInstance(db.getConfig())); final MessageDigest md = Constants.newMessageDigest(); md.update(Constants.encodeASCII(Constants.TYPE_BLOB)); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java index bb76d0075..58fb5297a 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java @@ -87,7 +87,7 @@ public class FileTreeIteratorWithTimeControl extends FileTreeIterator { public FileTreeIteratorWithTimeControl(File f, FS fs, TreeSet modTimes) { - super(f, fs); + super(f, fs, new WorkingTreeOptions(false)); this.modTimes = modTimes; } diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java new file mode 100644 index 000000000..b8061dcf3 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2010, Marc Strapetz + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package org.eclipse.jgit.util.io; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; + +import junit.framework.TestCase; + +public class EolCanonicalizingInputStreamTest extends TestCase { + + public void testLF() throws IOException { + final byte[] bytes = asBytes("1\n2\n3"); + test(bytes, bytes); + } + + public void testCR() throws IOException { + final byte[] bytes = asBytes("1\r2\r3"); + test(bytes, bytes); + } + + public void testCRLF() throws IOException { + test(asBytes("1\r\n2\r\n3"), asBytes("1\n2\n3")); + } + + public void testLFCR() throws IOException { + final byte[] bytes = asBytes("1\n\r2\n\r3"); + test(bytes, bytes); + } + + private void test(byte[] input, byte[] expected) throws IOException { + final InputStream bis1 = new ByteArrayInputStream(input); + final InputStream cis1 = new EolCanonicalizingInputStream(bis1); + int index1 = 0; + for (int b = cis1.read(); b != -1; b = cis1.read()) { + assertEquals(expected[index1], (byte) b); + index1++; + } + + assertEquals(expected.length, index1); + + for (int bufferSize = 1; bufferSize < 10; bufferSize++) { + final byte[] buffer = new byte[bufferSize]; + final InputStream bis2 = new ByteArrayInputStream(input); + final InputStream cis2 = new EolCanonicalizingInputStream(bis2); + + int read = 0; + for (int readNow = cis2.read(buffer, 0, buffer.length); readNow != -1 + && read < expected.length; readNow = cis2.read(buffer, 0, + buffer.length)) { + for (int index2 = 0; index2 < readNow; index2++) { + assertEquals(expected[read + index2], buffer[index2]); + } + read += readNow; + } + + assertEquals(expected.length, read); + } + } + + private static byte[] asBytes(String in) { + try { + return in.getBytes("UTF-8"); + } catch (UnsupportedEncodingException ex) { + throw new AssertionError(); + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java 
b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index 574d2edf9..4befe586c 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -46,6 +46,7 @@ package org.eclipse.jgit.diff; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import org.eclipse.jgit.util.IO; @@ -118,7 +119,8 @@ public class RawText implements Sequence { * * @param file * the text file. - * @throws IOException if Exceptions occur while reading the file + * @throws IOException + * if Exceptions occur while reading the file */ public RawText(File file) throws IOException { this(IO.readFully(file)); @@ -234,9 +236,53 @@ public class RawText implements Sequence { * @return true if raw is likely to be a binary file, false otherwise */ public static boolean isBinary(byte[] raw) { + return isBinary(raw, raw.length); + } + + /** + * Determine heuristically whether the bytes contained in a stream + * represent binary (as opposed to text) content. + * + * Note: Do not further use this stream after having called this method! The + * stream may not be fully read and will be left at an unknown position + * after consuming an unknown number of bytes. The caller is responsible for + * closing the stream. + * + * @param raw + * input stream containing the raw file content. + * @return true if raw is likely to be a binary file, false otherwise + * @throws IOException + * if input stream could not be read + */ + public static boolean isBinary(InputStream raw) throws IOException { + final byte[] buffer = new byte[FIRST_FEW_BYTES]; + int cnt = 0; + while (cnt < buffer.length) { + final int n = raw.read(buffer, cnt, buffer.length - cnt); + if (n == -1) + break; + cnt += n; + } + return isBinary(buffer, cnt); + } + + /** + * Determine heuristically whether a byte array represents binary (as + * opposed to text) content. + * + * @param raw + * the raw file content.
+ * @param length + * number of bytes in {@code raw} to evaluate. This should be + * {@code raw.length} unless {@code raw} was over-allocated by + * the caller. + * @return true if raw is likely to be a binary file, false otherwise + */ + public static boolean isBinary(byte[] raw, int length) { // Same heuristic as C Git - int size = raw.length > FIRST_FEW_BYTES ? FIRST_FEW_BYTES : raw.length; - for (int ptr = 0; ptr < size; ptr++) + if (length > FIRST_FEW_BYTES) + length = FIRST_FEW_BYTES; + for (int ptr = 0; ptr < length; ptr++) if (raw[ptr] == '\0') return true; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java index 5ad7910be..d37973799 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java @@ -70,6 +70,8 @@ public class CoreConfig { private final int streamFileThreshold; + private final boolean autoCRLF; + private CoreConfig(final Config rc) { compression = rc.getInt("core", "compression", DEFAULT_COMPRESSION); packIndexVersion = rc.getInt("pack", "indexversion", 2); @@ -80,6 +82,8 @@ public class CoreConfig { sft = Math.min(sft, maxMem / 4); // don't use more than 1/4 of the heap sft = Math.min(sft, Integer.MAX_VALUE); // cannot exceed array length streamFileThreshold = (int) sft; + + autoCRLF = rc.getBoolean("core", "autocrlf", false); } /** @@ -108,4 +112,11 @@ public class CoreConfig { public int getStreamFileThreshold() { return streamFileThreshold; } + + /** + * @return whether automatic CRLF conversion has been configured + */ + public boolean isAutoCRLF() { + return autoCRLF; + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java index 09dd50063..8f53a0df9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java +++ 
b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java @@ -66,14 +66,14 @@ import org.eclipse.jgit.util.FS; */ public class FileTreeIterator extends WorkingTreeIterator { /** - * the starting directory. This directory should correspond to - * the root of the repository. + * the starting directory. This directory should correspond to the root of + * the repository. */ protected final File directory; /** - * the file system abstraction which will be necessary to - * perform certain file system operations. + * the file system abstraction which will be necessary to perform certain + * file system operations. */ protected final FS fs; @@ -84,7 +84,8 @@ public class FileTreeIterator extends WorkingTreeIterator { * the repository whose working tree will be scanned. */ public FileTreeIterator(Repository repo) { - this(repo.getWorkTree(), repo.getFS()); + this(repo.getWorkTree(), repo.getFS(), WorkingTreeOptions + .createConfigurationInstance(repo.getConfig())); initRootIterator(repo); } @@ -95,10 +96,13 @@ public class FileTreeIterator extends WorkingTreeIterator { * the starting directory. This directory should correspond to * the root of the repository. * @param fs - * the file system abstraction which will be necessary to - * perform certain file system operations. + * the file system abstraction which will be necessary to perform + * certain file system operations. + * @param options + * working tree options to be used */ - public FileTreeIterator(final File root, FS fs) { + public FileTreeIterator(final File root, FS fs, WorkingTreeOptions options) { + super(options); directory = root; this.fs = fs; init(entries()); @@ -110,8 +114,8 @@ public class FileTreeIterator extends WorkingTreeIterator { * @param p * the parent iterator we were created from. * @param fs - * the file system abstraction which will be necessary to - * perform certain file system operations. 
+ * the file system abstraction which will be necessary to perform + * certain file system operations. * @param root * the subdirectory. This should be a directory contained within * the parent directory. @@ -205,8 +209,7 @@ public class FileTreeIterator extends WorkingTreeIterator { } /** - * @return - * The root directory of this iterator + * @return The root directory of this iterator */ public File getDirectory() { return directory; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java index a46f4dfc9..a1a60868a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java @@ -45,6 +45,7 @@ package org.eclipse.jgit.treewalk; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -60,6 +61,7 @@ import java.util.Collections; import java.util.Comparator; import org.eclipse.jgit.JGitText; +import org.eclipse.jgit.diff.RawText; import org.eclipse.jgit.dircache.DirCache; import org.eclipse.jgit.dircache.DirCacheEntry; import org.eclipse.jgit.errors.CorruptObjectException; @@ -69,6 +71,8 @@ import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.util.FS; +import org.eclipse.jgit.util.IO; +import org.eclipse.jgit.util.io.EolCanonicalizingInputStream; /** * Walks a working directory tree as part of a {@link TreeWalk}. @@ -88,6 +92,12 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { /** Size we perform file IO in if we have to read and hash a file. */ private static final int BUFFER_SIZE = 2048; + /** + * Maximum size of files which may be read fully into memory for performance + * reasons. 
+ */ + private static final long MAXIMUM_FILE_SIZE_TO_READ_FULLY = 65536; + /** The {@link #idBuffer()} for the current entry. */ private byte[] contentId; @@ -115,10 +125,19 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { /** If there is a .gitignore file present, the parsed rules from it. */ private IgnoreNode ignoreNode; - /** Create a new iterator with no parent. */ - protected WorkingTreeIterator() { + /** Options used to process the working tree. */ + private final WorkingTreeOptions options; + + /** + * Create a new iterator with no parent. + * + * @param options + * working tree options to be used + */ + protected WorkingTreeIterator(WorkingTreeOptions options) { super(); nameEncoder = Constants.CHARSET.newEncoder(); + this.options = options; } /** @@ -135,10 +154,14 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { * may be null or the empty string to indicate the prefix is the * root of the repository. A trailing slash ('/') is * automatically appended if the prefix does not end in '/'. 
+ * @param options + * working tree options to be used */ - protected WorkingTreeIterator(final String prefix) { + protected WorkingTreeIterator(final String prefix, + WorkingTreeOptions options) { super(prefix); nameEncoder = Constants.CHARSET.newEncoder(); + this.options = options; } /** @@ -150,6 +173,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { protected WorkingTreeIterator(final WorkingTreeIterator p) { super(p); nameEncoder = p.nameEncoder; + options = p.options; } /** @@ -191,7 +215,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { return zeroid; } - private void initializeDigest() { + private void initializeDigestAndReadBuffer() { if (contentDigest != null) return; @@ -200,7 +224,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { contentDigest = Constants.newMessageDigest(); } else { final WorkingTreeIterator p = (WorkingTreeIterator) parent; - p.initializeDigest(); + p.initializeDigestAndReadBuffer(); contentReadBuffer = p.contentReadBuffer; contentDigest = p.contentDigest; } @@ -218,53 +242,91 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { if (is == null) return zeroid; try { - initializeDigest(); - - contentDigest.reset(); - contentDigest.update(hblob); - contentDigest.update((byte) ' '); - - final long blobLength = e.getLength(); - long sz = blobLength; - if (sz == 0) { - contentDigest.update((byte) '0'); - } else { - final int bufn = contentReadBuffer.length; - int p = bufn; - do { - contentReadBuffer[--p] = digits[(int) (sz % 10)]; - sz /= 10; - } while (sz > 0); - contentDigest.update(contentReadBuffer, p, bufn - p); + initializeDigestAndReadBuffer(); + + final long len = e.getLength(); + if (!mightNeedCleaning(e)) + return computeHash(is, len); + + if (len <= MAXIMUM_FILE_SIZE_TO_READ_FULLY) { + ByteBuffer rawbuf = IO.readWholeStream(is, (int) len); + byte[] raw = rawbuf.array(); + int n = rawbuf.limit(); + if (!isBinary(e, raw, n)) { 
+ rawbuf = filterClean(e, raw, n); + raw = rawbuf.array(); + n = rawbuf.limit(); + } + return computeHash(new ByteArrayInputStream(raw, 0, n), n); } - contentDigest.update((byte) 0); - - for (;;) { - final int r = is.read(contentReadBuffer); - if (r <= 0) - break; - contentDigest.update(contentReadBuffer, 0, r); - sz += r; - } - if (sz != blobLength) - return zeroid; - return contentDigest.digest(); - } finally { + + if (isBinary(e)) + return computeHash(is, len); + + final long canonLen; + final InputStream lenIs = filterClean(e, e.openInputStream()); try { - is.close(); - } catch (IOException err2) { - // Suppress any error related to closing an input - // stream. We don't care, we should not have any - // outstanding data to flush or anything like that. + canonLen = computeLength(lenIs); + } finally { + safeClose(lenIs); } + + return computeHash(filterClean(e, is), canonLen); + } finally { + safeClose(is); } } catch (IOException err) { // Can't read the file? Don't report the failure either. - // return zeroid; } } + private static void safeClose(final InputStream in) { + try { + in.close(); + } catch (IOException err2) { + // Suppress any error related to closing an input + // stream. We don't care, we should not have any + // outstanding data to flush or anything like that. 
+ } + } + + private boolean mightNeedCleaning(Entry entry) { + return options.isAutoCRLF(); + } + + private boolean isBinary(Entry entry, byte[] content, int sz) { + return RawText.isBinary(content, sz); + } + + private boolean isBinary(Entry entry) throws IOException { + InputStream in = entry.openInputStream(); + try { + return RawText.isBinary(in); + } finally { + safeClose(in); + } + } + + private ByteBuffer filterClean(Entry entry, byte[] src, int n) + throws IOException { + InputStream in = new ByteArrayInputStream(src, 0, n); + return IO.readWholeStream(filterClean(entry, in), n); + } + + private InputStream filterClean(Entry entry, InputStream in) { + return new EolCanonicalizingInputStream(in); + } + + /** + * Returns the working tree options used by this iterator. + * + * @return working tree options + */ + public WorkingTreeOptions getOptions() { + return options; + } + @Override public int idOffset() { return 0; @@ -557,6 +619,51 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { } } + private long computeLength(InputStream in) throws IOException { + // Since we only care about the length, use skip. The stream + // may be able to more efficiently wade through its data.
+ // + long length = 0; + for (;;) { + long n = in.skip(1 << 20); + if (n <= 0) + break; + length += n; + } + return length; + } + + private byte[] computeHash(InputStream in, long length) throws IOException { + contentDigest.reset(); + contentDigest.update(hblob); + contentDigest.update((byte) ' '); + + long sz = length; + if (sz == 0) { + contentDigest.update((byte) '0'); + } else { + final int bufn = contentReadBuffer.length; + int p = bufn; + do { + contentReadBuffer[--p] = digits[(int) (sz % 10)]; + sz /= 10; + } while (sz > 0); + contentDigest.update(contentReadBuffer, p, bufn - p); + } + contentDigest.update((byte) 0); + + for (;;) { + final int r = in.read(contentReadBuffer); + if (r <= 0) + break; + contentDigest.update(contentReadBuffer, 0, r); + sz += r; + } + if (sz != length) + return zeroid; + return contentDigest.digest(); + } + /** A single entry within a working directory tree. */ protected static abstract class Entry { byte[] encodedName; @@ -569,7 +676,8 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { b = enc.encode(CharBuffer.wrap(getName())); } catch (CharacterCodingException e) { // This should so never happen. - throw new RuntimeException(MessageFormat.format(JGitText.get().unencodeableFile, getName())); + throw new RuntimeException(MessageFormat.format( + JGitText.get().unencodeableFile, getName())); } encodedNameLen = b.limit(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java new file mode 100644 index 000000000..50da3302d --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2010, Marc Strapetz + * and other copyright owners as documented in the project's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +package org.eclipse.jgit.treewalk; + +import org.eclipse.jgit.lib.Config; +import org.eclipse.jgit.lib.CoreConfig; + +/** + * Contains options used by the WorkingTreeIterator. + */ +public class WorkingTreeOptions { + + /** + * Creates default options which reflect the original configuration of Git + * on Unix systems. + * + * @return created working tree options + */ + public static WorkingTreeOptions createDefaultInstance() { + return new WorkingTreeOptions(false); + } + + /** + * Creates options based on the specified repository configuration. + * + * @param config + * repository configuration to create options for + * + * @return created working tree options + */ + public static WorkingTreeOptions createConfigurationInstance(Config config) { + return new WorkingTreeOptions(config.get(CoreConfig.KEY).isAutoCRLF()); + } + + /** + * Indicates whether EOLs of text files should be converted to '\n' before + * calculating the blob ID. + **/ + private final boolean autoCRLF; + + /** + * Creates new options. + * + * @param autoCRLF + * indicates whether EOLs of text files should be converted to + * '\n' before calculating the blob ID. + */ + public WorkingTreeOptions(boolean autoCRLF) { + this.autoCRLF = autoCRLF; + } + + /** + * Indicates whether EOLs of text files should be converted to '\n' before + * calculating the blob ID. + * + * @return true if EOLs should be canonicalized. 
+ */ + public boolean isAutoCRLF() { + return autoCRLF; + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java index 1f2042d4c..a9c3853a9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java @@ -51,6 +51,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import java.text.MessageFormat; import org.eclipse.jgit.JGitText; @@ -96,7 +97,8 @@ public class IO { try { final long sz = in.getChannel().size(); if (sz > max) - throw new IOException(MessageFormat.format(JGitText.get().fileIsTooLarge, path)); + throw new IOException(MessageFormat.format( + JGitText.get().fileIsTooLarge, path)); final byte[] buf = new byte[(int) sz]; IO.readFully(in, buf, 0, buf.length); return buf; @@ -109,6 +111,48 @@ public class IO { } } + /** + * Read an entire input stream into memory as a ByteBuffer. + * + * Note: The stream is read to its end and is not usable after calling this + * method. The caller is responsible for closing the stream. + * + * @param in + * input stream to be read. + * @param sizeHint + * a hint on the approximate number of bytes contained in the + * stream, used to allocate temporary buffers more efficiently + * @return complete contents of the input stream. The ByteBuffer always has + * a writable backing array, with {@code position() == 0} and + * {@code limit()} equal to the actual length read. Callers may rely + * on obtaining the underlying array for efficient data access. If + * {@code sizeHint} was too large, the array may be over-allocated, + * resulting in {@code limit() < array().length}. + * @throws IOException + * there was an error reading from the stream. 
+ */ + public static ByteBuffer readWholeStream(InputStream in, int sizeHint) + throws IOException { + byte[] out = new byte[sizeHint]; + int pos = 0; + while (pos < out.length) { + int read = in.read(out, pos, out.length - pos); + if (read < 0) + return ByteBuffer.wrap(out, 0, pos); + pos += read; + } + + int last = in.read(); + if (last < 0) + return ByteBuffer.wrap(out, 0, pos); + + TemporaryBuffer.Heap tmp = new TemporaryBuffer.Heap(Integer.MAX_VALUE); + tmp.write(out); + tmp.write(last); + tmp.copy(in); + return ByteBuffer.wrap(tmp.toByteArray()); + } + /** * Read the entire byte array into memory, or throw an exception. * diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java new file mode 100644 index 000000000..4bdd2b3e5 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2010, Marc Strapetz + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. 
nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.util.io; + +import java.io.IOException; +import java.io.InputStream; + +/** + * An input stream which canonicalizes EOL bytes on the fly to '\n'. + * + * Note: Make sure to apply this InputStream only to text files! + */ +public class EolCanonicalizingInputStream extends InputStream { + private final byte[] single = new byte[1]; + + private final byte[] buf = new byte[8096]; + + private final InputStream in; + + private int cnt; + + private int ptr; + + /** + * Creates a new InputStream, wrapping the specified stream. + * + * @param in + * raw input stream + */ + public EolCanonicalizingInputStream(InputStream in) { + this.in = in; + } + + @Override + public int read() throws IOException { + final int read = read(single, 0, 1); + return read == 1 ?
single[0] & 0xff : -1; + } + + @Override + public int read(byte[] bs, int off, int len) throws IOException { + if (len == 0) + return 0; + + if (cnt == -1) + return -1; + + final int startOff = off; + final int end = off + len; + + while (off < end) { + if (ptr == cnt && !fillBuffer()) { + break; + } + + byte b = buf[ptr++]; + if (b != '\r') { + bs[off++] = b; + continue; + } + + if (ptr == cnt && !fillBuffer()) { + bs[off++] = '\r'; + break; + } + + if (buf[ptr] == '\n') { + bs[off++] = '\n'; + ptr++; + } else + bs[off++] = '\r'; + } + + return startOff == off ? -1 : off - startOff; + } + + @Override + public void close() throws IOException { + in.close(); + } + + private boolean fillBuffer() throws IOException { + cnt = in.read(buf, 0, buf.length); + if (cnt < 1) + return false; + ptr = 0; + return true; + } +}