Browse Source

Perform automatic CRLF to LF conversion during WorkingTreeIterator

WorkingTreeIterator now optionally performs CRLF to LF conversion for
text files.  A basic framework is left in place to support enabling
(or disabling) this feature based on gitattributes, and also to
support the more generic smudge/clean filter system.  As there is
no gitattributes support in JGit yet, this is left unimplemented,
but the mightNeedCleaning(), isBinary() and filterClean() methods
provide reasonable places to plug it in in the future.

[sp: All bugs inside of WorkingTreeIterator are my fault, I wrote
     most of it while cherry-picking this patch and building it on
     top of Marc's original work.]

CQ: 4419
Bug: 301775
Change-Id: I0ca35cfbfe3f503729cbfc1d5034ad4abcd1097e
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
stable-0.9
Marc Strapetz 15 years ago committed by Shawn O. Pearce
parent
commit
e2e38792b5
  1. 3
      org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java
  2. 2
      org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java
  3. 15
      org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java
  4. 2
      org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java
  5. 111
      org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java
  6. 52
      org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
  7. 11
      org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java
  8. 27
      org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java
  9. 194
      org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java
  10. 101
      org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java
  11. 46
      org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java
  12. 130
      org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java

3
org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/opt/AbstractTreeIteratorHandler.java

@ -64,6 +64,7 @@ import org.eclipse.jgit.pgm.CLIText;
import org.eclipse.jgit.treewalk.AbstractTreeIterator; import org.eclipse.jgit.treewalk.AbstractTreeIterator;
import org.eclipse.jgit.treewalk.CanonicalTreeParser; import org.eclipse.jgit.treewalk.CanonicalTreeParser;
import org.eclipse.jgit.treewalk.FileTreeIterator; import org.eclipse.jgit.treewalk.FileTreeIterator;
import org.eclipse.jgit.treewalk.WorkingTreeOptions;
import org.eclipse.jgit.util.FS; import org.eclipse.jgit.util.FS;
/** /**
@ -96,7 +97,7 @@ public class AbstractTreeIteratorHandler extends
final String name = params.getParameter(0); final String name = params.getParameter(0);
if (new File(name).isDirectory()) { if (new File(name).isDirectory()) {
setter.addValue(new FileTreeIterator(new File(name), FS.DETECTED)); setter.addValue(new FileTreeIterator(new File(name), FS.DETECTED, WorkingTreeOptions.createDefaultInstance()));
return 1; return 1;
} }

2
org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/AbstractTreeIteratorTest.java

@ -62,7 +62,7 @@ public class AbstractTreeIteratorTest extends TestCase {
public class FakeTreeIterator extends WorkingTreeIterator { public class FakeTreeIterator extends WorkingTreeIterator {
public FakeTreeIterator(String pathName, FileMode fileMode) { public FakeTreeIterator(String pathName, FileMode fileMode) {
super(prefix(pathName)); super(prefix(pathName), new WorkingTreeOptions(false));
mode = fileMode.getBits(); mode = fileMode.getBits();
final int s = pathName.lastIndexOf('/'); final int s = pathName.lastIndexOf('/');

15
org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java

@ -78,7 +78,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase {
public void testEmptyIfRootIsFile() throws Exception { public void testEmptyIfRootIsFile() throws Exception {
final File r = new File(trash, paths[0]); final File r = new File(trash, paths[0]);
assertTrue(r.isFile()); assertTrue(r.isFile());
final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(),
WorkingTreeOptions.createConfigurationInstance(db.getConfig()));
assertTrue(fti.first()); assertTrue(fti.first());
assertTrue(fti.eof()); assertTrue(fti.eof());
} }
@ -86,7 +87,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase {
public void testEmptyIfRootDoesNotExist() throws Exception { public void testEmptyIfRootDoesNotExist() throws Exception {
final File r = new File(trash, "not-existing-file"); final File r = new File(trash, "not-existing-file");
assertFalse(r.exists()); assertFalse(r.exists());
final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(),
WorkingTreeOptions.createConfigurationInstance(db.getConfig()));
assertTrue(fti.first()); assertTrue(fti.first());
assertTrue(fti.eof()); assertTrue(fti.eof());
} }
@ -97,13 +99,15 @@ public class FileTreeIteratorTest extends RepositoryTestCase {
r.mkdir(); r.mkdir();
assertTrue(r.isDirectory()); assertTrue(r.isDirectory());
final FileTreeIterator fti = new FileTreeIterator(r, db.getFS()); final FileTreeIterator fti = new FileTreeIterator(r, db.getFS(),
WorkingTreeOptions.createConfigurationInstance(db.getConfig()));
assertTrue(fti.first()); assertTrue(fti.first());
assertTrue(fti.eof()); assertTrue(fti.eof());
} }
public void testSimpleIterate() throws Exception { public void testSimpleIterate() throws Exception {
final FileTreeIterator top = new FileTreeIterator(trash, db.getFS()); final FileTreeIterator top = new FileTreeIterator(trash, db.getFS(),
WorkingTreeOptions.createConfigurationInstance(db.getConfig()));
assertTrue(top.first()); assertTrue(top.first());
assertFalse(top.eof()); assertFalse(top.eof());
@ -151,7 +155,8 @@ public class FileTreeIteratorTest extends RepositoryTestCase {
} }
public void testComputeFileObjectId() throws Exception { public void testComputeFileObjectId() throws Exception {
final FileTreeIterator top = new FileTreeIterator(trash, db.getFS()); final FileTreeIterator top = new FileTreeIterator(trash, db.getFS(),
WorkingTreeOptions.createConfigurationInstance(db.getConfig()));
final MessageDigest md = Constants.newMessageDigest(); final MessageDigest md = Constants.newMessageDigest();
md.update(Constants.encodeASCII(Constants.TYPE_BLOB)); md.update(Constants.encodeASCII(Constants.TYPE_BLOB));

2
org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorWithTimeControl.java

@ -87,7 +87,7 @@ public class FileTreeIteratorWithTimeControl extends FileTreeIterator {
public FileTreeIteratorWithTimeControl(File f, FS fs, public FileTreeIteratorWithTimeControl(File f, FS fs,
TreeSet<Long> modTimes) { TreeSet<Long> modTimes) {
super(f, fs); super(f, fs, new WorkingTreeOptions(false));
this.modTimes = modTimes; this.modTimes = modTimes;
} }

111
org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java

@ -0,0 +1,111 @@
/*
* Copyright (C) 2010, Marc Strapetz <marc.strapetz@syntevo.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util.io;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import junit.framework.TestCase;
/**
 * Verifies that {@link EolCanonicalizingInputStream} rewrites CRLF pairs to
 * LF and passes every other byte sequence through unchanged, using both
 * single-byte and buffered reads.
 */
public class EolCanonicalizingInputStreamTest extends TestCase {
	public void testLF() throws IOException {
		final byte[] bytes = asBytes("1\n2\n3");
		test(bytes, bytes);
	}

	public void testCR() throws IOException {
		final byte[] bytes = asBytes("1\r2\r3");
		test(bytes, bytes);
	}

	public void testCRLF() throws IOException {
		test(asBytes("1\r\n2\r\n3"), asBytes("1\n2\n3"));
	}

	public void testLFCR() throws IOException {
		final byte[] bytes = asBytes("1\n\r2\n\r3");
		test(bytes, bytes);
	}

	/**
	 * Run {@code input} through the stream and compare against
	 * {@code expected}, once byte-by-byte and once for each buffer size from
	 * 1 to 9.
	 *
	 * @param input
	 *            raw bytes handed to the stream under test.
	 * @param expected
	 *            exact bytes the stream must produce.
	 * @throws IOException
	 *             the stream under test failed to read.
	 */
	private void test(byte[] input, byte[] expected) throws IOException {
		final InputStream bis1 = new ByteArrayInputStream(input);
		final InputStream cis1 = new EolCanonicalizingInputStream(bis1);
		int index1 = 0;
		for (int b = cis1.read(); b != -1; b = cis1.read()) {
			// Report surplus output as a test failure instead of an
			// ArrayIndexOutOfBoundsException.
			assertTrue("stream produced more bytes than expected",
					index1 < expected.length);
			assertEquals(expected[index1], (byte) b);
			index1++;
		}
		assertEquals(expected.length, index1);

		for (int bufferSize = 1; bufferSize < 10; bufferSize++) {
			final byte[] buffer = new byte[bufferSize];
			final InputStream bis2 = new ByteArrayInputStream(input);
			final InputStream cis2 = new EolCanonicalizingInputStream(bis2);
			int read = 0;
			// Drain the stream completely so that trailing surplus bytes are
			// detected, not just a matching prefix.
			for (int readNow = cis2.read(buffer, 0, buffer.length); readNow != -1; readNow = cis2
					.read(buffer, 0, buffer.length)) {
				assertTrue("stream produced more bytes than expected", read
						+ readNow <= expected.length);
				for (int index2 = 0; index2 < readNow; index2++) {
					assertEquals(expected[read + index2], buffer[index2]);
				}
				read += readNow;
			}
			assertEquals(expected.length, read);
		}
	}

	/**
	 * Encode a string as UTF-8 bytes.
	 *
	 * @param in
	 *            text to encode.
	 * @return the UTF-8 encoding of {@code in}.
	 */
	private static byte[] asBytes(String in) {
		try {
			return in.getBytes("UTF-8");
		} catch (UnsupportedEncodingException ex) {
			// Passing the exception as the detail keeps it as the cause.
			throw new AssertionError(ex);
		}
	}
}

52
org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java

@ -46,6 +46,7 @@ package org.eclipse.jgit.diff;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import org.eclipse.jgit.util.IO; import org.eclipse.jgit.util.IO;
@ -118,7 +119,8 @@ public class RawText implements Sequence {
* *
* @param file * @param file
* the text file. * the text file.
* @throws IOException if Exceptions occur while reading the file * @throws IOException
* if Exceptions occur while reading the file
*/ */
public RawText(File file) throws IOException { public RawText(File file) throws IOException {
this(IO.readFully(file)); this(IO.readFully(file));
@ -234,9 +236,53 @@ public class RawText implements Sequence {
* @return true if raw is likely to be a binary file, false otherwise * @return true if raw is likely to be a binary file, false otherwise
*/ */
public static boolean isBinary(byte[] raw) { public static boolean isBinary(byte[] raw) {
return isBinary(raw, raw.length);
}
/**
 * Guess whether the content supplied by a stream is binary rather than
 * text, by examining only its leading bytes.
 *
 * Note: the stream must not be used again after this call. It may be only
 * partially consumed and is left at an unspecified position. Closing the
 * stream remains the caller's responsibility.
 *
 * @param raw
 *            input stream supplying the raw file content.
 * @return true if the content is likely to be binary, false otherwise
 * @throws IOException
 *             if the input stream could not be read
 */
public static boolean isBinary(InputStream raw) throws IOException {
	final byte[] head = new byte[FIRST_FEW_BYTES];
	int filled = 0;
	for (int got; filled < head.length; filled += got) {
		got = raw.read(head, filled, head.length - filled);
		if (got == -1)
			break;
	}
	return isBinary(head, filled);
}
/**
* Determine heuristically whether a byte array represents binary (as
* opposed to text) content.
*
* @param raw
* the raw file content.
* @param length
* number of bytes in {@code raw} to evaluate. This should be
* {@code raw.length} unless {@code raw} was over-allocated by
* the caller.
* @return true if raw is likely to be a binary file, false otherwise
*/
public static boolean isBinary(byte[] raw, int length) {
// Same heuristic as C Git // Same heuristic as C Git
int size = raw.length > FIRST_FEW_BYTES ? FIRST_FEW_BYTES : raw.length; if (length > FIRST_FEW_BYTES)
for (int ptr = 0; ptr < size; ptr++) length = FIRST_FEW_BYTES;
for (int ptr = 0; ptr < length; ptr++)
if (raw[ptr] == '\0') if (raw[ptr] == '\0')
return true; return true;

11
org.eclipse.jgit/src/org/eclipse/jgit/lib/CoreConfig.java

@ -70,6 +70,8 @@ public class CoreConfig {
private final int streamFileThreshold; private final int streamFileThreshold;
private final boolean autoCRLF;
private CoreConfig(final Config rc) { private CoreConfig(final Config rc) {
compression = rc.getInt("core", "compression", DEFAULT_COMPRESSION); compression = rc.getInt("core", "compression", DEFAULT_COMPRESSION);
packIndexVersion = rc.getInt("pack", "indexversion", 2); packIndexVersion = rc.getInt("pack", "indexversion", 2);
@ -80,6 +82,8 @@ public class CoreConfig {
sft = Math.min(sft, maxMem / 4); // don't use more than 1/4 of the heap sft = Math.min(sft, maxMem / 4); // don't use more than 1/4 of the heap
sft = Math.min(sft, Integer.MAX_VALUE); // cannot exceed array length sft = Math.min(sft, Integer.MAX_VALUE); // cannot exceed array length
streamFileThreshold = (int) sft; streamFileThreshold = (int) sft;
autoCRLF = rc.getBoolean("core", "autocrlf", false);
} }
/** /**
@ -108,4 +112,11 @@ public class CoreConfig {
public int getStreamFileThreshold() { public int getStreamFileThreshold() {
return streamFileThreshold; return streamFileThreshold;
} }
/**
* Report the value of the {@code core.autocrlf} setting as it was read by
* the constructor, which uses {@code false} as the default.
*
* @return whether automatic CRLF conversion has been configured
*/
public boolean isAutoCRLF() {
return autoCRLF;
}
} }

27
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/FileTreeIterator.java

@ -66,14 +66,14 @@ import org.eclipse.jgit.util.FS;
*/ */
public class FileTreeIterator extends WorkingTreeIterator { public class FileTreeIterator extends WorkingTreeIterator {
/** /**
* the starting directory. This directory should correspond to * the starting directory. This directory should correspond to the root of
* the root of the repository. * the repository.
*/ */
protected final File directory; protected final File directory;
/** /**
* the file system abstraction which will be necessary to * the file system abstraction which will be necessary to perform certain
* perform certain file system operations. * file system operations.
*/ */
protected final FS fs; protected final FS fs;
@ -84,7 +84,8 @@ public class FileTreeIterator extends WorkingTreeIterator {
* the repository whose working tree will be scanned. * the repository whose working tree will be scanned.
*/ */
public FileTreeIterator(Repository repo) { public FileTreeIterator(Repository repo) {
this(repo.getWorkTree(), repo.getFS()); this(repo.getWorkTree(), repo.getFS(), WorkingTreeOptions
.createConfigurationInstance(repo.getConfig()));
initRootIterator(repo); initRootIterator(repo);
} }
@ -95,10 +96,13 @@ public class FileTreeIterator extends WorkingTreeIterator {
* the starting directory. This directory should correspond to * the starting directory. This directory should correspond to
* the root of the repository. * the root of the repository.
* @param fs * @param fs
* the file system abstraction which will be necessary to * the file system abstraction which will be necessary to perform
* perform certain file system operations. * certain file system operations.
* @param options
* working tree options to be used
*/ */
public FileTreeIterator(final File root, FS fs) { public FileTreeIterator(final File root, FS fs, WorkingTreeOptions options) {
super(options);
directory = root; directory = root;
this.fs = fs; this.fs = fs;
init(entries()); init(entries());
@ -110,8 +114,8 @@ public class FileTreeIterator extends WorkingTreeIterator {
* @param p * @param p
* the parent iterator we were created from. * the parent iterator we were created from.
* @param fs * @param fs
* the file system abstraction which will be necessary to * the file system abstraction which will be necessary to perform
* perform certain file system operations. * certain file system operations.
* @param root * @param root
* the subdirectory. This should be a directory contained within * the subdirectory. This should be a directory contained within
* the parent directory. * the parent directory.
@ -205,8 +209,7 @@ public class FileTreeIterator extends WorkingTreeIterator {
} }
/** /**
* @return * @return The root directory of this iterator
* The root directory of this iterator
*/ */
public File getDirectory() { public File getDirectory() {
return directory; return directory;

194
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java

@ -45,6 +45,7 @@
package org.eclipse.jgit.treewalk; package org.eclipse.jgit.treewalk;
import java.io.ByteArrayInputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
@ -60,6 +61,7 @@ import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import org.eclipse.jgit.JGitText; import org.eclipse.jgit.JGitText;
import org.eclipse.jgit.diff.RawText;
import org.eclipse.jgit.dircache.DirCache; import org.eclipse.jgit.dircache.DirCache;
import org.eclipse.jgit.dircache.DirCacheEntry; import org.eclipse.jgit.dircache.DirCacheEntry;
import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.CorruptObjectException;
@ -69,6 +71,8 @@ import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.util.FS; import org.eclipse.jgit.util.FS;
import org.eclipse.jgit.util.IO;
import org.eclipse.jgit.util.io.EolCanonicalizingInputStream;
/** /**
* Walks a working directory tree as part of a {@link TreeWalk}. * Walks a working directory tree as part of a {@link TreeWalk}.
@ -88,6 +92,12 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
/** Size we perform file IO in if we have to read and hash a file. */ /** Size we perform file IO in if we have to read and hash a file. */
private static final int BUFFER_SIZE = 2048; private static final int BUFFER_SIZE = 2048;
/**
* Maximum size of files which may be read fully into memory for performance
* reasons.
*/
private static final long MAXIMUM_FILE_SIZE_TO_READ_FULLY = 65536;
/** The {@link #idBuffer()} for the current entry. */ /** The {@link #idBuffer()} for the current entry. */
private byte[] contentId; private byte[] contentId;
@ -115,10 +125,19 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
/** If there is a .gitignore file present, the parsed rules from it. */ /** If there is a .gitignore file present, the parsed rules from it. */
private IgnoreNode ignoreNode; private IgnoreNode ignoreNode;
/** Create a new iterator with no parent. */ /** Options used to process the working tree. */
protected WorkingTreeIterator() { private final WorkingTreeOptions options;
/**
* Create a new iterator with no parent.
*
* @param options
* working tree options to be used
*/
protected WorkingTreeIterator(WorkingTreeOptions options) {
super(); super();
nameEncoder = Constants.CHARSET.newEncoder(); nameEncoder = Constants.CHARSET.newEncoder();
this.options = options;
} }
/** /**
@ -135,10 +154,14 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
* may be null or the empty string to indicate the prefix is the * may be null or the empty string to indicate the prefix is the
* root of the repository. A trailing slash ('/') is * root of the repository. A trailing slash ('/') is
* automatically appended if the prefix does not end in '/'. * automatically appended if the prefix does not end in '/'.
* @param options
* working tree options to be used
*/ */
protected WorkingTreeIterator(final String prefix) { protected WorkingTreeIterator(final String prefix,
WorkingTreeOptions options) {
super(prefix); super(prefix);
nameEncoder = Constants.CHARSET.newEncoder(); nameEncoder = Constants.CHARSET.newEncoder();
this.options = options;
} }
/** /**
@ -150,6 +173,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
protected WorkingTreeIterator(final WorkingTreeIterator p) { protected WorkingTreeIterator(final WorkingTreeIterator p) {
super(p); super(p);
nameEncoder = p.nameEncoder; nameEncoder = p.nameEncoder;
options = p.options;
} }
/** /**
@ -191,7 +215,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
return zeroid; return zeroid;
} }
private void initializeDigest() { private void initializeDigestAndReadBuffer() {
if (contentDigest != null) if (contentDigest != null)
return; return;
@ -200,7 +224,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
contentDigest = Constants.newMessageDigest(); contentDigest = Constants.newMessageDigest();
} else { } else {
final WorkingTreeIterator p = (WorkingTreeIterator) parent; final WorkingTreeIterator p = (WorkingTreeIterator) parent;
p.initializeDigest(); p.initializeDigestAndReadBuffer();
contentReadBuffer = p.contentReadBuffer; contentReadBuffer = p.contentReadBuffer;
contentDigest = p.contentDigest; contentDigest = p.contentDigest;
} }
@ -218,53 +242,91 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
if (is == null) if (is == null)
return zeroid; return zeroid;
try { try {
initializeDigest(); initializeDigestAndReadBuffer();
contentDigest.reset(); final long len = e.getLength();
contentDigest.update(hblob); if (!mightNeedCleaning(e))
contentDigest.update((byte) ' '); return computeHash(is, len);
final long blobLength = e.getLength(); if (len <= MAXIMUM_FILE_SIZE_TO_READ_FULLY) {
long sz = blobLength; ByteBuffer rawbuf = IO.readWholeStream(is, (int) len);
if (sz == 0) { byte[] raw = rawbuf.array();
contentDigest.update((byte) '0'); int n = rawbuf.limit();
} else { if (!isBinary(e, raw, n)) {
final int bufn = contentReadBuffer.length; rawbuf = filterClean(e, raw, n);
int p = bufn; raw = rawbuf.array();
do { n = rawbuf.limit();
contentReadBuffer[--p] = digits[(int) (sz % 10)]; }
sz /= 10; return computeHash(new ByteArrayInputStream(raw, 0, n), n);
} while (sz > 0);
contentDigest.update(contentReadBuffer, p, bufn - p);
} }
contentDigest.update((byte) 0);
if (isBinary(e))
for (;;) { return computeHash(is, len);
final int r = is.read(contentReadBuffer);
if (r <= 0) final long canonLen;
break; final InputStream lenIs = filterClean(e, e.openInputStream());
contentDigest.update(contentReadBuffer, 0, r);
sz += r;
}
if (sz != blobLength)
return zeroid;
return contentDigest.digest();
} finally {
try { try {
is.close(); canonLen = computeLength(lenIs);
} catch (IOException err2) { } finally {
// Suppress any error related to closing an input safeClose(lenIs);
// stream. We don't care, we should not have any
// outstanding data to flush or anything like that.
} }
return computeHash(filterClean(e, is), canonLen);
} finally {
safeClose(is);
} }
} catch (IOException err) { } catch (IOException err) {
// Can't read the file? Don't report the failure either. // Can't read the file? Don't report the failure either.
//
return zeroid; return zeroid;
} }
} }
/**
* Close a stream, discarding any {@link IOException} the close raises.
*
* @param in
*            the stream to close. It is dereferenced unconditionally, so it
*            must not be null.
*/
private static void safeClose(final InputStream in) {
try {
in.close();
} catch (IOException err2) {
// Suppress any error related to closing an input
// stream. We don't care, we should not have any
// outstanding data to flush or anything like that.
}
}
/**
* Decide whether an entry's content may have to be filtered before it is
* hashed.
*
* The answer currently depends only on the iterator's autoCRLF option.
*
* @param entry
*            the entry about to be hashed. Currently unused; presumably kept
*            so per-path rules (such as gitattributes) can be consulted
*            later.
* @return true if autoCRLF is enabled in the working tree options.
*/
private boolean mightNeedCleaning(Entry entry) {
return options.isAutoCRLF();
}
/**
* Classify in-memory content as binary or text by delegating to
* {@link RawText#isBinary(byte[], int)}.
*
* @param entry
*            the entry the content was read from. Currently unused.
* @param content
*            array holding the content.
* @param sz
*            number of bytes in {@code content} to evaluate.
* @return true if the content is likely to be binary.
*/
private boolean isBinary(Entry entry, byte[] content, int sz) {
return RawText.isBinary(content, sz);
}
/**
 * Classify an entry as binary or text by opening a fresh stream on it and
 * handing that stream to {@link RawText#isBinary(InputStream)}.
 *
 * The stream opened here is always closed again before returning.
 *
 * @param entry
 *            the entry whose content is examined.
 * @return true if the content is likely to be binary.
 * @throws IOException
 *             the entry could not be opened or read.
 */
private boolean isBinary(Entry entry) throws IOException {
	final InputStream content = entry.openInputStream();
	final boolean binary;
	try {
		binary = RawText.isBinary(content);
	} finally {
		safeClose(content);
	}
	return binary;
}
/**
 * Apply the clean filter to content that is already held in memory.
 *
 * @param entry
 *            the entry the content belongs to; passed on to
 *            {@link #filterClean(Entry, InputStream)}.
 * @param src
 *            array holding the raw content. It may be larger than the
 *            content itself; only the first {@code n} bytes are filtered.
 * @param n
 *            number of valid bytes in {@code src}. Also used as the size
 *            hint for the result buffer.
 * @return the filtered content.
 * @throws IOException
 *             the filtered stream could not be read.
 */
private ByteBuffer filterClean(Entry entry, byte[] src, int n)
		throws IOException {
	// Bound the stream to n bytes. Callers pass the backing array of a
	// ByteBuffer returned by IO.readWholeStream, which may be
	// over-allocated; reading the whole array would feed stale trailing
	// bytes into the filter and the hash.
	InputStream in = new ByteArrayInputStream(src, 0, n);
	return IO.readWholeStream(filterClean(entry, in), n);
}
/**
* Wrap a raw content stream in the clean filter, which currently is always
* an {@link EolCanonicalizingInputStream}.
*
* @param entry
*            the entry the stream was opened on. Currently unused.
* @param in
*            raw content stream to wrap.
* @return a new stream reading from {@code in} through the filter.
*/
private InputStream filterClean(Entry entry, InputStream in) {
return new EolCanonicalizingInputStream(in);
}
/**
* Returns the working tree options used by this iterator.
*
* This is the instance handed to the constructor, or the parent's instance
* when the iterator was created from a parent iterator.
*
* @return working tree options
*/
public WorkingTreeOptions getOptions() {
return options;
}
@Override @Override
public int idOffset() { public int idOffset() {
return 0; return 0;
@ -557,6 +619,51 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
} }
} }
/**
 * Count the bytes remaining in a stream by consuming it to its end.
 *
 * @param in
 *            stream to measure. It is fully consumed but not closed.
 * @return number of bytes that were left in {@code in}.
 * @throws IOException
 *             the stream could not be read.
 */
private long computeLength(InputStream in) throws IOException {
	// Since we only care about the length, use skip. The stream
	// may be able to more efficiently wade through its data.
	//
	long length = 0;
	for (;;) {
		long n = in.skip(1 << 20);
		if (n > 0) {
			length += n;
			continue;
		}

		// skip() is allowed to skip nothing without being at the end
		// of the stream, so confirm EOF with a real read before
		// stopping. Otherwise the length could be under-reported.
		if (in.read() < 0)
			break;
		length++;
	}
	return length;
}
/**
* Hash a stream as an object of a declared length, using the shared
* content digest.
*
* The digest is fed {@code hblob}, a space, the decimal form of
* {@code length}, a zero byte, and then every byte read from {@code in}.
* NOTE(review): {@code hblob} and {@code digits} are defined outside this
* method; presumably the ASCII object type name and the ASCII digits
* '0'..'9' -- confirm.
*
* Uses {@code contentDigest} and {@code contentReadBuffer}, so they must
* have been initialized before this is called.
*
* @param in
*            stream supplying the content. It is read until it reports no
*            more data, but is not closed here.
* @param length
*            number of bytes {@code in} is expected to supply.
* @return the digest, or {@code zeroid} if the number of bytes actually
*         read differs from {@code length}.
* @throws IOException
*             the stream could not be read.
*/
private byte[] computeHash(InputStream in, long length) throws IOException {
contentDigest.reset();
contentDigest.update(hblob);
contentDigest.update((byte) ' ');
long sz = length;
if (sz == 0) {
contentDigest.update((byte) '0');
} else {
// Format the length as decimal digits, filling the read buffer from
// its end backwards. The buffer is free for this because no content
// has been read into it yet.
final int bufn = contentReadBuffer.length;
int p = bufn;
do {
contentReadBuffer[--p] = digits[(int) (sz % 10)];
sz /= 10;
} while (sz > 0);
contentDigest.update(contentReadBuffer, p, bufn - p);
}
contentDigest.update((byte) 0);
// For any non-negative length sz is 0 at this point, so from here on it
// counts the bytes actually read from the stream.
for (;;) {
final int r = in.read(contentReadBuffer);
if (r <= 0)
break;
contentDigest.update(contentReadBuffer, 0, r);
sz += r;
}
// A mismatch means the content did not match the length already written
// into the header, so the digest would not be valid.
if (sz != length)
return zeroid;
return contentDigest.digest();
}
/** A single entry within a working directory tree. */ /** A single entry within a working directory tree. */
protected static abstract class Entry { protected static abstract class Entry {
byte[] encodedName; byte[] encodedName;
@ -569,7 +676,8 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
b = enc.encode(CharBuffer.wrap(getName())); b = enc.encode(CharBuffer.wrap(getName()));
} catch (CharacterCodingException e) { } catch (CharacterCodingException e) {
// This should so never happen. // This should so never happen.
throw new RuntimeException(MessageFormat.format(JGitText.get().unencodeableFile, getName())); throw new RuntimeException(MessageFormat.format(
JGitText.get().unencodeableFile, getName()));
} }
encodedNameLen = b.limit(); encodedNameLen = b.limit();

101
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeOptions.java

@ -0,0 +1,101 @@
/*
* Copyright (C) 2010, Marc Strapetz <marc.strapetz@syntevo.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.treewalk;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.CoreConfig;
/**
 * Settings that control how a WorkingTreeIterator processes files of the
 * working tree.
 */
public class WorkingTreeOptions {
	/**
	 * True if EOLs of text files are converted to '\n' before the blob ID is
	 * calculated.
	 */
	private final boolean autoCRLF;

	/**
	 * Creates new options.
	 *
	 * @param autoCRLF
	 *            true if EOLs of text files should be converted to '\n'
	 *            before calculating the blob ID.
	 */
	public WorkingTreeOptions(boolean autoCRLF) {
		this.autoCRLF = autoCRLF;
	}

	/**
	 * Creates default options, matching the original configuration of Git on
	 * Unix systems: no EOL conversion.
	 *
	 * @return created working tree options
	 */
	public static WorkingTreeOptions createDefaultInstance() {
		final boolean noConversion = false;
		return new WorkingTreeOptions(noConversion);
	}

	/**
	 * Creates options from the core section of a repository configuration.
	 *
	 * @param config
	 *            repository configuration to create options for
	 * @return created working tree options
	 */
	public static WorkingTreeOptions createConfigurationInstance(Config config) {
		final CoreConfig core = config.get(CoreConfig.KEY);
		return new WorkingTreeOptions(core.isAutoCRLF());
	}

	/**
	 * Tells whether EOLs of text files are converted to '\n' before the blob
	 * ID is calculated.
	 *
	 * @return true if EOLs should be canonicalized.
	 */
	public boolean isAutoCRLF() {
		return autoCRLF;
	}
}

46
org.eclipse.jgit/src/org/eclipse/jgit/util/IO.java

@ -51,6 +51,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.ByteBuffer;
import java.text.MessageFormat; import java.text.MessageFormat;
import org.eclipse.jgit.JGitText; import org.eclipse.jgit.JGitText;
@ -96,7 +97,8 @@ public class IO {
try { try {
final long sz = in.getChannel().size(); final long sz = in.getChannel().size();
if (sz > max) if (sz > max)
throw new IOException(MessageFormat.format(JGitText.get().fileIsTooLarge, path)); throw new IOException(MessageFormat.format(
JGitText.get().fileIsTooLarge, path));
final byte[] buf = new byte[(int) sz]; final byte[] buf = new byte[(int) sz];
IO.readFully(in, buf, 0, buf.length); IO.readFully(in, buf, 0, buf.length);
return buf; return buf;
@ -109,6 +111,48 @@ public class IO {
} }
} }
/**
 * Slurp everything an input stream has left into a ByteBuffer held in
 * memory.
 *
 * Note: the stream is consumed to its end and cannot be used afterwards.
 * Closing it is left to the caller.
 *
 * @param in
 *            input stream to be read.
 * @param sizeHint
 *            approximate number of bytes the stream is expected to hold.
 *            The initial buffer is allocated with exactly this size.
 * @return complete contents of the input stream. The ByteBuffer always has
 *         a writable backing array, with {@code position() == 0} and
 *         {@code limit()} equal to the actual length read. Callers may rely
 *         on obtaining the underlying array for efficient data access. If
 *         {@code sizeHint} was too large, the array may be over-allocated,
 *         resulting in {@code limit() < array().length}.
 * @throws IOException
 *             there was an error reading from the stream.
 */
public static ByteBuffer readWholeStream(InputStream in, int sizeHint)
		throws IOException {
	final byte[] head = new byte[sizeHint];
	int filled = 0;
	for (int got; filled < head.length; filled += got) {
		got = in.read(head, filled, head.length - filled);
		if (got < 0)
			return ByteBuffer.wrap(head, 0, filled);
	}

	// The hinted buffer is full. Probe one more byte to learn whether the
	// stream ended exactly at the hint or carries more data.
	final int overflow = in.read();
	if (overflow >= 0) {
		final TemporaryBuffer.Heap spill = new TemporaryBuffer.Heap(
				Integer.MAX_VALUE);
		spill.write(head);
		spill.write(overflow);
		spill.copy(in);
		return ByteBuffer.wrap(spill.toByteArray());
	}
	return ByteBuffer.wrap(head, 0, filled);
}
/** /**
* Read the entire byte array into memory, or throw an exception. * Read the entire byte array into memory, or throw an exception.
* *

130
org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java

@ -0,0 +1,130 @@
/*
* Copyright (C) 2010, Marc Strapetz <marc.strapetz@syntevo.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.util.io;
import java.io.IOException;
import java.io.InputStream;
/**
 * An input stream which canonicalizes EOL bytes on the fly to '\n'.
 *
 * Every CR LF pair read from the wrapped stream is delivered as a single LF.
 * A CR that is not immediately followed by LF (including a CR that is the
 * very last byte of the stream) is passed through unchanged.
 *
 * Note: Make sure to apply this InputStream only to text files!
 */
public class EolCanonicalizingInputStream extends InputStream {
    /** Scratch array backing the single-byte {@link #read()} variant. */
    private final byte[] single = new byte[1];

    /** Raw bytes most recently fetched from {@link #in}. */
    private final byte[] buf = new byte[8096];

    /** The wrapped, un-canonicalized stream. */
    private final InputStream in;

    /** Number of valid bytes in {@link #buf}, or -1 once EOF was reached. */
    private int cnt;

    /** Index of the next unread byte in {@link #buf}. */
    private int ptr;

    /**
     * Creates a new InputStream, wrapping the specified stream
     *
     * @param in
     *            raw input stream
     */
    public EolCanonicalizingInputStream(InputStream in) {
        this.in = in;
    }

    /**
     * Read one canonicalized byte.
     *
     * @return the next byte as a value in the range 0..255, or -1 at end of
     *         stream.
     * @throws IOException
     *             the wrapped stream could not be read.
     */
    @Override
    public int read() throws IOException {
        final int read = read(single, 0, 1);
        return read == 1 ? single[0] & 0xff : -1;
    }

    /**
     * Read up to {@code len} canonicalized bytes into {@code bs}.
     *
     * @param bs
     *            destination array.
     * @param off
     *            first index in {@code bs} to store into.
     * @param len
     *            maximum number of bytes to store.
     * @return number of bytes stored, 0 if {@code len} is 0, or -1 if the
     *         end of the stream was reached before any byte was stored.
     * @throws IOException
     *             the wrapped stream could not be read.
     */
    @Override
    public int read(byte[] bs, int off, int len) throws IOException {
        if (len == 0)
            return 0;
        if (cnt == -1)
            return -1;

        final int startOff = off;
        final int end = off + len;
        while (off < end) {
            if (ptr == cnt && !fillBuffer())
                break;

            final byte b = buf[ptr++];
            if (b != '\r') {
                bs[off++] = b;
                continue;
            }

            // A CR was consumed; we must look at the following byte to
            // decide whether it starts a CR LF pair. If the stream ends
            // here the CR is a lone one and is emitted as-is.
            if (ptr == cnt && !fillBuffer()) {
                bs[off++] = '\r';
                break;
            }

            if (buf[ptr] == '\n') {
                // CR LF collapses to LF; skip the LF we just peeked at.
                bs[off++] = '\n';
                ptr++;
            } else {
                // Lone CR: keep it, and leave the peeked byte unread.
                bs[off++] = '\r';
            }
        }
        return startOff == off ? -1 : off - startOff;
    }

    /**
     * Close the wrapped stream.
     *
     * @throws IOException
     *             the wrapped stream could not be closed.
     */
    @Override
    public void close() throws IOException {
        in.close();
    }

    /**
     * Refill {@link #buf} from the wrapped stream.
     *
     * @return true if at least one new byte is available at {@link #ptr};
     *         false if the wrapped stream reached its end.
     * @throws IOException
     *             the wrapped stream could not be read.
     */
    private boolean fillBuffer() throws IOException {
        // A zero-length read tells us nothing about EOF. Treating it as a
        // failure while leaving ptr untouched would make later calls read
        // stale bytes past cnt, so simply ask again until the stream either
        // delivers data or signals its end.
        do {
            cnt = in.read(buf, 0, buf.length);
        } while (cnt == 0);

        if (cnt < 0) {
            // Normalize so the "cnt == -1" EOF guard in read() always fires.
            cnt = -1;
            return false;
        }
        ptr = 0;
        return true;
    }
}
Loading…
Cancel
Save