From 7df17e57d4e736336de6b95810daf076e9b7dded Mon Sep 17 00:00:00 2001 From: Marc Strapetz Date: Tue, 21 Feb 2012 15:18:08 +0100 Subject: [PATCH] EolCanonicalizingInputStream: binary detection should be optional EolCanonicalizingInputStream may also be used in combination with .gitattributes. If .gitattributes states that a file is of type text, line endings have to be canonicalized even if the actual file content seems to be binary. Change-Id: Ie4ccdfc5cb91fbd55e06f51146cf5c7c84b8e18b --- .../io/EolCanonicalizingInputStreamTest.java | 27 +++++++++++++------ .../jgit/treewalk/WorkingTreeIterator.java | 4 +-- .../util/io/EolCanonicalizingInputStream.java | 16 ++++++----- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java index 960ca6241..52ad0139c 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/util/io/EolCanonicalizingInputStreamTest.java @@ -57,35 +57,46 @@ public class EolCanonicalizingInputStreamTest { @Test public void testLF() throws IOException { final byte[] bytes = asBytes("1\n2\n3"); - test(bytes, bytes); + test(bytes, bytes, false); } @Test public void testCR() throws IOException { final byte[] bytes = asBytes("1\r2\r3"); - test(bytes, bytes); + test(bytes, bytes, false); } @Test public void testCRLF() throws IOException { - test(asBytes("1\r\n2\r\n3"), asBytes("1\n2\n3")); + test(asBytes("1\r\n2\r\n3"), asBytes("1\n2\n3"), false); } @Test public void testLFCR() throws IOException { final byte[] bytes = asBytes("1\n\r2\n\r3"); - test(bytes, bytes); + test(bytes, bytes, false); } @Test public void testEmpty() throws IOException { final byte[] bytes = asBytes(""); - test(bytes, bytes); + test(bytes, bytes, false); } - private void test(byte[] input, byte[] expected) 
throws IOException { + @Test + public void testBinaryDetect() throws IOException { + final byte[] bytes = asBytes("1\r\n2\r\n3\0"); + test(bytes, bytes, true); + } + + @Test + public void testBinaryDontDetect() throws IOException { + test(asBytes("1\r\n2\r\n3\0"), asBytes("1\n2\n3\0"), false); + } + + private void test(byte[] input, byte[] expected, boolean detectBinary) throws IOException { final InputStream bis1 = new ByteArrayInputStream(input); - final InputStream cis1 = new EolCanonicalizingInputStream(bis1); + final InputStream cis1 = new EolCanonicalizingInputStream(bis1, detectBinary); int index1 = 0; for (int b = cis1.read(); b != -1; b = cis1.read()) { assertEquals(expected[index1], (byte) b); @@ -97,7 +108,7 @@ public class EolCanonicalizingInputStreamTest { for (int bufferSize = 1; bufferSize < 10; bufferSize++) { final byte[] buffer = new byte[bufferSize]; final InputStream bis2 = new ByteArrayInputStream(input); - final InputStream cis2 = new EolCanonicalizingInputStream(bis2); + final InputStream cis2 = new EolCanonicalizingInputStream(bis2, detectBinary); int read = 0; for (int readNow = cis2.read(buffer, 0, buffer.length); readNow != -1 diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java index 89403993f..aaaf81b1d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java @@ -402,7 +402,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { } private InputStream filterClean(InputStream in) throws IOException { - return new EolCanonicalizingInputStream(in); + return new EolCanonicalizingInputStream(in, true); } /** @@ -500,7 +500,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { InputStream rawis = current().openInputStream(); InputStream is; if (getOptions().getAutoCRLF() != AutoCRLF.FALSE) 
- is = new EolCanonicalizingInputStream(rawis); + is = new EolCanonicalizingInputStream(rawis, true); else is = rawis; return is; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java index 387d9b82b..249239306 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java @@ -49,10 +49,11 @@ import java.io.InputStream; import org.eclipse.jgit.diff.RawText; /** - * An input stream which canonicalizes EOLs bytes on the fly to '\n', unless the - * first 8000 bytes indicate the stream is binary. + * An input stream which canonicalizes EOL bytes on the fly to '\n'. * - * Note: Make sure to apply this InputStream only to text files! + * Optionally, a binary check on the first 8000 bytes is performed + * and in case of binary files, canonicalization is turned off + * (for the complete file). */ public class EolCanonicalizingInputStream extends InputStream { private final byte[] single = new byte[1]; @@ -67,7 +68,7 @@ public class EolCanonicalizingInputStream extends InputStream { private boolean isBinary; - private boolean modeDetected; + private boolean detectBinary; /** * Creates a new InputStream, wrapping the specified stream @@ -75,8 +76,9 @@ public class EolCanonicalizingInputStream extends InputStream { * @param in * raw input stream */ - public EolCanonicalizingInputStream(InputStream in) { + public EolCanonicalizingInputStream(InputStream in, boolean detectBinary) { this.in = in; + this.detectBinary = detectBinary; } @Override @@ -132,9 +134,9 @@ public class EolCanonicalizingInputStream extends InputStream { cnt = in.read(buf, 0, buf.length); if (cnt < 1) return false; - if (!modeDetected) { + if (detectBinary) { isBinary = RawText.isBinary(buf, cnt); - modeDetected = true; + detectBinary = false; } ptr = 0; return true;