From 5404e70dc64201786cd6a21efb41310912860122 Mon Sep 17 00:00:00 2001 From: Robin Rosenberg Date: Thu, 26 Dec 2013 01:22:04 +0100 Subject: [PATCH] Fix for core.autocrlf=input resulting in modified file This version does not attempt to unsmudge, unlike the first attempt in Idafad150553df14827eccfde2e3b95760e16a8b6. Bug: 372834 Change-Id: I9300e735cb16d6208e1df963abb1ff69f688155d Also-by: Robin Stocker Signed-off-by: Matthias Sohn Signed-off-by: Robin Stocker --- .../org/eclipse/jgit/lib/IndexDiffTest.java | 31 +++++++ .../jgit/treewalk/FileTreeIteratorTest.java | 12 ++- .../jgit/dircache/DirCacheCheckout.java | 20 +++-- .../src/org/eclipse/jgit/lib/IndexDiff.java | 3 +- .../jgit/treewalk/WorkingTreeIterator.java | 85 +++++++++++++++++-- .../jgit/treewalk/filter/IndexDiffFilter.java | 5 +- .../util/io/EolCanonicalizingInputStream.java | 44 ++++++++++ 7 files changed, 181 insertions(+), 19 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/IndexDiffTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/IndexDiffTest.java index 64e72e8fc..a1e16e4cf 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/IndexDiffTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/lib/IndexDiffTest.java @@ -68,9 +68,11 @@ import org.eclipse.jgit.dircache.DirCacheEditor; import org.eclipse.jgit.dircache.DirCacheEditor.PathEdit; import org.eclipse.jgit.dircache.DirCacheEntry; import org.eclipse.jgit.junit.RepositoryTestCase; +import org.eclipse.jgit.lib.CoreConfig.AutoCRLF; import org.eclipse.jgit.lib.IndexDiff.StageState; import org.eclipse.jgit.merge.MergeStrategy; import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.storage.file.FileBasedConfig; import org.eclipse.jgit.treewalk.FileTreeIterator; import org.eclipse.jgit.util.IO; import org.junit.Test; @@ -576,6 +578,35 @@ public class IndexDiffTest extends RepositoryTestCase { assertTrue(StageState.BOTH_ADDED.hasTheirs()); } + @Test + public void testAutoCRLFInput() throws Exception { + Git git = new Git(db); + FileBasedConfig config = db.getConfig(); + + // Make sure core.autocrlf is false before adding + config.setEnum(ConfigConstants.CONFIG_CORE_SECTION, null, + ConfigConstants.CONFIG_KEY_AUTOCRLF, AutoCRLF.FALSE); + config.save(); + + // File is already in repository with CRLF + writeTrashFile("crlf.txt", "this\r\ncontains\r\ncrlf\r\n"); + git.add().addFilepattern("crlf.txt").call(); + git.commit().setMessage("Add crlf.txt").call(); + + // Now set core.autocrlf to input + config.setEnum(ConfigConstants.CONFIG_CORE_SECTION, null, + ConfigConstants.CONFIG_KEY_AUTOCRLF, AutoCRLF.INPUT); + config.save(); + + FileTreeIterator iterator = new FileTreeIterator(db); + IndexDiff diff = new IndexDiff(db, Constants.HEAD, iterator); + diff.diff(); + + assertTrue( + "Expected no modified files, but there were: " + + diff.getModified(), diff.getModified().isEmpty()); + } + private void verifyStageState(StageState expected, int... stages) throws IOException { DirCacheBuilder builder = db.lockDirCache().builder(); diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java index 6014f3b60..4ce4c8d04 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/FileTreeIteratorTest.java @@ -223,7 +223,9 @@ public class FileTreeIteratorTest extends RepositoryTestCase { ObjectId fromRaw = ObjectId.fromRaw(fti.idBuffer(), fti.idOffset()); assertEquals("6b584e8ece562ebffc15d38808cd6b98fc3d97ea", fromRaw.getName()); - assertFalse(fti.isModified(dce, false)); + ObjectReader objectReader = db.newObjectReader(); + assertFalse(fti.isModified(dce, false, objectReader)); + objectReader.release(); } @Test @@ -242,7 +244,9 @@ public class FileTreeIteratorTest extends RepositoryTestCase { .getConfig().get(WorkingTreeOptions.KEY)); while (!fti.getEntryPathString().equals("symlink")) fti.next(1); - assertFalse(fti.isModified(dce, false)); + ObjectReader objectReader = db.newObjectReader(); + assertFalse(fti.isModified(dce, false, objectReader)); + objectReader.release(); } @Test @@ -265,7 +269,9 @@ public class FileTreeIteratorTest extends RepositoryTestCase { // If the rounding trick does not work we could skip the compareMetaData // test and hope that we are usually testing the intended code path. assertEquals(MetadataDiff.SMUDGED, fti.compareMetadata(dce)); - assertTrue(fti.isModified(dce, false)); + ObjectReader objectReader = db.newObjectReader(); + assertTrue(fti.isModified(dce, false, objectReader)); + objectReader.release(); } @Test diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheCheckout.java b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheCheckout.java index f8c8442ff..40efc95f8 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheCheckout.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheCheckout.java @@ -326,7 +326,8 @@ public class DirCacheCheckout { m.getEntryFileMode()); } else if (i.getDirCacheEntry() != null) { // The index contains a file (and not a folder) - if (f.isModified(i.getDirCacheEntry(), true) + if (f.isModified(i.getDirCacheEntry(), true, + this.walk.getObjectReader()) || i.getDirCacheEntry().getStage() != 0) // The working tree file is dirty or the index contains a // conflict @@ -660,7 +661,9 @@ public class DirCacheCheckout { break; case 0xFFD: // 12 13 14 if (equalIdAndMode(hId, hMode, iId, iMode)) - if (f == null || f.isModified(dce, true)) + if (f == null + || f.isModified(dce, true, + this.walk.getObjectReader())) conflict(name, dce, h, m); else remove(name); @@ -774,7 +777,8 @@ public class DirCacheCheckout { // Nothing in Head // Something in Index if (dce != null - && (f == null || f.isModified(dce, true))) + && (f == null || f.isModified(dce, true, + this.walk.getObjectReader()))) // No file or file is dirty // Nothing in Merge and current path is part of // File/Folder conflict @@ -841,7 +845,9 @@ public class DirCacheCheckout { // Something different from a submodule in Index // Nothing in Merge // Something in Head - if (f == null || f.isModified(dce, true)) + if (f == null + || f.isModified(dce, true, + this.walk.getObjectReader())) // file is dirty // Index contains the same as Head // Something different from a submodule in Index @@ -904,7 +910,8 @@ public class DirCacheCheckout { // file content update(name, mId, mMode); } else if (dce != null - && (f == null || f.isModified(dce, true))) { + && (f == null || f.isModified(dce, true, + this.walk.getObjectReader()))) { // File doesn't exist or is dirty // Head and Index don't contain a submodule // Head contains the same as Index. Merge differs @@ -1041,7 +1048,8 @@ public class DirCacheCheckout { wtIt = tw.getTree(1, WorkingTreeIterator.class); if (dcIt == null || wtIt == null) return true; - if (wtIt.isModified(dcIt.getDirCacheEntry(), true)) { + if (wtIt.isModified(dcIt.getDirCacheEntry(), true, + this.walk.getObjectReader())) { return true; } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/IndexDiff.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/IndexDiff.java index 33654447a..8eb033355 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/IndexDiff.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/IndexDiff.java @@ -440,7 +440,8 @@ public class IndexDiff { missing.add(treeWalk.getPathString()); } else { if (workingTreeIterator.isModified( - dirCacheIterator.getDirCacheEntry(), true)) { + dirCacheIterator.getDirCacheEntry(), true, + treeWalk.getObjectReader())) { // in index, in workdir, content differs => modified modified.add(treeWalk.getPathString()); } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java index 07ba9d73a..280f64f4f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java @@ -73,10 +73,12 @@ import org.eclipse.jgit.ignore.IgnoreRule; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.CoreConfig; +import org.eclipse.jgit.lib.CoreConfig.CheckStat; import org.eclipse.jgit.lib.FileMode; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectLoader; +import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.Repository; -import org.eclipse.jgit.lib.CoreConfig.CheckStat; import org.eclipse.jgit.submodule.SubmoduleWalk; import org.eclipse.jgit.util.FS; import org.eclipse.jgit.util.IO; @@ -796,22 +798,46 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { * True if the actual file content should be checked if * modification time differs. * @return true if content is most likely different. + * @deprecated Use {@link #isModified(DirCacheEntry, boolean, ObjectReader)} */ public boolean isModified(DirCacheEntry entry, boolean forceContentCheck) { + return isModified(entry, false, null); + } + + /** + * Checks whether this entry differs from a given entry from the + * {@link DirCache}. + * + * File status information is used and if status is same we consider the + * file identical to the state in the working directory. Native git uses + * more stat fields than we have accessible in Java. + * + * @param entry + * the entry from the dircache we want to compare against + * @param forceContentCheck + * True if the actual file content should be checked if + * modification time differs. + * @param reader + * access to repository objects if necessary. Should not be null. + * @return true if content is most likely different. + * @since 3.3 + */ + public boolean isModified(DirCacheEntry entry, boolean forceContentCheck, + ObjectReader reader) { MetadataDiff diff = compareMetadata(entry); switch (diff) { case DIFFER_BY_TIMESTAMP: if (forceContentCheck) // But we are told to look at content even though timestamps // tell us about modification - return contentCheck(entry); + return contentCheck(entry, reader); else // We are told to assume a modification if timestamps differs return true; case SMUDGED: // The file is clean by timestamps but the entry was smudged. // Lets do a content check - return contentCheck(entry); + return contentCheck(entry, reader); case EQUAL: return false; case DIFFER_BY_METADATA: @@ -854,10 +880,12 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { * * @param entry * the entry to be checked - * @return true if the content matches, false - * otherwise + * @param reader + * acccess to repository data if necessary + * @return true if the content doesn't match, + * false if it matches */ - private boolean contentCheck(DirCacheEntry entry) { + private boolean contentCheck(DirCacheEntry entry, ObjectReader reader) { if (getEntryObjectId().equals(entry.getObjectId())) { // Content has not changed @@ -873,7 +901,50 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator { return false; } else { - // Content differs: that's a real change! + // Content differs: that's a real change, perhaps + if (reader == null) // deprecated use, do no further checks + return true; + switch (getOptions().getAutoCRLF()) { + case INPUT: + case TRUE: + InputStream dcIn = null; + try { + ObjectLoader loader = reader.open(entry.getObjectId()); + if (loader == null) + return true; + + // We need to compute the length, but only if it is not + // a binary stream. + dcIn = new EolCanonicalizingInputStream( + loader.openStream(), true, true /* abort if binary */); + long dcInLen; + try { + dcInLen = computeLength(dcIn); + } catch (EolCanonicalizingInputStream.IsBinaryException e) { + return true; + } finally { + dcIn.close(); + } + + dcIn = new EolCanonicalizingInputStream( + loader.openStream(), true); + byte[] autoCrLfHash = computeHash(dcIn, dcInLen); + boolean changed = getEntryObjectId().compareTo( + autoCrLfHash, 0) != 0; + return changed; + } catch (IOException e) { + return true; + } finally { + if (dcIn != null) + try { + dcIn.close(); + } catch (IOException e) { + // empty + } + } + case FALSE: + break; + } return true; } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/IndexDiffFilter.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/IndexDiffFilter.java index c3323b868..79cd2193f 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/IndexDiffFilter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/IndexDiffFilter.java @@ -53,6 +53,7 @@ import org.eclipse.jgit.dircache.DirCacheIterator; import org.eclipse.jgit.errors.IncorrectObjectTypeException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.treewalk.TreeWalk; import org.eclipse.jgit.treewalk.WorkingTreeIterator; @@ -72,7 +73,7 @@ import org.eclipse.jgit.treewalk.WorkingTreeIterator; *

* If no difference is found then we have to compare index and working-tree as * the last step. By making use of - * {@link WorkingTreeIterator#isModified(org.eclipse.jgit.dircache.DirCacheEntry, boolean)} + * {@link WorkingTreeIterator#isModified(org.eclipse.jgit.dircache.DirCacheEntry, boolean, ObjectReader)} * we can avoid the computation of the content id if the file is not dirty. *

* Instances of this filter should not be used for multiple {@link TreeWalk}s. @@ -219,7 +220,7 @@ public class IndexDiffFilter extends TreeFilter { // Only one chance left to detect a diff: between index and working // tree. Make use of the WorkingTreeIterator#isModified() method to // avoid computing SHA1 on filesystem content if not really needed. - return wi.isModified(di.getDirCacheEntry(), true); + return wi.isModified(di.getDirCacheEntry(), true, tw.getObjectReader()); } /** diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java index f87ab6896..98485e909 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/util/io/EolCanonicalizingInputStream.java @@ -70,6 +70,23 @@ public class EolCanonicalizingInputStream extends InputStream { private boolean detectBinary; + private boolean abortIfBinary; + + /** + * A special exception thrown when {@link EolCanonicalizingInputStream} is + * told to throw an exception when attempting to read a binary file. The + * exception may be thrown at any stage during reading. + * + * @since 3.3 + */ + public static class IsBinaryException extends IOException { + private static final long serialVersionUID = 1L; + + IsBinaryException() { + super(); + } + } + /** * Creates a new InputStream, wrapping the specified stream * @@ -80,8 +97,25 @@ public class EolCanonicalizingInputStream extends InputStream { * @since 2.0 */ public EolCanonicalizingInputStream(InputStream in, boolean detectBinary) { + this(in, detectBinary, false); + } + + /** + * Creates a new InputStream, wrapping the specified stream + * + * @param in + * raw input stream + * @param detectBinary + * whether binaries should be detected + * @param abortIfBinary + * throw an IOException if the file is binary + * @since 3.3 + */ + public EolCanonicalizingInputStream(InputStream in, boolean detectBinary, + boolean abortIfBinary) { this.in = in; this.detectBinary = detectBinary; + this.abortIfBinary = abortIfBinary; } @Override @@ -128,6 +162,14 @@ public class EolCanonicalizingInputStream extends InputStream { return i == off ? -1 : i - off; } + /** + * @return true if the stream has detected as a binary so far + * @since 3.3 + */ + public boolean isBinary() { + return isBinary; + } + @Override public void close() throws IOException { in.close(); @@ -140,6 +182,8 @@ public class EolCanonicalizingInputStream extends InputStream { if (detectBinary) { isBinary = RawText.isBinary(buf, cnt); detectBinary = false; + if (isBinary && abortIfBinary) + throw new IsBinaryException(); } ptr = 0; return true;