Browse Source

Enable large file support

Allow adding files larger than 2 GB. The drawback is that the test
for huge file support adds roughly 10 minutes of execution time.
For that reason we @Ignore the test in the standard test execution.

Change-Id: I5788e8009899203b346f353297166825b3744575
stable-2.0
Robin Rosenberg 13 years ago
parent
commit
9c5b31703f
  1. 236
      org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java
  2. 15
      org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java
  3. 2
      org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java

236
org.eclipse.jgit.test/tst/org/eclipse/jgit/api/HugeFileTest.java

@ -0,0 +1,236 @@
/*
* Copyright (C) 2012, Robin Rosenberg <robin.rosenberg@dewire.com>
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.api;
import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Arrays;
import java.util.Collection;

import org.eclipse.jgit.api.ResetCommand.ResetType;
import org.eclipse.jgit.lib.RepositoryTestCase;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Exercises add, status, commit and hard reset on a work tree file whose
 * size does not fit into a signed 32 bit integer.
 * <p>
 * The index assertions below expect the recorded length to be the on-disk
 * size truncated to 32 bits: 4429185024 bytes is reported as 134217728 and
 * 3429185024 bytes as -865782272.
 * <p>
 * The test is ignored by default because of its long running time.
 */
public class HugeFileTest extends RepositoryTestCase {
	/** Expected path set when no path should be reported. */
	private static final Collection<String> NONE = Arrays.<String> asList();

	/** Expected path set when only the test file should be reported. */
	private static final Collection<String> A_TXT = Arrays.asList("a.txt");

	/** Wall clock time (ms) of the previous {@link #measure(String)} call. */
	private long lastMeasurement = System.currentTimeMillis();

	/**
	 * Print the time elapsed since the previous measurement (or since this
	 * test instance was created) and start a new measurement interval.
	 *
	 * @param name
	 *            label of the step that just finished
	 */
	private void measure(String name) {
		long now = System.currentTimeMillis();
		System.out.println(name + ", dt=" + (now - lastMeasurement) / 1000.0
				+ "s");
		lastMeasurement = now;
	}

	/**
	 * Walk a huge file through add, status, commit and reset, checking the
	 * index entry and the reported status after each modification.
	 *
	 * @throws Exception
	 *             if any file or repository operation fails
	 */
	@Ignore("Test takes way too long (~10 minutes) to be part of the standard suite")
	@Test
	public void testAddHugeFile() throws Exception {
		measure("Commencing test");
		File file = new File(db.getWorkTree(), "a.txt");
		setFileLength(file, 4429185024L);
		measure("Created file");

		Git git = new Git(db);
		git.add().addFilepattern("a.txt").call();
		measure("Added file");
		assertEquals(
				"[a.txt, mode:100644, length:134217728, sha1:b8cfba97c2b962a44f080b3ca4e03b3204b6a350]",
				indexState(LENGTH | CONTENT_ID));

		Status status = git.status().call();
		measure("Status after add");
		assertStatus(status, A_TXT, NONE);

		// Does not change anything, but modified timestamp
		writeFirstByte(file, 0);
		status = git.status().call();
		measure("Status after non-modifying update");
		assertStatus(status, A_TXT, NONE);

		// Change something
		writeFirstByte(file, 'a');
		status = git.status().call();
		measure("Status after modifying update");
		assertStatus(status, A_TXT, A_TXT);

		// Truncate mod 4G and re-establish equality
		resizeAndWriteFirstByte(file, 134217728L, 0);
		status = git.status().call();
		measure("Status after truncating update");
		assertStatus(status, A_TXT, A_TXT);

		// Change something
		writeFirstByte(file, 'a');
		status = git.status().call();
		measure("Status after modifying and truncating update");
		assertStatus(status, A_TXT, A_TXT);

		// Truncate to entry length becomes negative int
		resizeAndWriteFirstByte(file, 3429185024L, 0);
		git.add().addFilepattern("a.txt").call();
		measure("Added truncated file");
		assertEquals(
				"[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
				indexState(LENGTH | CONTENT_ID));

		status = git.status().call();
		measure("Status after status on truncated file");
		assertStatus(status, A_TXT, NONE);

		// Change something
		writeFirstByte(file, 'a');
		status = git.status().call();
		measure("Status after modifying and truncating update");
		assertStatus(status, A_TXT, A_TXT);

		git.commit().setMessage("make a commit").call();
		measure("After commit");
		status = git.status().call();
		measure("After status after commit");
		assertStatus(status, NONE, A_TXT);

		git.reset().setMode(ResetType.HARD).call();
		measure("After reset --hard");
		assertEquals(
				"[a.txt, mode:100644, length:-865782272, sha1:59b3282f8f59f22d953df956ad3511bf2dc660fd]",
				indexState(LENGTH | CONTENT_ID));

		status = git.status().call();
		measure("Status after hard reset");
		assertStatus(status, NONE, NONE);
	}

	/**
	 * Set the length of a file, creating the file if it does not exist.
	 *
	 * @param file
	 *            file to resize
	 * @param length
	 *            new length in bytes
	 * @throws IOException
	 *             if the file cannot be opened or resized
	 */
	private static void setFileLength(File file, long length)
			throws IOException {
		RandomAccessFile rf = new RandomAccessFile(file, "rw");
		try {
			rf.setLength(length);
		} finally {
			// Always release the handle, even if resizing failed.
			rf.close();
		}
	}

	/**
	 * Overwrite the first byte of a file, leaving its length untouched.
	 *
	 * @param file
	 *            file to modify
	 * @param value
	 *            byte value to store at offset 0
	 * @throws IOException
	 *             if the file cannot be opened or written
	 */
	private static void writeFirstByte(File file, int value)
			throws IOException {
		RandomAccessFile rf = new RandomAccessFile(file, "rw");
		try {
			rf.write(value);
		} finally {
			// Always release the handle, even if writing failed.
			rf.close();
		}
	}

	/**
	 * Resize a file and then overwrite its first byte, using a single open
	 * file handle.
	 *
	 * @param file
	 *            file to modify
	 * @param length
	 *            new length in bytes
	 * @param value
	 *            byte value to store at offset 0
	 * @throws IOException
	 *             if the file cannot be opened, resized or written
	 */
	private static void resizeAndWriteFirstByte(File file, long length,
			int value) throws IOException {
		RandomAccessFile rf = new RandomAccessFile(file, "rw");
		try {
			rf.setLength(length);
			rf.write(value);
		} finally {
			// Always release the handle, even if an operation failed.
			rf.close();
		}
	}

	/**
	 * Assert that a status reports exactly the given added and modified
	 * paths and nothing in any of the other categories checked here.
	 *
	 * @param status
	 *            status to inspect
	 * @param added
	 *            expected added paths
	 * @param modified
	 *            expected modified paths
	 */
	private void assertStatus(Status status, Collection<String> added,
			Collection<String> modified) {
		assertPaths(added, status.getAdded());
		assertEquals(0, status.getChanged().size());
		assertEquals(0, status.getConflicting().size());
		assertEquals(0, status.getMissing().size());
		assertPaths(modified, status.getModified());
		assertEquals(0, status.getRemoved().size());
		assertEquals(0, status.getUntracked().size());
	}

	/**
	 * Assert that the actual paths match the expected ones. An empty
	 * expectation is checked by size, a non-empty one by string form.
	 *
	 * @param expected
	 *            expected paths
	 * @param actual
	 *            paths reported by the status
	 */
	private void assertPaths(Collection<String> expected,
			Collection<String> actual) {
		if (expected.isEmpty()) {
			assertEquals(0, actual.size());
		} else {
			assertCollectionEquals(expected, actual);
		}
	}

	/**
	 * Compare two collections by their string representation.
	 *
	 * @param asList
	 *            expected elements
	 * @param added
	 *            actual elements
	 */
	private void assertCollectionEquals(Collection<?> asList,
			Collection<?> added) {
		assertEquals(asList.toString(), added.toString());
	}
}

15
org.eclipse.jgit/src/org/eclipse/jgit/dircache/DirCacheEntry.java vendored

@ -525,7 +525,7 @@ public class DirCacheEntry {
} }
/** /**
* Get the cached size (in bytes) of this file. * Get the cached size (mod 4 GB) (in bytes) of this file.
* <p> * <p>
* One of the indicators that the file has been modified by an application * One of the indicators that the file has been modified by an application
* changing the working tree is if the size of the file (in bytes) differs * changing the working tree is if the size of the file (in bytes) differs
@ -534,6 +534,10 @@ public class DirCacheEntry {
* Note that this is the length of the file in the working directory, which * Note that this is the length of the file in the working directory, which
* may differ from the size of the decompressed blob if work tree filters * may differ from the size of the decompressed blob if work tree filters
* are being used, such as LF<->CRLF conversion. * are being used, such as LF<->CRLF conversion.
* <p>
* Note also that for very large files, this is the size of the on-disk file
* truncated to 32 bits, i.e. modulo 4294967296. If that value is larger
* than 2GB, it will appear negative.
* *
* @return cached size of the working directory file, in bytes. * @return cached size of the working directory file, in bytes.
*/ */
@ -545,7 +549,8 @@ public class DirCacheEntry {
* Set the cached size (in bytes) of this file. * Set the cached size (in bytes) of this file.
* *
* @param sz * @param sz
* new cached size of the file, as bytes. * new cached size of the file, as bytes. If the file is larger
* than 2G, cast it to (int) before calling this method.
*/ */
public void setLength(final int sz) { public void setLength(final int sz) {
NB.encodeInt32(info, infoOffset + P_SIZE, sz); NB.encodeInt32(info, infoOffset + P_SIZE, sz);
@ -556,15 +561,9 @@ public class DirCacheEntry {
* *
* @param sz * @param sz
* new cached size of the file, as bytes. * new cached size of the file, as bytes.
* @throws IllegalArgumentException
* if the size exceeds the 2 GiB barrier imposed by current file
* format limitations.
*/ */
@SuppressWarnings("boxing") @SuppressWarnings("boxing")
public void setLength(final long sz) { public void setLength(final long sz) {
if (Integer.MAX_VALUE <= sz)
throw new IllegalArgumentException(MessageFormat.format(JGitText
.get().sizeExceeds2GB, getPathString(), sz));
setLength((int) sz); setLength((int) sz);
} }

2
org.eclipse.jgit/src/org/eclipse/jgit/treewalk/WorkingTreeIterator.java

@ -711,7 +711,7 @@ public abstract class WorkingTreeIterator extends AbstractTreeIterator {
if (entry.isUpdateNeeded()) if (entry.isUpdateNeeded())
return MetadataDiff.DIFFER_BY_METADATA; return MetadataDiff.DIFFER_BY_METADATA;
if (!entry.isSmudged() && (getEntryLength() != entry.getLength())) if (!entry.isSmudged() && entry.getLength() != (int) getEntryLength())
return MetadataDiff.DIFFER_BY_METADATA; return MetadataDiff.DIFFER_BY_METADATA;
// Determine difference in mode-bits of file and index-entry. In the // Determine difference in mode-bits of file and index-entry. In the

Loading…
Cancel
Save