From 11d24e68442e97ba8a4132d749e7e7f4ebf1ff52 Mon Sep 17 00:00:00 2001 From: Christian Halstrick Date: Thu, 8 Dec 2016 15:44:23 +0100 Subject: [PATCH] Fix FileSnapshot.isModified FileSnapshot.isModified may have reported a file to be clean although it was actually dirty. Imagine you have a FileSnapshot on file f. lastmodified and lastread are both t0. Now time is t1 and you 1) modify the file 2) update the FileSnapshot of the file (lastModified=t1, lastRead=t1) 3) modify the file again 4) wait 3 seconds 5) ask the Filesnapshot whether the file is dirty or not. It erroneously answered it's clean. Any file which has been modified longer than 2.5 seconds ago was reported to be clean. As the test shows that's not always correct. The real-world problem fixed by this change is the following: * A gerrit server using JGit to serve git repositories is processing fetch requests while simultaneously a native git garbage collection runs on the repo. * At time t1 native git writes temporary files in the pack folder setting the mtime of the pack folder to t1. * A fetch request causes JGit to search for new packfiles and JGit remembers this scan in a Filesnapshot on the packs folder. Since the gc is not finished JGit doesn't see any new packfiles. * The fetch is processed and the gc ends while the filesystem timer is still t1. GC writes a new packfile and deletes the old packfile. * 3 seconds later another request arrives. JGit does not yet know about the new packfile but is also not rescanning the pack folder because it cached that the last scan happened at time t1 and pack folder's mtime is also t1. Now JGit will not be able to resolve any object contained in this new pack. This behavior may be persistent if objects referenced by the ref/meta/config branch are affected so gerrit can't read permissions stored in the refs/meta/config branch anymore and will not allow any pushes anymore. The pack folder will not change its mtime and therefore no rescan will take place. Change-Id: I3efd0ccffeb97b01207dc3e7a6b85c6b06928fad Signed-off-by: Matthias Sohn --- .../storage/file/FileSnapshotTest.java | 17 --- .../storage/file/ObjectDirectoryTest.java | 103 ++++++++++++++++++ .../internal/storage/file/FileSnapshot.java | 6 - 3 files changed, 103 insertions(+), 23 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java index 850675382..98a950176 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java @@ -42,7 +42,6 @@ */ package org.eclipse.jgit.internal.storage.file; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.File; @@ -97,22 +96,6 @@ public class FileSnapshotTest { assertTrue(save.isModified(f1)); } - /** - * Create a file, wait long enough and verify that it has not been modified. - * 3.5 seconds mean any difference between file system timestamp and system - * clock should be significant. - * - * @throws Exception - */ - @Test - public void testOldFile() throws Exception { - File f1 = createFile("oldfile"); - waitNextSec(f1); - FileSnapshot save = FileSnapshot.save(f1); - Thread.sleep(3500); - assertFalse(save.isModified(f1)); - } - /** * Create a file, but don't wait long enough for the difference between file * system clock and system clock to be significant. Assume the file may have diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java index 7f247ff0a..87554ae6a 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java @@ -42,6 +42,11 @@ package org.eclipse.jgit.internal.storage.file; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FilenameFilter; import java.util.Collection; import java.util.Collections; import java.util.concurrent.Callable; @@ -50,8 +55,12 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.eclipse.jgit.junit.RepositoryTestCase; +import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.storage.file.FileBasedConfig; +import org.junit.Assume; import org.junit.Test; public class ObjectDirectoryTest extends RepositoryTestCase { @@ -68,6 +77,100 @@ public class ObjectDirectoryTest extends RepositoryTestCase { } } + /** + * Test packfile scanning while a gc is done from the outside (different + * process or different Repository instance). This situation occurs e.g. if + * a gerrit server is serving fetch requests while native git is doing a + * garbage collection. The test shows that when core.trustfolderstat==true + * jgit may miss to detect that a new packfile was created. This situation + * is persistent until a new full rescan of the pack directory is triggered. + * + * The test works with two Repository instances working on the same disk + * location. One (db) for all write operations (creating commits, doing gc) + * and another one (receivingDB) which just reads and which in the end shows + * the bug + * + * @throws Exception + */ + @Test + public void testScanningForPackfiles() throws Exception { + ObjectId unknownID = ObjectId + .fromString("c0ffee09d0b63d694bf49bc1e6847473f42d4a8c"); + GC gc = new GC(db); + gc.setExpireAgeMillis(0); + gc.setPackExpireAgeMillis(0); + + // the default repo db is used to create the objects. The receivingDB + // repo is used to trigger gc's + try (FileRepository receivingDB = new FileRepository( + db.getDirectory())) { + // set trustfolderstat to true. If set to false the test always + // succeeds. + FileBasedConfig cfg = receivingDB.getConfig(); + cfg.setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null, + ConfigConstants.CONFIG_KEY_TRUSTFOLDERSTAT, true); + cfg.save(); + + // setup a repo which has at least one pack file and trigger + // scanning of the packs directory + ObjectId id = commitFile("file.txt", "test", "master").getId(); + gc.gc(); + assertFalse(receivingDB.hasObject(unknownID)); + assertTrue(receivingDB.getObjectDatabase().hasPackedObject(id)); + + // preparations + File packsFolder = new File(receivingDB.getObjectsDirectory(), + "pack"); + // prepare creation of a temporary file in the pack folder. This + // simulates that a native git gc is happening starting to write + // temporary files but has not yet finished + File tmpFile = new File(packsFolder, "1.tmp"); + RevCommit id2 = commitFile("file.txt", "test2", "master"); + // wait until filesystem timer ticks. This raises probability that + // the next statements are executed in the same tick as the + // filesystem timer + fsTick(null); + + // create a Temp file in the packs folder and trigger a rescan of + // the packs folder. This lets receivingDB think it has scanned the + // packs folder at the current fs timestamp t1. The following gc + // will create new files which have the same timestamp t1 but this + // will not update the mtime of the packs folder. Because of that + // JGit will not rescan the packs folder later on and fails to see + // the pack file created during gc. + assertTrue(tmpFile.createNewFile()); + assertFalse(receivingDB.hasObject(unknownID)); + + // trigger a gc. This will create packfiles which have likely the + // same mtime than the packfolder + gc.gc(); + + // To deal with racy-git situations JGit's Filesnapshot class will + // report a file/folder potentially dirty if + // cachedLastReadTime-cachedLastModificationTime < 2500ms. This + // causes JGit to always rescan a file after modification. But: + // this was true only if the difference between current system time + // and cachedLastModification time was less than 2500ms. If the + // modification is more than 2500ms ago we may have reported a + // file/folder to be clean although it has not been rescanned. A + // Bug. To show the bug we sleep for more than 2500ms + Thread.sleep(2600); + + File[] ret = packsFolder.listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.endsWith(".pack"); + } + }); + assertTrue(ret.length == 1); + Assume.assumeTrue(tmpFile.lastModified() == ret[0].lastModified()); + + // all objects are in a new packfile but we will not detect it + assertFalse(receivingDB.hasObject(unknownID)); + assertTrue(receivingDB.hasObject(id2)); + } + } + private Collection> blobInsertersForTheSameFanOutDir( final ObjectDirectory dir) { Callable callable = new Callable() { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java index 8926d7930..97f3b573d 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java @@ -264,12 +264,6 @@ public class FileSnapshot { return false; } - // Our lastRead flag may be old, refresh and retry - lastRead = System.currentTimeMillis(); - if (notRacyClean(lastRead)) { - return false; - } - // We last read this path too close to its last observed // modification time. We may have missed a modification. // Scan again, to ensure we still see the same state.