From 6b1e3c58b16651dc72ec79a614d507e014d18393 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 8 Feb 2017 15:07:18 -0500 Subject: [PATCH] Run auto GC in the background When running an automatic GC on a FileRepository, when the caller passes a NullProgressMonitor, run the GC in a background thread. Use a thread pool of size 1 to limit the number of background threads spawned for background gc in the same application. In the next minor release we can make the thread pool configurable. In some cases, the auto GC limit is lower than the true number of unreachable loose objects, so auto GC will run after every (e.g) fetch operation. This leads to the appearance of poor fetch performance. Since these GCs will never make progress (until either the objects become referenced, or the two week timeout expires), blocking on them simply reduces throughput. In the event that an auto GC would make progress, it's still OK if it runs in the background. The progress will still happen. This matches the behavior of regular git. Git (and now jgit) uses the lock file for gc.log to prevent simultaneous runs of background gc. Further, it writes errors to gc.log, and won't run background gc if that file is present and recent. If gc.log is too old (according to the config gc.logexpiry), it will be ignored. Change-Id: I3870cadb4a0a6763feff252e6eaef99f4aa8d0df Signed-off-by: David Turner Signed-off-by: Matthias Sohn --- .../junit/LocalDiskRepositoryTestCase.java | 7 + .../tst/org/eclipse/jgit/pgm/ConfigTest.java | 1 + org.eclipse.jgit/.settings/.api_filters | 8 + .../eclipse/jgit/internal/JGitText.properties | 2 + .../org/eclipse/jgit/internal/JGitText.java | 2 + .../internal/storage/file/FileRepository.java | 6 + .../jgit/internal/storage/file/GC.java | 91 ++++++++- .../jgit/internal/storage/file/GcLog.java | 191 ++++++++++++++++++ .../org/eclipse/jgit/lib/ConfigConstants.java | 14 ++ 9 files changed, 320 insertions(+), 2 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java diff --git a/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java b/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java index 933faf9bf..6ace9fc12 100644 --- a/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java +++ b/org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java @@ -62,6 +62,7 @@ import java.util.TreeSet; import org.eclipse.jgit.dircache.DirCache; import org.eclipse.jgit.dircache.DirCacheEntry; import org.eclipse.jgit.internal.storage.file.FileRepository; +import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.PersonIdent; @@ -121,6 +122,12 @@ public abstract class LocalDiskRepositoryTestCase { mockSystemReader = new MockSystemReader(); mockSystemReader.userGitConfig = new FileBasedConfig(new File(tmp, "usergitconfig"), FS.DETECTED); + // We have to set autoDetach to false for tests, because tests expect to be able + // to clean up by recursively removing the repository, and background GC might be + // in the middle of writing or deleting files, which would disrupt this. + mockSystemReader.userGitConfig.setBoolean(ConfigConstants.CONFIG_GC_SECTION, + null, ConfigConstants.CONFIG_KEY_AUTODETACH, false); + mockSystemReader.userGitConfig.save(); ceilTestDirectories(getCeilings()); SystemReader.setInstance(mockSystemReader); diff --git a/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java b/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java index c43accdb6..0ce645139 100644 --- a/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java +++ b/org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java @@ -74,6 +74,7 @@ public class ConfigTest extends CLIRepositoryTestCase { String[] output = execute("git config --list"); List expect = new ArrayList<>(); + expect.add("gc.autoDetach=false"); expect.add("core.filemode=" + !isWindows); expect.add("core.logallrefupdates=true"); if (isMac) diff --git a/org.eclipse.jgit/.settings/.api_filters b/org.eclipse.jgit/.settings/.api_filters index 644123236..822f1215d 100644 --- a/org.eclipse.jgit/.settings/.api_filters +++ b/org.eclipse.jgit/.settings/.api_filters @@ -1,5 +1,13 @@ + + + + + + + + diff --git a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties index 4ac399f9e..03065da8a 100644 --- a/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties +++ b/org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties @@ -299,6 +299,8 @@ flagNotFromThis={0} not from this. flagsAlreadyCreated={0} flags already created. funnyRefname=funny refname gcFailed=Garbage collection failed. +gcLogExists=A previous GC run reported an error: ''{0}''. Automatic gc will fail until ''{1}'' is removed. +gcTooManyUnpruned=Too many loose, unpruneable objects after garbage collection. Consider adjusting gc.auto or gc.pruneExpire. gitmodulesNotFound=.gitmodules not found in tree. headRequiredToStash=HEAD required to stash local changes hoursAgo={0} hours ago diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java index a81c93a01..32bfb7e57 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java @@ -358,6 +358,8 @@ public class JGitText extends TranslationBundle { /***/ public String flagsAlreadyCreated; /***/ public String funnyRefname; /***/ public String gcFailed; + /***/ public String gcLogExists; + /***/ public String gcTooManyUnpruned; /***/ public String gitmodulesNotFound; /***/ public String headRequiredToStash; /***/ public String hoursAgo; diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java index ba52251a4..ef9aa37d0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java @@ -619,12 +619,18 @@ public class FileRepository extends Repository { } + private boolean shouldAutoDetach() { + return getConfig().getBoolean(ConfigConstants.CONFIG_GC_SECTION, + ConfigConstants.CONFIG_KEY_AUTODETACH, true); + } + @Override public void autoGC(ProgressMonitor monitor) { GC gc = new GC(this); gc.setPackConfig(new PackConfig(this)); gc.setProgressMonitor(monitor); gc.setAuto(true); + gc.setBackground(shouldAutoDetach()); try { gc.gc(); } catch (ParseException | IOException e) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java index c68e5f7f3..e1b309da6 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java @@ -50,6 +50,8 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.StringWriter; import java.nio.channels.Channels; import java.nio.channels.FileChannel; import java.nio.file.DirectoryStream; @@ -73,11 +75,17 @@ import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.eclipse.jgit.annotations.NonNull; +import org.eclipse.jgit.api.errors.JGitInternalException; import org.eclipse.jgit.dircache.DirCacheIterator; import org.eclipse.jgit.errors.CancelledException; import org.eclipse.jgit.errors.CorruptObjectException; @@ -143,6 +151,8 @@ public class GC { private static final int DEFAULT_AUTOLIMIT = 6700; + private static ExecutorService executor = Executors.newFixedThreadPool(1); + private final FileRepository repo; private ProgressMonitor pm; @@ -177,6 +187,11 @@ public class GC { */ private boolean automatic; + /** + * Whether to run gc in a background thread + */ + private boolean background; + /** * Creates a new garbage collector with default values. An expirationTime of * two weeks and null as progress monitor will be used. @@ -202,13 +217,73 @@ public class GC { * first check whether any housekeeping is required; if not, it exits * without performing any work. * + * If {@link #setBackground(boolean)} was set to {@code true} + * {@code collectGarbage} will start the gc in the background, and then + * return immediately. In this case, errors will not be reported except in + * gc.log. + * * @return the collection of {@link PackFile}'s which are newly created * @throws IOException * @throws ParseException * If the configuration parameter "gc.pruneexpire" couldn't be * parsed */ + // TODO(ms): in 5.0 change signature and return Future> public Collection gc() throws IOException, ParseException { + final GcLog gcLog = background ? new GcLog(repo) : null; + if (gcLog != null && !gcLog.lock(background)) { + // there is already a background gc running + return Collections.emptyList(); + } + + Callable> gcTask = () -> { + try { + Collection newPacks = doGc(); + if (automatic && tooManyLooseObjects() && gcLog != null) { + String message = JGitText.get().gcTooManyUnpruned; + gcLog.write(message); + gcLog.commit(); + } + return newPacks; + } catch (IOException | ParseException e) { + if (background) { + if (gcLog == null) { + // Lacking a log, there's no way to report this. + return Collections.emptyList(); + } + try { + gcLog.write(e.getMessage()); + StringWriter sw = new StringWriter(); + e.printStackTrace(new PrintWriter(sw)); + gcLog.write(sw.toString()); + gcLog.commit(); + } catch (IOException e2) { + e2.addSuppressed(e); + LOG.error(e2.getMessage(), e2); + } + } else { + throw new JGitInternalException(e.getMessage(), e); + } + } finally { + if (gcLog != null) { + gcLog.unlock(); + } + } + return Collections.emptyList(); + }; + Future> result = executor.submit(gcTask); + if (background) { + // TODO(ms): in 5.0 change signature and return the Future + return Collections.emptyList(); + } + try { + return result.get(); + } catch (InterruptedException | ExecutionException e) { + throw new IOException(e); + } + } + + private Collection doGc() throws IOException, ParseException { if (automatic && !needGc()) { return Collections.emptyList(); } @@ -1348,6 +1423,14 @@ public class GC { this.automatic = auto; } + /** + * @param background + * whether to run the gc in a background thread. + */ + void setBackground(boolean background) { + this.background = background; + } + private boolean needGc() { if (tooManyPacks()) { addRepackAllOption(); @@ -1386,8 +1469,7 @@ public class GC { * @return {@code true} if number of loose objects > gc.auto (default 6700) */ boolean tooManyLooseObjects() { - int auto = repo.getConfig().getInt(ConfigConstants.CONFIG_GC_SECTION, - ConfigConstants.CONFIG_KEY_AUTO, DEFAULT_AUTOLIMIT); + int auto = getLooseObjectLimit(); if (auto <= 0) { return false; } @@ -1419,4 +1501,9 @@ public class GC { } return false; } + + private int getLooseObjectLimit() { + return repo.getConfig().getInt(ConfigConstants.CONFIG_GC_SECTION, + ConfigConstants.CONFIG_KEY_AUTO, DEFAULT_AUTOLIMIT); + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java new file mode 100644 index 000000000..9ea77cc7d --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2017 Two Sigma Open Source + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.internal.storage.file; + +import org.eclipse.jgit.api.errors.JGitInternalException; +import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.lib.ConfigConstants; +import org.eclipse.jgit.util.GitDateParser; +import org.eclipse.jgit.util.SystemReader; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.attribute.FileTime; +import java.text.MessageFormat; +import java.text.ParseException; +import java.time.Instant; + +/** + * This class manages the gc.log file for a {@link FileRepository}. + */ +class GcLog { + private final FileRepository repo; + + private final File logFile; + + private final LockFile lock; + + private Instant gcLogExpire; + + private static final String LOG_EXPIRY_DEFAULT = "1.day.ago"; //$NON-NLS-1$ + + private boolean nonEmpty = false; + + /** + * Construct a GcLog object for a {@link FileRepository} + * + * @param repo + * the repository + */ + GcLog(FileRepository repo) { + this.repo = repo; + logFile = new File(repo.getDirectory(), "gc.log"); //$NON-NLS-1$ + lock = new LockFile(logFile); + } + + private Instant getLogExpiry() throws ParseException { + if (gcLogExpire == null) { + String logExpiryStr = repo.getConfig().getString( + ConfigConstants.CONFIG_GC_SECTION, null, + ConfigConstants.CONFIG_KEY_LOGEXPIRY); + if (logExpiryStr == null) { + logExpiryStr = LOG_EXPIRY_DEFAULT; + } + gcLogExpire = GitDateParser.parse(logExpiryStr, null, + SystemReader.getInstance().getLocale()).toInstant(); + } + return gcLogExpire; + } + + private boolean autoGcBlockedByOldLockFile(boolean background) { + try { + FileTime lastModified = Files.getLastModifiedTime(logFile.toPath()); + if (lastModified.toInstant().compareTo(getLogExpiry()) > 0) { + // There is an existing log file, which is too recent to ignore + if (!background) { + try (BufferedReader reader = Files + .newBufferedReader(logFile.toPath())) { + char[] buf = new char[1000]; + int len = reader.read(buf, 0, 1000); + String oldError = new String(buf, 0, len); + + throw new JGitInternalException(MessageFormat.format( + JGitText.get().gcLogExists, oldError, logFile)); + } + } + return true; + } + } catch (NoSuchFileException e) { + // No existing log file, OK. + } catch (IOException | ParseException e) { + throw new JGitInternalException(e.getMessage(), e); + } + return false; + } + + /** + * Lock the GC log file for updates + * + * @param background + * If true, and if gc.log already exists, unlock and return false + * @return {@code true} if we hold the lock + */ + boolean lock(boolean background) { + try { + if (!lock.lock()) { + return false; + } + } catch (IOException e) { + throw new JGitInternalException(e.getMessage(), e); + } + if (autoGcBlockedByOldLockFile(background)) { + lock.unlock(); + return false; + } + return true; + } + + /** + * Unlock (roll back) the GC log lock + */ + void unlock() { + lock.unlock(); + } + + /** + * Commit changes to the gc log, if there have been any writes. Otherwise, + * just unlock and delete the existing file (if any) + * + * @return true if committing (or unlocking/deleting) succeeds. + */ + boolean commit() { + if (nonEmpty) { + return lock.commit(); + } else { + logFile.delete(); + lock.unlock(); + return true; + } + } + + /** + * Write to the pending gc log. Content will be committed upon a call to + * commit() + * + * @param content + * The content to write + * @throws IOException + */ + void write(String content) throws IOException { + if (content.length() > 0) { + nonEmpty = true; + } + lock.write(content.getBytes(UTF_8)); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java index 74fc7067a..189361b70 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java @@ -290,6 +290,20 @@ public class ConfigConstants { */ public static final String CONFIG_KEY_PRUNEPACKEXPIRE = "prunepackexpire"; + /** + * The "logexpiry" key + * + * @since 4.7 + */ + public static final String CONFIG_KEY_LOGEXPIRY = "logExpiry"; + + /** + * The "autodetach" key + * + * @since 4.7 + */ + public static final String CONFIG_KEY_AUTODETACH = "autoDetach"; + /** * The "aggressiveDepth" key * @since 3.6