Browse Source

Run auto GC in the background

When running an automatic GC on a FileRepository, when the caller
passes a NullProgressMonitor, run the GC in a background thread. Use a
thread pool of size 1 to limit the number of background threads spawned
for background gc in the same application. In the next minor release we
can make the thread pool configurable.

In some cases, the auto GC limit is lower than the true number of
unreachable loose objects, so auto GC will run after every (e.g) fetch
operation.  This leads to the appearance of poor fetch performance.
Since these GCs will never make progress (until either the objects
become referenced, or the two week timeout expires), blocking on them
simply reduces throughput.

In the event that an auto GC would make progress, it's still OK if it
runs in the background. The progress will still happen.

This matches the behavior of regular git.

Git (and now jgit) uses the lock file for gc.log to prevent simultaneous
runs of background gc. Further, it writes errors to gc.log, and won't
run background gc if that file is present and recent. If gc.log is too
old (according to the config gc.logexpiry), it will be ignored.

Change-Id: I3870cadb4a0a6763feff252e6eaef99f4aa8d0df
Signed-off-by: David Turner <dturner@twosigma.com>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
stable-4.7
David Turner 8 years ago committed by Matthias Sohn
parent
commit
6b1e3c58b1
  1. 7
      org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java
  2. 1
      org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java
  3. 8
      org.eclipse.jgit/.settings/.api_filters
  4. 2
      org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties
  5. 2
      org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java
  6. 6
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java
  7. 91
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java
  8. 191
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java
  9. 14
      org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java

7
org.eclipse.jgit.junit/src/org/eclipse/jgit/junit/LocalDiskRepositoryTestCase.java

@ -62,6 +62,7 @@ import java.util.TreeSet;
import org.eclipse.jgit.dircache.DirCache;
import org.eclipse.jgit.dircache.DirCacheEntry;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.PersonIdent;
@ -121,6 +122,12 @@ public abstract class LocalDiskRepositoryTestCase {
mockSystemReader = new MockSystemReader();
mockSystemReader.userGitConfig = new FileBasedConfig(new File(tmp,
"usergitconfig"), FS.DETECTED);
// We have to set autoDetach to false for tests, because tests expect to be able
// to clean up by recursively removing the repository, and background GC might be
// in the middle of writing or deleting files, which would disrupt this.
mockSystemReader.userGitConfig.setBoolean(ConfigConstants.CONFIG_GC_SECTION,
null, ConfigConstants.CONFIG_KEY_AUTODETACH, false);
mockSystemReader.userGitConfig.save();
ceilTestDirectories(getCeilings());
SystemReader.setInstance(mockSystemReader);

1
org.eclipse.jgit.pgm.test/tst/org/eclipse/jgit/pgm/ConfigTest.java

@ -74,6 +74,7 @@ public class ConfigTest extends CLIRepositoryTestCase {
String[] output = execute("git config --list");
List<String> expect = new ArrayList<>();
expect.add("gc.autoDetach=false");
expect.add("core.filemode=" + !isWindows);
expect.add("core.logallrefupdates=true");
if (isMac)

8
org.eclipse.jgit/.settings/.api_filters

@ -1,5 +1,13 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<component id="org.eclipse.jgit" version="2">
<resource path="META-INF/MANIFEST.MF">
<filter id="924844039">
<message_arguments>
<message_argument value="4.7.1"/>
<message_argument value="4.7.0"/>
</message_arguments>
</filter>
</resource>
<resource path="src/org/eclipse/jgit/api/ArchiveCommand.java" type="org.eclipse.jgit.api.ArchiveCommand$Format">
<filter comment="OSGi semver allows to break implementors in minor releases" id="403804204">
<message_arguments>

2
org.eclipse.jgit/resources/org/eclipse/jgit/internal/JGitText.properties

@ -299,6 +299,8 @@ flagNotFromThis={0} not from this.
flagsAlreadyCreated={0} flags already created.
funnyRefname=funny refname
gcFailed=Garbage collection failed.
gcLogExists=A previous GC run reported an error: ''{0}''. Automatic gc will fail until ''{1}'' is removed.
gcTooManyUnpruned=Too many loose, unpruneable objects after garbage collection. Consider adjusting gc.auto or gc.pruneExpire.
gitmodulesNotFound=.gitmodules not found in tree.
headRequiredToStash=HEAD required to stash local changes
hoursAgo={0} hours ago

2
org.eclipse.jgit/src/org/eclipse/jgit/internal/JGitText.java

@ -358,6 +358,8 @@ public class JGitText extends TranslationBundle {
/***/ public String flagsAlreadyCreated;
/***/ public String funnyRefname;
/***/ public String gcFailed;
/***/ public String gcLogExists;
/***/ public String gcTooManyUnpruned;
/***/ public String gitmodulesNotFound;
/***/ public String headRequiredToStash;
/***/ public String hoursAgo;

6
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileRepository.java

@ -619,12 +619,18 @@ public class FileRepository extends Repository {
}
private boolean shouldAutoDetach() {
return getConfig().getBoolean(ConfigConstants.CONFIG_GC_SECTION,
ConfigConstants.CONFIG_KEY_AUTODETACH, true);
}
@Override
public void autoGC(ProgressMonitor monitor) {
GC gc = new GC(this);
gc.setPackConfig(new PackConfig(this));
gc.setProgressMonitor(monitor);
gc.setAuto(true);
gc.setBackground(shouldAutoDetach());
try {
gc.gc();
} catch (ParseException | IOException e) {

91
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java

@ -50,6 +50,8 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.file.DirectoryStream;
@ -73,11 +75,17 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.eclipse.jgit.annotations.NonNull;
import org.eclipse.jgit.api.errors.JGitInternalException;
import org.eclipse.jgit.dircache.DirCacheIterator;
import org.eclipse.jgit.errors.CancelledException;
import org.eclipse.jgit.errors.CorruptObjectException;
@ -143,6 +151,8 @@ public class GC {
private static final int DEFAULT_AUTOLIMIT = 6700;
private static ExecutorService executor = Executors.newFixedThreadPool(1);
private final FileRepository repo;
private ProgressMonitor pm;
@ -177,6 +187,11 @@ public class GC {
*/
private boolean automatic;
/**
* Whether to run gc in a background thread
*/
private boolean background;
/**
* Creates a new garbage collector with default values. An expirationTime of
* two weeks and <code>null</code> as progress monitor will be used.
@ -202,13 +217,73 @@ public class GC {
* first check whether any housekeeping is required; if not, it exits
* without performing any work.
*
* If {@link #setBackground(boolean)} was set to {@code true}
* {@code collectGarbage} will start the gc in the background, and then
* return immediately. In this case, errors will not be reported except in
* gc.log.
*
* @return the collection of {@link PackFile}'s which are newly created
* @throws IOException
* @throws ParseException
* If the configuration parameter "gc.pruneexpire" couldn't be
* parsed
*/
// TODO(ms): in 5.0 change signature and return Future<Collection<PackFile>>
public Collection<PackFile> gc() throws IOException, ParseException {
final GcLog gcLog = background ? new GcLog(repo) : null;
if (gcLog != null && !gcLog.lock(background)) {
// there is already a background gc running
return Collections.emptyList();
}
Callable<Collection<PackFile>> gcTask = () -> {
try {
Collection<PackFile> newPacks = doGc();
if (automatic && tooManyLooseObjects() && gcLog != null) {
String message = JGitText.get().gcTooManyUnpruned;
gcLog.write(message);
gcLog.commit();
}
return newPacks;
} catch (IOException | ParseException e) {
if (background) {
if (gcLog == null) {
// Lacking a log, there's no way to report this.
return Collections.emptyList();
}
try {
gcLog.write(e.getMessage());
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
gcLog.write(sw.toString());
gcLog.commit();
} catch (IOException e2) {
e2.addSuppressed(e);
LOG.error(e2.getMessage(), e2);
}
} else {
throw new JGitInternalException(e.getMessage(), e);
}
} finally {
if (gcLog != null) {
gcLog.unlock();
}
}
return Collections.emptyList();
};
Future<Collection<PackFile>> result = executor.submit(gcTask);
if (background) {
// TODO(ms): in 5.0 change signature and return the Future
return Collections.emptyList();
}
try {
return result.get();
} catch (InterruptedException | ExecutionException e) {
throw new IOException(e);
}
}
private Collection<PackFile> doGc() throws IOException, ParseException {
if (automatic && !needGc()) {
return Collections.emptyList();
}
@ -1348,6 +1423,14 @@ public class GC {
this.automatic = auto;
}
/**
* @param background
* whether to run the gc in a background thread.
*/
void setBackground(boolean background) {
this.background = background;
}
private boolean needGc() {
if (tooManyPacks()) {
addRepackAllOption();
@ -1386,8 +1469,7 @@ public class GC {
* @return {@code true} if number of loose objects > gc.auto (default 6700)
*/
boolean tooManyLooseObjects() {
int auto = repo.getConfig().getInt(ConfigConstants.CONFIG_GC_SECTION,
ConfigConstants.CONFIG_KEY_AUTO, DEFAULT_AUTOLIMIT);
int auto = getLooseObjectLimit();
if (auto <= 0) {
return false;
}
@ -1419,4 +1501,9 @@ public class GC {
}
return false;
}
private int getLooseObjectLimit() {
return repo.getConfig().getInt(ConfigConstants.CONFIG_GC_SECTION,
ConfigConstants.CONFIG_KEY_AUTO, DEFAULT_AUTOLIMIT);
}
}

191
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GcLog.java

@ -0,0 +1,191 @@
/*
* Copyright (C) 2017 Two Sigma Open Source
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.internal.storage.file;
import org.eclipse.jgit.api.errors.JGitInternalException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.ConfigConstants;
import org.eclipse.jgit.util.GitDateParser;
import org.eclipse.jgit.util.SystemReader;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.attribute.FileTime;
import java.text.MessageFormat;
import java.text.ParseException;
import java.time.Instant;
/**
* This class manages the gc.log file for a {@link FileRepository}.
*/
class GcLog {
private final FileRepository repo;
private final File logFile;
private final LockFile lock;
private Instant gcLogExpire;
private static final String LOG_EXPIRY_DEFAULT = "1.day.ago"; //$NON-NLS-1$
private boolean nonEmpty = false;
/**
* Construct a GcLog object for a {@link FileRepository}
*
* @param repo
* the repository
*/
GcLog(FileRepository repo) {
this.repo = repo;
logFile = new File(repo.getDirectory(), "gc.log"); //$NON-NLS-1$
lock = new LockFile(logFile);
}
private Instant getLogExpiry() throws ParseException {
if (gcLogExpire == null) {
String logExpiryStr = repo.getConfig().getString(
ConfigConstants.CONFIG_GC_SECTION, null,
ConfigConstants.CONFIG_KEY_LOGEXPIRY);
if (logExpiryStr == null) {
logExpiryStr = LOG_EXPIRY_DEFAULT;
}
gcLogExpire = GitDateParser.parse(logExpiryStr, null,
SystemReader.getInstance().getLocale()).toInstant();
}
return gcLogExpire;
}
private boolean autoGcBlockedByOldLockFile(boolean background) {
try {
FileTime lastModified = Files.getLastModifiedTime(logFile.toPath());
if (lastModified.toInstant().compareTo(getLogExpiry()) > 0) {
// There is an existing log file, which is too recent to ignore
if (!background) {
try (BufferedReader reader = Files
.newBufferedReader(logFile.toPath())) {
char[] buf = new char[1000];
int len = reader.read(buf, 0, 1000);
String oldError = new String(buf, 0, len);
throw new JGitInternalException(MessageFormat.format(
JGitText.get().gcLogExists, oldError, logFile));
}
}
return true;
}
} catch (NoSuchFileException e) {
// No existing log file, OK.
} catch (IOException | ParseException e) {
throw new JGitInternalException(e.getMessage(), e);
}
return false;
}
/**
* Lock the GC log file for updates
*
* @param background
* If true, and if gc.log already exists, unlock and return false
* @return {@code true} if we hold the lock
*/
boolean lock(boolean background) {
try {
if (!lock.lock()) {
return false;
}
} catch (IOException e) {
throw new JGitInternalException(e.getMessage(), e);
}
if (autoGcBlockedByOldLockFile(background)) {
lock.unlock();
return false;
}
return true;
}
/**
* Unlock (roll back) the GC log lock
*/
void unlock() {
lock.unlock();
}
/**
* Commit changes to the gc log, if there have been any writes. Otherwise,
* just unlock and delete the existing file (if any)
*
* @return true if committing (or unlocking/deleting) succeeds.
*/
boolean commit() {
if (nonEmpty) {
return lock.commit();
} else {
logFile.delete();
lock.unlock();
return true;
}
}
/**
* Write to the pending gc log. Content will be committed upon a call to
* commit()
*
* @param content
* The content to write
* @throws IOException
*/
void write(String content) throws IOException {
if (content.length() > 0) {
nonEmpty = true;
}
lock.write(content.getBytes(UTF_8));
}
}

14
org.eclipse.jgit/src/org/eclipse/jgit/lib/ConfigConstants.java

@ -290,6 +290,20 @@ public class ConfigConstants {
*/
public static final String CONFIG_KEY_PRUNEPACKEXPIRE = "prunepackexpire";
/**
* The "logexpiry" key
*
* @since 4.7
*/
public static final String CONFIG_KEY_LOGEXPIRY = "logExpiry";
/**
* The "autodetach" key
*
* @since 4.7
*/
public static final String CONFIG_KEY_AUTODETACH = "autoDetach";
/**
* The "aggressiveDepth" key
* @since 3.6

Loading…
Cancel
Save