From 153c11a49bc6c846e8286cf30d3c959d7aa6e2c8 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 29 Aug 2017 18:14:51 -0700 Subject: [PATCH 1/3] DfsFsck: run connectivity check pass exactly once The simpler algorithm is to load all branch tips into an ObjectWalk and run that walk exactly once. This avoids redoing work related to parsing and considering trees reused across side branches. Move the connectivity check into its own helper method. This moves it left one level of identation, and makes it easier to fit the method's logic with less line wrapping. Add a "Counting objects..." progress monitor around this phase. Its what is used when a server receives a push and is also trying to verify the client sent all required objects. Change-Id: I4d53d75d0cdd1a13fff7d513a6ae0b2d14ea4090 --- .../jgit/internal/storage/dfs/DfsFsck.java | 54 ++++++++++++------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java index f90ba7d98..7468bf5c9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java @@ -48,11 +48,13 @@ import java.util.List; import org.eclipse.jgit.errors.CorruptPackIndexException; import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.fsck.FsckError; import org.eclipse.jgit.internal.fsck.FsckError.CorruptIndex; import org.eclipse.jgit.internal.fsck.FsckPackParser; import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectChecker; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; @@ -94,6 +96,10 @@ public class DfsFsck { * if encounters IO errors during the process. */ public FsckError check(ProgressMonitor pm) throws IOException { + if (pm == null) { + pm = NullProgressMonitor.INSTANCE; + } + FsckError errors = new FsckError(); try { for (DfsPackFile pack : objdb.getPacks()) { @@ -120,31 +126,41 @@ public class DfsFsck { } } - try (ObjectWalk ow = new ObjectWalk(ctx)) { - for (Ref r : repo.getAllRefs().values()) { - try { - RevObject tip = ow.parseAny(r.getObjectId()); - if (r.getLeaf().getName().startsWith(Constants.R_HEADS)) { - // check if heads point to a commit object - if (tip.getType() != Constants.OBJ_COMMIT) { - errors.getNonCommitHeads() - .add(r.getLeaf().getName()); - } - } - ow.markStart(tip); - ow.checkConnectivity(); - ow.markUninteresting(tip); - } catch (MissingObjectException e) { - errors.getMissingObjects().add(e.getObjectId()); - } - } - } + checkConnectivity(pm, errors); } finally { ctx.close(); } return errors; } + private void checkConnectivity(ProgressMonitor pm, FsckError errors) + throws IOException { + pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN); + try (ObjectWalk ow = new ObjectWalk(ctx)) { + for (Ref r : repo.getAllRefs().values()) { + RevObject tip; + try { + tip = ow.parseAny(r.getObjectId()); + if (r.getLeaf().getName().startsWith(Constants.R_HEADS) + && tip.getType() != Constants.OBJ_COMMIT) { + // heads should only point to a commit object + errors.getNonCommitHeads().add(r.getLeaf().getName()); + } + } catch (MissingObjectException e) { + errors.getMissingObjects().add(e.getObjectId()); + continue; + } + ow.markStart(tip); + } + try { + ow.checkConnectivity(); + } catch (MissingObjectException e) { + errors.getMissingObjects().add(e.getObjectId()); + } + } + pm.endTask(); + } + /** * Use a customized object checker instead of the default one. Caller can * specify a skip list to ignore some errors. From 69588c21fe2b2dd578a51015b5331dcb1b6fb9c0 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 29 Aug 2017 18:26:29 -0700 Subject: [PATCH 2/3] DfsFsck: refactor pack verify into its own method This simplifies the logic about allocation of the DfsReader, and clarifies the code considerably by using smaller scopes with less indentation. A few static imports from PackExt and slightly shorter variable names make for a more understandable-at-glance implementation. Change-Id: Iaf5a0e14fe0349215d9e44446f68d1129ad3bb3d --- .../jgit/internal/storage/dfs/DfsFsck.java | 58 +++++++++---------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java index 7468bf5c9..2580047b1 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java @@ -43,8 +43,11 @@ package org.eclipse.jgit.internal.storage.dfs; +import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX; +import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK; + +import java.io.FileNotFoundException; import java.io.IOException; -import java.util.List; import org.eclipse.jgit.errors.CorruptPackIndexException; import org.eclipse.jgit.errors.MissingObjectException; @@ -52,7 +55,6 @@ import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.fsck.FsckError; import org.eclipse.jgit.internal.fsck.FsckError.CorruptIndex; import org.eclipse.jgit.internal.fsck.FsckPackParser; -import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.NullProgressMonitor; import org.eclipse.jgit.lib.ObjectChecker; @@ -60,16 +62,11 @@ import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; import org.eclipse.jgit.revwalk.ObjectWalk; import org.eclipse.jgit.revwalk.RevObject; -import org.eclipse.jgit.transport.PackedObjectInfo; /** Verify the validity and connectivity of a DFS repository. */ public class DfsFsck { private final DfsRepository repo; - private final DfsObjDatabase objdb; - - private final DfsReader ctx; - private ObjectChecker objChecker = new ObjectChecker(); /** @@ -81,10 +78,8 @@ public class DfsFsck { public DfsFsck(DfsRepository repository) { repo = repository; objdb = repo.getObjectDatabase(); - ctx = objdb.newReader(); } - /** * Verify the integrity and connectivity of all objects in the object * database. @@ -101,42 +96,45 @@ public class DfsFsck { } FsckError errors = new FsckError(); - try { + checkPacks(pm, errors); + checkConnectivity(pm, errors); + return errors; + } + + private void checkPacks(ProgressMonitor pm, FsckError errors) + throws IOException, FileNotFoundException { + try (DfsReader ctx = objdb.newReader()) { for (DfsPackFile pack : objdb.getPacks()) { DfsPackDescription packDesc = pack.getPackDescription(); - try (ReadableChannel channel = repo.getObjectDatabase() - .openFile(packDesc, PackExt.PACK)) { - List objectsInPack; - FsckPackParser parser = new FsckPackParser( - repo.getObjectDatabase(), channel); - parser.setObjectChecker(objChecker); - parser.overwriteObjectCount(packDesc.getObjectCount()); - parser.parse(pm); - errors.getCorruptObjects() - .addAll(parser.getCorruptObjects()); - objectsInPack = parser.getSortedObjectList(null); - parser.verifyIndex(objectsInPack, pack.getPackIndex(ctx)); + try (ReadableChannel rc = objdb.openFile(packDesc, PACK)) { + verifyPack(pm, errors, ctx, pack, rc); } catch (MissingObjectException e) { errors.getMissingObjects().add(e.getObjectId()); } catch (CorruptPackIndexException e) { errors.getCorruptIndices().add(new CorruptIndex( - pack.getPackDescription() - .getFileName(PackExt.INDEX), + pack.getPackDescription().getFileName(INDEX), e.getErrorType())); } } - - checkConnectivity(pm, errors); - } finally { - ctx.close(); } - return errors; + } + + private void verifyPack(ProgressMonitor pm, FsckError errors, DfsReader ctx, + DfsPackFile pack, ReadableChannel ch) + throws IOException, CorruptPackIndexException { + FsckPackParser fpp = new FsckPackParser(objdb, ch); + fpp.setObjectChecker(objChecker); + fpp.overwriteObjectCount(pack.getPackDescription().getObjectCount()); + fpp.parse(pm); + errors.getCorruptObjects().addAll(fpp.getCorruptObjects()); + + fpp.verifyIndex(fpp.getSortedObjectList(null), pack.getPackIndex(ctx)); } private void checkConnectivity(ProgressMonitor pm, FsckError errors) throws IOException { pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN); - try (ObjectWalk ow = new ObjectWalk(ctx)) { + try (ObjectWalk ow = new ObjectWalk(repo)) { for (Ref r : repo.getAllRefs().values()) { RevObject tip; try { From e5db7c1f0e7431e68e854dc1152633999cf01555 Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 29 Aug 2017 18:40:03 -0700 Subject: [PATCH 3/3] DfsFsck: reduce memory usage during verifyIndex Don't convert a lot of ObjectId to String stored in generic java.util.HashSet. This is a very expensive way to store objects. Instead rely on "this" from the FsckPackParser to lookup information about the objects in this pack file, which lets the verify code avoid sorting the object list. Use ObjectIdOwnerMap, which is the most efficient format JGit has for storing lots of objects. Change-Id: Ib68f93acb4d91b96d0a44c0612f704500d332ac1 --- .../jgit/internal/fsck/FsckPackParser.java | 22 ++++++++++++------- .../jgit/internal/storage/dfs/DfsFsck.java | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java index e6ec6814b..184bf416e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/fsck/FsckPackParser.java @@ -49,7 +49,6 @@ import java.nio.channels.Channels; import java.text.MessageFormat; import java.util.Arrays; import java.util.HashSet; -import java.util.List; import java.util.Set; import java.util.zip.CRC32; @@ -65,6 +64,7 @@ import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.ObjectChecker; import org.eclipse.jgit.lib.ObjectDatabase; +import org.eclipse.jgit.lib.ObjectIdOwnerMap; import org.eclipse.jgit.transport.PackParser; import org.eclipse.jgit.transport.PackedObjectInfo; @@ -265,18 +265,18 @@ public class FsckPackParser extends PackParser { /** * Verify the existing index file with all objects from the pack. * - * @param entries - * all the entries that are expected in the index file * @param idx * index file associate with the pack * @throws CorruptPackIndexException * when the index file is corrupt. */ - public void verifyIndex(List entries, PackIndex idx) + public void verifyIndex(PackIndex idx) throws CorruptPackIndexException { - Set all = new HashSet<>(); - for (PackedObjectInfo entry : entries) { - all.add(entry.getName()); + ObjectIdOwnerMap inPack = new ObjectIdOwnerMap<>(); + for (int i = 0; i < getObjectCount(); i++) { + PackedObjectInfo entry = getObject(i); + inPack.add(new ObjFromPack(entry)); + long offset = idx.findOffset(entry); if (offset == -1) { throw new CorruptPackIndexException( @@ -305,7 +305,7 @@ public class FsckPackParser extends PackParser { } for (MutableEntry entry : idx) { - if (!all.contains(entry.name())) { + if (!inPack.contains(entry.toObjectId())) { throw new CorruptPackIndexException(MessageFormat.format( JGitText.get().unknownObjectInIndex, entry.name()), ErrorType.UNKNOWN_OBJ); @@ -323,4 +323,10 @@ public class FsckPackParser extends PackParser { public void overwriteObjectCount(long expectedObjectCount) { this.expectedObjectCount = expectedObjectCount; } + + static class ObjFromPack extends ObjectIdOwnerMap.Entry { + ObjFromPack(AnyObjectId id) { + super(id); + } + } } diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java index 2580047b1..75eade227 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsFsck.java @@ -128,7 +128,7 @@ public class DfsFsck { fpp.parse(pm); errors.getCorruptObjects().addAll(fpp.getCorruptObjects()); - fpp.verifyIndex(fpp.getSortedObjectList(null), pack.getPackIndex(ctx)); + fpp.verifyIndex(pack.getPackIndex(ctx)); } private void checkConnectivity(ProgressMonitor pm, FsckError errors)