Browse Source

PackBitmapIndex: Reduce memory usage in GC

Currently, the garbage collection is consistently failing for some large
repositories in the building bitmap phase, e.g.Linux-MSM project:
https://source.codeaurora.org/quic/la/kernel/msm-3.18

Historically, bitmap index creation happened in 3 phases:
1. Select the commits to which bitmaps should be attached.
2. Create all bitmaps for these commits, stored in uncompressed format
in the PackBitmapIndexBuilder.
3. Deltify the bitmaps and write them to disk.

We investigated the process. For phase 2 it's most efficient to create
bitmaps starting with oldest commit and moving to the newest commit,
because the newer commits are able to reuse the work for the old ones.
But for bitmap deltification in phase 3, it's better when a newer
commit's bitmap is the base, and the current disk format writes bitmaps
out for the newest commits first.

This change introduces a new collection to hold the deltified and
compressed representations of the bitmaps, keeping a smaller subset of
commits in the PackBitmapIndexBuilder to help make the bitmap index
creation more memory efficient.

And in this commit, we're setting DISTANCE_THRESHOLD to 0 in the
PackWriterBitmapPreparer, which means the garbage collection will not
have much behavoir change and will still use as much memory as before.

Change-Id: I6ec2c3e8dde11805af47874d67d33cf1ef83660e
Signed-off-by: Yunjie Li <yunjieli@google.com>
master
Yunjie Li 5 years ago
parent
commit
dcb0265436
  1. 144
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java
  2. 10
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java
  3. 18
      org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java
  4. 33
      org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java

144
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java

@ -11,12 +11,13 @@
package org.eclipse.jgit.internal.storage.file;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.pack.BitmapCommit;
import org.eclipse.jgit.internal.storage.pack.ObjectToPack;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.BitmapIndex.Bitmap;
@ -39,8 +40,12 @@ public class PackBitmapIndexBuilder extends BasePackBitmapIndex {
private final EWAHCompressedBitmap blobs;
private final EWAHCompressedBitmap tags;
private final BlockList<PositionEntry> byOffset;
final BlockList<StoredBitmap>
byAddOrder = new BlockList<>();
private final LinkedList<StoredBitmap>
bitmapsToWriteXorBuffer = new LinkedList<>();
private List<StoredEntry> bitmapsToWrite = new ArrayList<>();
final ObjectIdOwnerMap<PositionEntry>
positionEntries = new ObjectIdOwnerMap<>();
@ -135,6 +140,63 @@ public class PackBitmapIndexBuilder extends BasePackBitmapIndex {
addBitmap(objectId, bitmap.retrieveCompressed(), flags);
}
/**
* Processes a commit and prepares its bitmap to write to the bitmap index
* file.
*
* @param c
* the commit corresponds to the bitmap.
* @param bitmap
* the bitmap to be written.
* @param flags
* the flags of the commit.
*/
public void processBitmapForWrite(BitmapCommit c, Bitmap bitmap,
int flags) {
EWAHCompressedBitmap compressed = bitmap.retrieveCompressed();
compressed.trim();
StoredBitmap newest = new StoredBitmap(c, compressed, null, flags);
bitmapsToWriteXorBuffer.add(newest);
if (bitmapsToWriteXorBuffer.size() > MAX_XOR_OFFSET_SEARCH) {
bitmapsToWrite.add(
generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst()));
}
if (c.isAddToIndex()) {
// The Bitmap map in the base class is used to make revwalks
// efficient, so only add bitmaps that keep it efficient without
// bloating memory.
addBitmap(c, bitmap, flags);
}
}
private StoredEntry generateStoredEntry(StoredBitmap bitmapToWrite) {
int bestXorOffset = 0;
EWAHCompressedBitmap bestBitmap = bitmapToWrite.getBitmap();
int offset = 1;
for (StoredBitmap curr : bitmapsToWriteXorBuffer) {
EWAHCompressedBitmap bitmap = curr.getBitmap()
.xor(bitmapToWrite.getBitmap());
if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) {
bestBitmap = bitmap;
bestXorOffset = offset;
}
offset++;
}
PositionEntry entry = positionEntries.get(bitmapToWrite);
if (entry == null) {
throw new IllegalStateException();
}
bestBitmap.trim();
StoredEntry result = new StoredEntry(entry.namePosition, bestBitmap,
bestXorOffset, bitmapToWrite.getFlags());
return result;
}
/**
* Stores the bitmap for the objectId.
*
@ -150,7 +212,6 @@ public class PackBitmapIndexBuilder extends BasePackBitmapIndex {
bitmap.trim();
StoredBitmap result = new StoredBitmap(objectId, bitmap, null, flags);
getBitmaps().add(result);
byAddOrder.add(result);
}
/** {@inheritDoc} */
@ -236,15 +297,18 @@ public class PackBitmapIndexBuilder extends BasePackBitmapIndex {
/** {@inheritDoc} */
@Override
public int getBitmapCount() {
return getBitmaps().size();
return bitmapsToWriteXorBuffer.size() + bitmapsToWrite.size();
}
/**
* Remove all the bitmaps entries added.
*
* @param size
* the expected number of bitmap entries to be written.
*/
public void clearBitmaps() {
byAddOrder.clear();
public void resetBitmaps(int size) {
getBitmaps().clear();
bitmapsToWrite = new ArrayList<>(size);
}
/** {@inheritDoc} */
@ -254,64 +318,18 @@ public class PackBitmapIndexBuilder extends BasePackBitmapIndex {
}
/**
* Get an iterator over the xor compressed entries.
* Get list of xor compressed entries that need to be written.
*
* @return an iterator over the xor compressed entries.
* @return a list of the xor compressed entries.
*/
public Iterable<StoredEntry> getCompressedBitmaps() {
// Add order is from oldest to newest. The reverse add order is the
// output order.
return () -> new Iterator<StoredEntry>() {
private int index = byAddOrder.size() - 1;
@Override
public boolean hasNext() {
return index >= 0;
}
@Override
public StoredEntry next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
StoredBitmap item = byAddOrder.get(index);
int bestXorOffset = 0;
EWAHCompressedBitmap bestBitmap = item.getBitmap();
// Attempt to compress the bitmap with an XOR of the
// previously written entries.
for (int i = 1; i <= MAX_XOR_OFFSET_SEARCH; i++) {
int curr = i + index;
if (curr >= byAddOrder.size()) {
break;
}
StoredBitmap other = byAddOrder.get(curr);
EWAHCompressedBitmap bitmap = other.getBitmap()
.xor(item.getBitmap());
if (bitmap.sizeInBytes() < bestBitmap.sizeInBytes()) {
bestBitmap = bitmap;
bestXorOffset = i;
}
}
index--;
PositionEntry entry = positionEntries.get(item);
if (entry == null) {
throw new IllegalStateException();
}
bestBitmap.trim();
return new StoredEntry(entry.namePosition, bestBitmap,
bestXorOffset, item.getFlags());
}
public List<StoredEntry> getCompressedBitmaps() {
while (!bitmapsToWriteXorBuffer.isEmpty()) {
bitmapsToWrite.add(
generateStoredEntry(bitmapsToWriteXorBuffer.pollFirst()));
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
Collections.reverse(bitmapsToWrite);
return bitmapsToWrite;
}
/** Data object for the on disk representation of a bitmap entry. */

10
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java

@ -2331,8 +2331,14 @@ public class PackWriter implements AutoCloseable {
throw new IllegalStateException(MessageFormat.format(
JGitText.get().bitmapMissingObject, cmit.name(),
last.name()));
last = cmit;
writeBitmaps.addBitmap(cmit, bitmap.build(), cmit.getFlags());
last = BitmapCommit.copyFrom(cmit).build();
writeBitmaps.processBitmapForWrite(cmit, bitmap.build(),
cmit.getFlags());
// The bitmap walker should stop when the walk hits the previous
// commit, which saves time.
walker.setPrevCommit(last);
walker.setPrevBitmap(bitmap);
pm.update(1);
}

18
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java

@ -58,6 +58,8 @@ class PackWriterBitmapPreparer {
private static final int DAY_IN_SECONDS = 24 * 60 * 60;
private static final int DISTANCE_THRESHOLD = 0;
private static final Comparator<RevCommit> ORDER_BY_REVERSE_TIMESTAMP = (
RevCommit a, RevCommit b) -> Integer
.signum(b.getCommitTime() - a.getCommitTime());
@ -244,6 +246,7 @@ class PackWriterBitmapPreparer {
// This commit is selected.
// Calculate where to look for the next one.
int flags = nextFlg;
int currDist = distanceFromTip;
nextIn = nextSpan(distanceFromTip);
nextFlg = nextIn == distantCommitSpan
? PackBitmapIndex.FLAG_REUSE
@ -279,8 +282,17 @@ class PackWriterBitmapPreparer {
longestAncestorChain = new ArrayList<>();
chains.add(longestAncestorChain);
}
longestAncestorChain.add(new BitmapCommit(c,
!longestAncestorChain.isEmpty(), flags));
// The commit bc should reuse bitmap walker from its close
// ancestor. And the bitmap of bc should only be added to
// PackBitmapIndexBuilder when it's an old enough
// commit,i.e. distance from tip should be greater than
// DISTANCE_THRESHOLD, to save memory.
BitmapCommit bc = BitmapCommit.newBuilder(c).setFlags(flags)
.setAddToIndex(currDist >= DISTANCE_THRESHOLD)
.setReuseWalker(!longestAncestorChain.isEmpty())
.build();
longestAncestorChain.add(bc);
writeBitmaps.addBitmap(c, bitmap, 0);
}
@ -288,12 +300,12 @@ class PackWriterBitmapPreparer {
selections.addAll(chain);
}
}
writeBitmaps.clearBitmaps(); // Remove the temporary commit bitmaps.
// Add the remaining peeledWant
for (AnyObjectId remainingWant : selectionHelper.newWants) {
selections.add(new BitmapCommit(remainingWant, false, 0));
}
writeBitmaps.resetBitmaps(selections.size()); // Remove the temporary commit bitmaps.
pm.endTask();
return selections;

33
org.eclipse.jgit/src/org/eclipse/jgit/revwalk/BitmapWalker.java

@ -16,6 +16,7 @@ import java.util.Arrays;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.internal.revwalk.AddToBitmapFilter;
import org.eclipse.jgit.internal.revwalk.AddToBitmapWithCacheFilter;
import org.eclipse.jgit.internal.revwalk.AddUnseenToBitmapFilter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.BitmapIndex;
@ -41,6 +42,11 @@ public final class BitmapWalker {
private long countOfBitmapIndexMisses;
// Cached bitmap and commit to save walk time.
private AnyObjectId prevCommit;
private Bitmap prevBitmap;
/**
* Create a BitmapWalker.
*
@ -55,6 +61,28 @@ public final class BitmapWalker {
this.pm = (pm == null) ? NullProgressMonitor.INSTANCE : pm;
}
/**
* Set the cached commit for the walker.
*
* @param prevCommit
* the cached commit.
* @since 5.7
*/
public void setPrevCommit(AnyObjectId prevCommit) {
this.prevCommit = prevCommit;
}
/**
* Set the bitmap associated with the cached commit for the walker.
*
* @param prevBitmap
* the bitmap associated with the cached commit.
* @since 5.7
*/
public void setPrevBitmap(Bitmap prevBitmap) {
this.prevBitmap = prevBitmap;
}
/**
* Return the number of objects that had to be walked because they were not covered by a
* bitmap.
@ -169,7 +197,10 @@ public final class BitmapWalker {
}
if (marked) {
if (seen == null) {
if (prevCommit != null) {
walker.setRevFilter(new AddToBitmapWithCacheFilter(prevCommit,
prevBitmap, bitmapResult));
} else if (seen == null) {
walker.setRevFilter(new AddToBitmapFilter(bitmapResult));
} else {
walker.setRevFilter(

Loading…
Cancel
Save