From 6a3127b257ae275e43b91d3b733ac2129c0792c3 Mon Sep 17 00:00:00 2001 From: Terry Parker Date: Fri, 30 Oct 2015 17:47:06 -0700 Subject: [PATCH] [performance] Speed up delta packing When packing is able to reuse lots of deltas from existing packs, those objects are marked as "doNotAttemptDelta" and do not contribute to DeltaTask's computeTopPaths() "totalWeight" calculation. In the extreme case when all packs are reusable, "totalWeight" will be zero. DeltaTask.partitionTasks() uses "totalWeight" to determine a "weightPerThread" size it uses to set up DeltaTasks. When "totalWeight" is small, partitionTasks() ends up creating a DeltaTask for every unique path. For a large repository, the small "weightPerThread" can result in the creation of >100k tasks (for the MSM 3.10 Linux repository, the count was ~150k). This makes the "task stealing" mechanism in DeltaTask very inefficient, because every attempt to steal work does a linear walk through all tasks, searching for the one with the most work remaining, which is O(N^2) comparisons. For the MSM 3.10 repository when all deltas were reusable, PackWriter.parallelDeltaSearch() took (1615+1633+1458)/3 = 1568 seconds. The error is that DeltaTask treats the weights of objects marked as "doNotAttemptDelta" inconsistently. It ignores the weights when calculating "totalWeight" but uses them when partitioning the tasks. The fix is to also ignore them when partitioning the tasks. With this patch applied, PackWriter.parallelDeltaSearch() on the MSM 3.10 repository when all deltas are reused went from taking 1568 seconds to 62ms (>25k speedup). This patch also fixes a totalWeight initialization error in DeltaTask.computeTopPaths(). Change-Id: I2ae37efa83bca42b0e716266ae6aa9d182e76d9c Signed-off-by: Terry Parker --- .../jgit/internal/storage/pack/DeltaTask.java | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java index 9c84fb7ad..076df1880 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaTask.java @@ -145,7 +145,7 @@ final class DeltaTask implements Callable { } s = i = topPaths.get(nextTop++).slice.endIndex; } else { - w += list[i++].getWeight(); + w += getAdjustedWeight(list[i++]); } } @@ -180,8 +180,8 @@ final class DeltaTask implements Callable { threads); int cp = beginIndex; int ch = list[cp].getPathHash(); - long cw = list[cp].getWeight(); - totalWeight = list[cp].getWeight(); + long cw = getAdjustedWeight(list[cp]); + totalWeight = cw; for (int i = cp + 1; i < endIndex; i++) { ObjectToPack o = list[i]; @@ -206,11 +206,9 @@ final class DeltaTask implements Callable { ch = o.getPathHash(); cw = 0; } - if (o.isEdge() || o.doNotAttemptDelta()) { - continue; - } - cw += o.getWeight(); - totalWeight += o.getWeight(); + int weight = getAdjustedWeight(o); + cw += weight; + totalWeight += weight; } // Sort by starting index to identify gaps later. @@ -228,6 +226,15 @@ final class DeltaTask implements Callable { } } + private static int getAdjustedWeight(ObjectToPack o) { + // Edge objects and those with reused deltas do not need to be + // compressed. For compression calculations, ignore their weights. + if (o.isEdge() || o.doNotAttemptDelta()) { + return 0; + } + return o.getWeight(); + } + static final class WeightedPath implements Comparable { final long weight; final Slice slice;