From 61d492292853247cb7f26344c0d457f52f284a6a Mon Sep 17 00:00:00 2001 From: Shawn Pearce Date: Tue, 7 Feb 2017 21:00:30 -0800 Subject: [PATCH] Fix missing deltas near type boundaries Delta search was discarding discovered deltas if an object appeared near a type boundary in the delta search window. This has caused JGit to produce larger pack files than other implementations of the packing algorithm. Delta search works by pushing prior objects into a search window, an ordered list of objects to attempt to delta compress the next object against. (The window size is bounded, avoiding O(N^2) behavior.) For implementation reasons multiple object types can appear in the input list, and the window. PackWriter commonly passes both trees and blobs in the input list handed to the DeltaWindow algorithm. The pack file format requires an object to only delta compress against the same type, so the DeltaWindow algorithm must stop doing comparisions if a blob would be compared to a tree. Because the input list is sorted by object type and the window is recently considered prior objects, once a wrong type is discovered in the window the search algorithm stops and uses the current result. Unfortunately the termination condition was discarding any found delta by setting deltaBase and deltaBuf to null when it was trying to break the window search. When this bug occurs, the state of the DeltaWindow looks like this: current | \ / input list: tree0 tree1 blob1 blob2 window: blob1 tree1 tree0 / \ | res.prev As the loop iterates to the right across the window, it first finds that blob1 is a suitable delta base for blob2, and temporarily holds this in the bestDelta/deltaBuf fields. It then considers tree1, but tree1 has the wrong type (blob != tree), so the window loop must give up and fall through the remaining code. Moving the condition up and discarding the window contents allows the bestDelta/deltaBuf to be kept, letting the final file delta compress blob1 against blob0. The impact of this bug (and its fix) on real world repositories is likely minimal. The boundary from blob to tree happens approximately once in the search, as the input list is sorted by type. Only the first window size worth of blobs (e.g. 10 or 250) were failing to produce a delta in the final file. This bug fix does produce significantly different results for small test repositories created in the unit test suite, such as when a pack may contains 6 objects (2 commits, 2 trees, 2 blobs). Packing test cases can now better sample different output pack file sizes depending on delta compression and object reuse flags in PackConfig. Change-Id: Ibec09398d0305d4dbc0c66fce1daaf38eb71148f --- .../internal/storage/file/PackWriterTest.java | 28 +++++++++++-------- .../internal/storage/pack/DeltaWindow.java | 16 +++++++---- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java index 2e5795248..e4dfe7587 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/PackWriterTest.java @@ -364,8 +364,8 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"), ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"), ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"), - ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"), - ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") }; + ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") , + ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") }; try (final RevWalk parser = new RevWalk(db)) { final RevObject forcedOrderRevs[] = new RevObject[forcedOrder.length]; for (int i = 0; i < forcedOrder.length; i++) @@ -412,6 +412,8 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { */ @Test public void testWritePack2SizeDeltasVsNoDeltas() throws Exception { + config.setReuseDeltas(false); + config.setDeltaCompress(false); testWritePack2(); final long sizePack2NoDeltas = os.size(); tearDown(); @@ -739,8 +741,8 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"), ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"), ObjectId.fromString("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), - ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"), - ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") }; + ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3"), + ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") }; assertEquals(expectedOrder.length, writer.getObjectCount()); verifyObjectsOrder(expectedOrder); @@ -763,13 +765,11 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { ObjectId.fromString("c59759f143fb1fe21c197981df75a7ee00290799"), ObjectId.fromString("aabf2ffaec9b497f0950352b3e582d73035c2035"), ObjectId.fromString("902d5476fa249b7abc9d84c611577a81381f0327"), - ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259"), - ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") }; - if (deltaReuse) { - // objects order influenced (swapped) by delta-base first rule - ObjectId temp = expectedOrder[4]; - expectedOrder[4] = expectedOrder[5]; - expectedOrder[5] = temp; + ObjectId.fromString("6ff87c4664981e4397625791c8ea3bbb5f2279a3") , + ObjectId.fromString("5b6e7c66c276e7610d4a73c70ec1a1f7c1003259") }; + if (!config.isReuseDeltas() && !config.isDeltaCompress()) { + // If no deltas are in the file the final two entries swap places. + swap(expectedOrder, 4, 5); } assertEquals(expectedOrder.length, writer.getObjectCount()); verifyObjectsOrder(expectedOrder); @@ -777,6 +777,12 @@ public class PackWriterTest extends SampleDataRepositoryTestCase { .computeName().name()); } + private static void swap(ObjectId[] arr, int a, int b) { + ObjectId tmp = arr[a]; + arr[a] = arr[b]; + arr[b] = tmp; + } + private void writeVerifyPack4(final boolean thin) throws IOException { final HashSet interestings = new HashSet(); interestings.add(ObjectId diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java index 19d06a23f..73b285afc 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaWindow.java @@ -160,6 +160,7 @@ final class DeltaWindow { clear(n); } res.set(next); + clearWindowOnTypeSwitch(); if (res.object.isEdge() || res.object.doNotAttemptDelta()) { // We don't actually want to make a delta for @@ -194,6 +195,15 @@ final class DeltaWindow { return DeltaIndex.estimateIndexSize(len) - len; } + private void clearWindowOnTypeSwitch() { + DeltaWindowEntry p = res.prev; + if (!p.empty() && res.type() != p.type()) { + for (; p != res; p = p.prev) { + clear(p); + } + } + } + private void clear(DeltaWindowEntry ent) { if (ent.index != null) loaded -= ent.index.getIndexSize(); @@ -258,12 +268,6 @@ final class DeltaWindow { private boolean delta(final DeltaWindowEntry src) throws IOException { - // Objects must use only the same type as their delta base. - if (src.type() != res.type()) { - keepInWindow(); - return NEXT_RES; - } - // If the sizes are radically different, this is a bad pairing. if (res.size() < src.size() >>> 4) return NEXT_SRC;