From 6cc532a43cf28403cb623d3df8600a2542a40a43 Mon Sep 17 00:00:00 2001 From: Colby Ranger Date: Fri, 28 Jun 2013 10:37:56 -0700 Subject: [PATCH] Use a bucket sort for PackReverseIndex. Previously it took 1200ms to create a reverse index (sorted by offset). Using a simple bucket sort algorithm, that time is reduced to 450ms. The bucket index into the offset array is kept, in order to decrease the binary search window. Don't keep a copy of the offsets. Instead, use nth position to lookup the offset in the PackIndex. Change-Id: If51ab76752622e04a4430d9a14db95ad02f5329d --- .../storage/file/PackReverseIndex.java | 173 +++++++++--------- 1 file changed, 87 insertions(+), 86 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java index c5aa5d3cc..fe9f70092 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/PackReverseIndex.java @@ -44,7 +44,6 @@ package org.eclipse.jgit.internal.storage.file; import java.text.MessageFormat; -import java.util.Arrays; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.internal.JGitText; @@ -65,21 +64,22 @@ public class PackReverseIndex { /** Index we were created from, and that has our ObjectId data. */ private final PackIndex index; - /** - * (offset31, truly) Offsets accommodating in 31 bits. - */ - private final int offsets32[]; + /** The number of bytes per entry in the offsetIndex. */ + private final long bucketSize; /** - * Offsets not accommodating in 31 bits. + * An index into the nth mapping, where the value is the position after the + * the last index that contains the values of the bucket. For example given + * offset o (and bucket = o / bucketSize), the offset will be contained in + * the range nth[offsetIndex[bucket - 1]] inclusive to + * nth[offsetIndex[bucket]] exclusive. + * + * See {@link #binarySearch} */ - private final long offsets64[]; + private final int[] offsetIndex; - /** Position of the corresponding {@link #offsets32} in {@link #index}. */ - private final int nth32[]; - - /** Position of the corresponding {@link #offsets64} in {@link #index}. */ - private final int nth64[]; + /** Mapping from indices in offset order to indices in SHA-1 order. */ + private final int[] nth; /** * Create reverse index from straight/forward pack index, by indexing all @@ -92,38 +92,58 @@ public class PackReverseIndex { index = packIndex; final long cnt = index.getObjectCount(); - final long n64 = index.getOffset64Count(); - final long n32 = cnt - n64; - if (n32 > Integer.MAX_VALUE || n64 > Integer.MAX_VALUE - || cnt > 0xffffffffL) + if (cnt + 1 > Integer.MAX_VALUE) throw new IllegalArgumentException( JGitText.get().hugeIndexesAreNotSupportedByJgitYet); - offsets32 = new int[(int) n32]; - offsets64 = new long[(int) n64]; - nth32 = new int[offsets32.length]; - nth64 = new int[offsets64.length]; + if (cnt == 0) { + bucketSize = Long.MAX_VALUE; + offsetIndex = new int[1]; + nth = new int[0]; + return; + } - int i32 = 0; - int i64 = 0; + final long[] offsetsBySha1 = new long[(int) cnt]; + + long maxOffset = 0; + int ith = 0; for (final MutableEntry me : index) { final long o = me.getOffset(); - if (o <= Integer.MAX_VALUE) - offsets32[i32++] = (int) o; - else - offsets64[i64++] = o; + offsetsBySha1[ith++] = o; + if (o > maxOffset) + maxOffset = o; } - Arrays.sort(offsets32); - Arrays.sort(offsets64); + bucketSize = maxOffset / cnt + 1; + int[] bucketIndex = new int[(int) cnt]; + int[] bucketValues = new int[(int) cnt + 1]; + for (int oi = 0; oi < offsetsBySha1.length; oi++) { + final long o = offsetsBySha1[oi]; + final int bucket = (int) (o / bucketSize); + final int bucketValuesPos = oi + 1; + final int current = bucketIndex[bucket]; + bucketIndex[bucket] = bucketValuesPos; + bucketValues[bucketValuesPos] = current; + } - int nth = 0; - for (final MutableEntry me : index) { - final long o = me.getOffset(); - if (o <= Integer.MAX_VALUE) - nth32[Arrays.binarySearch(offsets32, (int) o)] = nth++; - else - nth64[Arrays.binarySearch(offsets64, o)] = nth++; + int nthByOffset = 0; + nth = new int[offsetsBySha1.length]; + offsetIndex = bucketIndex; // Reuse the allocation + for (int bi = 0; bi < bucketIndex.length; bi++) { + final int start = nthByOffset; + // Insertion sort of the values in the bucket. + for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) { + final int nthBySha1 = vi - 1; + final long o = offsetsBySha1[nthBySha1]; + int insertion = nthByOffset++; + for (; start < insertion; insertion--) { + if (o > offsetsBySha1[nth[insertion - 1]]) + break; + nth[insertion] = nth[insertion - 1]; + } + nth[insertion] = nthBySha1; + } + offsetIndex[bi] = nthByOffset; } } @@ -136,17 +156,10 @@ public class PackReverseIndex { * @return object id for this offset, or null if no object was found. */ public ObjectId findObject(final long offset) { - if (offset <= Integer.MAX_VALUE) { - final int i32 = Arrays.binarySearch(offsets32, (int) offset); - if (i32 < 0) - return null; - return index.getObjectId(nth32[i32]); - } else { - final int i64 = Arrays.binarySearch(offsets64, offset); - if (i64 < 0) - return null; - return index.getObjectId(nth64[i64]); - } + final int ith = binarySearch(offset); + if (ith < 0) + return null; + return index.getObjectId(nth[ith]); } /** @@ -166,52 +179,40 @@ public class PackReverseIndex { */ public long findNextOffset(final long offset, final long maxOffset) throws CorruptObjectException { - if (offset <= Integer.MAX_VALUE) { - final int i32 = Arrays.binarySearch(offsets32, (int) offset); - if (i32 < 0) - throw new CorruptObjectException( - MessageFormat.format( - JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset, - Long.valueOf(offset))); - - if (i32 + 1 == offsets32.length) { - if (offsets64.length > 0) - return offsets64[0]; - return maxOffset; - } - return offsets32[i32 + 1]; - } else { - final int i64 = Arrays.binarySearch(offsets64, offset); - if (i64 < 0) - throw new CorruptObjectException( - MessageFormat.format( - JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset, - Long.valueOf(offset))); - - if (i64 + 1 == offsets64.length) - return maxOffset; - return offsets64[i64 + 1]; - } + final int ith = binarySearch(offset); + if (ith < 0) + throw new CorruptObjectException( + MessageFormat.format( + JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset, + Long.valueOf(offset))); + + if (ith + 1 == nth.length) + return maxOffset; + return index.getOffset(nth[ith + 1]); } int findPostion(long offset) { - if (offset <= Integer.MAX_VALUE) { - final int i32 = Arrays.binarySearch(offsets32, (int) offset); - if (i32 < 0) - return -1; - return i32; - } else { - final int i64 = Arrays.binarySearch(offsets64, offset); - if (i64 < 0) - return -1; - return nth32.length + i64; + return binarySearch(offset); + } + + private int binarySearch(final long offset) { + int bucket = (int) (offset / bucketSize); + int low = bucket == 0 ? 0 : offsetIndex[bucket - 1]; + int high = offsetIndex[bucket]; + while (low < high) { + final int mid = (low + high) >>> 1; + final long o = index.getOffset(nth[mid]); + if (offset < o) + high = mid; + else if (offset == o) + return mid; + else + low = mid + 1; } + return -1; } ObjectId findObjectByPosition(int nthPosition) { - if (nthPosition < nth32.length) - return index.getObjectId(nth32[nthPosition]); - final int i64 = nthPosition - nth32.length; - return index.getObjectId(nth64[i64]); + return index.getObjectId(nth[nthPosition]); } }