From 84afea9179932995d1e59f8fda4e6b11217382ad Mon Sep 17 00:00:00 2001 From: Gustaf Lundh Date: Fri, 1 Feb 2013 13:20:31 +0100 Subject: [PATCH] Performance fixes in DateRevQueue When a lot of commits are added to DateRevQueue, the sort-on-insertion approach is very heavy on CPU cycles. One approach to fix this was made by Dave Borowitz: https://git.eclipse.org/r/#/c/5491/ But using Java's PriorityQueue seems to have brought some extra overhead, and the desired performance could not be reached. This fix takes another approach to the insertion problem, without changing the expected behaviour or bringing extra memory overhead: If we detect over 1000 commits in the DateRevQueue, a "seek-index" is rebuilt every 1000th added commit. The index keeps track of every 100th commit in the DateRevQueue. During insertions, it will be used for a preliminary scanning (binary search) of the queue, with the intention of helping add() find a good starting point to start walking from. After finding this starting point, add() will step commit-by-commit until the correct insertion place in the queue is found (today, the queue is expected to be sorted at all times). When applied to repositories with many refs, this approach has proven to bring huge performance gains and scales quite well. For instance, in a repository with close to 80000 refs, we could cut down the time a typical Gerrit replication of 1 commit would take (just a push from JGit's point of view) from 32sec down to 3.5sec. Below you see some typical times to add a specific amount of commits (with random commit times) to the DateRevQueue and the difference the preliminary seek-index makes: Commits | Index | No Index 1024 8ms 8ms 2048 13ms 9ms 4096 5ms 59ms 8192 11ms 595ms 16384 22ms 3058ms 32768 64ms 13811ms 65536 201ms 62677ms 131072 783ms 331585ms Only one extra reference is needed for every 100 inserted commits (and only when we see more than 1000 commits in the queue), so the memory overhead should be negligible. Various index-stepping values were tested, and 100 seemed to scale very well and be effective from start. In the future, it should probably be dynamic and based on the number of refs in the queue, but this should serve well as a starting point. Note: While other fundamentally different data structures may be more suitable, the DateRevQueue is extremely central to many of the Git core operations. This approach was chosen, since the effect of the patch is easy to predict in conjuction with the current implementation. A totally new data structure will make it harder to predict behaviour in many common and uncommon cases (in terms of breaking ties, memory usage, cost when using few elements, object creation/disposing overhead, etc). Change-Id: Ie7b99f40eacf6324bfb4716d82073adeda64d10f --- .../eclipse/jgit/revwalk/DateRevQueue.java | 64 ++++++++++++++++++- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/DateRevQueue.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/DateRevQueue.java index 35460a7c4..2c46f8eb4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/DateRevQueue.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/DateRevQueue.java @@ -1,5 +1,6 @@ /* - * Copyright (C) 2008, Shawn O. Pearce + * Copyright (C) 2008, Shawn O. Pearce , + * Copyright (C) 2013, Gustaf Lundh * and other copyright owners as documented in the project's IP log. * * This program and the accompanying materials are made available @@ -50,10 +51,22 @@ import org.eclipse.jgit.errors.MissingObjectException; /** A queue of commits sorted by commit time order. */ public class DateRevQueue extends AbstractRevQueue { + private static final int REBUILD_INDEX_COUNT = 1000; + private Entry head; private Entry free; + private int inQueue; + + private int sinceLastIndex; + + private Entry[] index; + + private int first; + + private int last = -1; + /** Create an empty date queue. */ public DateRevQueue() { super(); @@ -70,10 +83,36 @@ public class DateRevQueue extends AbstractRevQueue { } public void add(final RevCommit c) { + sinceLastIndex++; + if (++inQueue > REBUILD_INDEX_COUNT + && sinceLastIndex > REBUILD_INDEX_COUNT) + buildIndex(); + Entry q = head; final long when = c.commitTime; + + if (first <= last && index[first].commit.commitTime > when) { + int low = first, high = last; + while (low <= high) { + int mid = (low + high) >>> 1; + int t = index[mid].commit.commitTime; + if (t < when) + high = mid - 1; + else if (t > when) + low = mid + 1; + else { + low = mid - 1; + break; + } + } + low = Math.min(low, high); + while (low >= first && when == index[low].commit.commitTime) + --low; + q = index[low]; + } + final Entry n = newEntry(c); - if (q == null || when > q.commit.commitTime) { + if (q == null || (q == head && when > q.commit.commitTime)) { n.next = q; head = n; } else { @@ -91,11 +130,28 @@ public class DateRevQueue extends AbstractRevQueue { final Entry q = head; if (q == null) return null; + + if (index != null && q == index[first]) + index[first++] = null; + inQueue--; + head = q.next; freeEntry(q); return q.commit; } + private void buildIndex() { + sinceLastIndex = 0; + first = 0; + index = new Entry[inQueue / 100 + 1]; + int qi = 0, ii = 0; + for (Entry q = head; q != null; q = q.next) { + if (++qi % 100 == 0) + index[ii++] = q; + } + last = ii - 1; + } + /** * Peek at the next commit, without removing it. * @@ -108,6 +164,10 @@ public class DateRevQueue extends AbstractRevQueue { public void clear() { head = null; free = null; + index = null; + inQueue = 0; + sinceLastIndex = 0; + last = -1; } boolean everbodyHasFlag(final int f) {