From c20daa73146a3c385f4fed237708c4a7d28d8745 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 8 Jul 2010 17:11:38 -0700 Subject: [PATCH] Add path hash code to ObjectWalk PackWriter wants to categorize objects that are similar in path name, so blobs that are probably from the same file (or same sort of file) can be delta compressed against each other. Avoid converting into a string by performing the hashing directly against the path buffer in the tree iterator. We only hash the last 16 bytes of the path, and we try to avoid any spaces, as we want the suffix of a file such as ".java" to be more important than the directory it is in, like "src". Change-Id: I31770ee711526306769a6f534afb19f937e0ba85 Signed-off-by: Shawn O. Pearce --- .../org/eclipse/jgit/revwalk/ObjectWalk.java | 12 ++++++++++++ .../jgit/treewalk/AbstractTreeIterator.java | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java index 9393e2d17..a6ecfe219 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/revwalk/ObjectWalk.java @@ -384,6 +384,18 @@ public class ObjectWalk extends RevWalk { return last != null ? treeWalk.getEntryPathString() : null; } + /** + * Get the current object's path hash code. + *

+ * This method computes a hash code on the fly for this path, the hash is + * suitable to cluster objects that may have similar paths together. + * + * @return path hash code; any integer may be returned. + */ + public int getPathHashCode() { + return last != null ? treeWalk.getEntryPathHashCode() : 0; + } + @Override public void dispose() { super.dispose(); diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java index e74f13e85..23773862a 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/AbstractTreeIterator.java @@ -402,6 +402,24 @@ public abstract class AbstractTreeIterator { return TreeWalk.pathOf(this); } + /** + * Get the current entry's path hash code. + *

+ * This method computes a hash code on the fly for this path, the hash is + * suitable to cluster objects that may have similar paths together. + * + * @return path hash code; any integer may be returned. + */ + public int getEntryPathHashCode() { + int hash = 0; + for (int i = Math.max(0, pathLen - 16); i < pathLen; i++) { + byte c = path[i]; + if (c != ' ') + hash = (hash >>> 2) + (c << 24); + } + return hash; + } + /** * Get the byte array buffer object IDs must be copied out of. *