diff --git a/fine-jgit/lib/JavaEWAH-0.7.9.jar b/fine-jgit/lib/JavaEWAH-0.7.9.jar deleted file mode 100644 index ccdb37143..000000000 Binary files a/fine-jgit/lib/JavaEWAH-0.7.9.jar and /dev/null differ diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java index ef00dc7a1..b5ee537ef 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java @@ -45,7 +45,7 @@ package com.fr.third.eclipse.jgit.internal.storage.file; import com.fr.third.eclipse.jgit.lib.AnyObjectId; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; /** * Base implementation of the PackBitmapIndex. diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitSet.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitSet.java index f86208d35..26633b626 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitSet.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitSet.java @@ -43,7 +43,7 @@ package com.fr.third.eclipse.jgit.internal.storage.file; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; import java.util.Arrays; diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java index ebd829334..856fcbccb 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java @@ -51,8 +51,8 @@ import com.fr.third.eclipse.jgit.lib.Constants; import 
com.fr.third.eclipse.jgit.lib.ObjectId; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.util.BlockList; -import com.googlecode.javaewah.EWAHCompressedBitmap; -import com.googlecode.javaewah.IntIterator; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.IntIterator; import java.text.MessageFormat; import java.util.Iterator; diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/GC.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/GC.java index 2e856451b..fec34f17e 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/GC.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/GC.java @@ -181,8 +181,9 @@ public class GC { * @param newPacks */ private void deleteOldPacks(Collection oldPacks, - Collection newPacks) { - oldPackLoop: for (PackFile oldPack : oldPacks) { + Collection newPacks) { + oldPackLoop: + for (PackFile oldPack : oldPacks) { String oldName = oldPack.getPackName(); // check whether an old pack file is also among the list of new // pack files. Then we must not delete it. 
diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/InflatingBitSet.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/InflatingBitSet.java index 096be5f33..ac1705f43 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/InflatingBitSet.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/InflatingBitSet.java @@ -43,8 +43,8 @@ package com.fr.third.eclipse.jgit.internal.storage.file; -import com.googlecode.javaewah.EWAHCompressedBitmap; -import com.googlecode.javaewah.IntIterator; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.IntIterator; /** * A wrapper around the EWAHCompressedBitmap optimized for the contains diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndex.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndex.java index 48c3c681f..ef5c5d15e 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndex.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndex.java @@ -47,7 +47,7 @@ import com.fr.third.eclipse.jgit.errors.CorruptObjectException; import com.fr.third.eclipse.jgit.internal.JGitText; import com.fr.third.eclipse.jgit.lib.AnyObjectId; import com.fr.third.eclipse.jgit.lib.ObjectId; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; import java.io.File; import java.io.FileInputStream; diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java index fec58ad7a..aa6c9f3c3 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java @@ -50,7 +50,7 @@ import 
com.fr.third.eclipse.jgit.lib.Constants; import com.fr.third.eclipse.jgit.lib.ObjectId; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.util.BlockList; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap; import com.fr.third.eclipse.jgit.lib.BitmapIndex.Bitmap; import com.fr.third.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java index c896a5db5..20d5c79f6 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java @@ -47,8 +47,8 @@ import com.fr.third.eclipse.jgit.lib.AnyObjectId; import com.fr.third.eclipse.jgit.lib.BitmapIndex; import com.fr.third.eclipse.jgit.lib.ObjectId; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; -import com.googlecode.javaewah.EWAHCompressedBitmap; -import com.googlecode.javaewah.IntIterator; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.IntIterator; import com.fr.third.eclipse.jgit.internal.storage.file.BasePackBitmapIndex.StoredBitmap; import java.util.Collections; diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexV1.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexV1.java index f103996d7..b099c004c 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexV1.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexV1.java @@ -50,7 +50,7 @@ import com.fr.third.eclipse.jgit.lib.ObjectId; import 
com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.util.IO; import com.fr.third.eclipse.jgit.util.NB; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; import java.io.DataInput; import java.io.IOException; diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexWriterV1.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexWriterV1.java index 70c5d0457..13552f8f1 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexWriterV1.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexWriterV1.java @@ -46,7 +46,7 @@ package com.fr.third.eclipse.jgit.internal.storage.file; import com.fr.third.eclipse.jgit.internal.JGitText; import com.fr.third.eclipse.jgit.lib.Constants; import com.fr.third.eclipse.jgit.util.io.SafeBufferedOutputStream; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder.StoredEntry; import java.io.BufferedOutputStream; diff --git a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java index 6f3d1fcd4..534d9c890 100755 --- a/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java +++ b/fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java @@ -60,7 +60,7 @@ import com.fr.third.eclipse.jgit.revwalk.RevCommit; import com.fr.third.eclipse.jgit.revwalk.RevObject; import com.fr.third.eclipse.jgit.revwalk.RevWalk; import com.fr.third.eclipse.jgit.util.BlockList; -import com.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; import 
com.fr.third.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; import java.io.IOException; diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/BitCounter.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/BitCounter.java new file mode 100644 index 000000000..85df35fe5 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/BitCounter.java @@ -0,0 +1,106 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * BitCounter is a fake bitset data structure. Instead of storing the actual + * data, it only records the number of set bits. + * + * @since 0.4.0 + * @author David McIntosh + */ + +public final class BitCounter implements BitmapStorage { + + /** + * Virtually adds a word: only its number of set bits is added to the count. + * + * @param newdata + * the word + */ + @Override + public void add(final long newdata) { + this.oneBits += Long.bitCount(newdata); + return; + } + + /** + * Virtually adds several literal words by counting the set bits of each one. + * + * @param data + * the literal words + * @param start + * the starting point in the array + * @param number + * the number of literal words to add + */ + @Override + public void addStreamOfLiteralWords(long[] data, int start, int number) { + for (int i = start; i < start + number; i++) { + add(data[i]); + } + return; + } + + /** + * Virtually adds many zeroes or ones; only runs of ones change the count. + * + * @param v + * true for words of ones, false for words of zeros + * @param number + * how many words to add (the long product is narrowed to int when accumulated) + */ + @Override +public void addStreamOfEmptyWords(boolean v, long number) { + if (v) { + this.oneBits += number * EWAHCompressedBitmap.wordinbits; + } + return; + } + + /** + * virtually add several negated literal words.
+ * + * @param data + * the literal words + * @param start + * the starting point in the array + * @param number + * the number of literal words to add + */ + // implements BitmapStorage#addStreamOfNegatedLiteralWords + @Override +public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { + for (int i = start; i < start + number; i++) { + add(~data[i]); + } + return; + } + + /** + * As you act on this class, it records the number of set (true) bits. + * + * @return number of set bits + */ + public int getCount() { + return this.oneBits; + } + + /** + * Would normally set the sizeinbits field, but the call is ignored by + * this class. + * + * @param bits + * number of bits + */ + // implements BitmapStorage#setSizeInBits + @Override +public void setSizeInBits(int bits) { + // no action + } + + private int oneBits; + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/BitmapStorage.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/BitmapStorage.java new file mode 100644 index 000000000..929b2b8b3 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/BitmapStorage.java @@ -0,0 +1,71 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Low level bitset writing methods. + * + * @since 0.4.0 + * @author David McIntosh + */ +public interface BitmapStorage { + + /** + * Adding words directly to the bitmap (for expert use). + * + * This is normally how you add data to the array. So you add bits in streams + * of 8*8 bits. + * + * @param newdata + * the word + */ + public void add(final long newdata); + + /** + * if you have several literal words to copy over, this might be faster.
+ * + * @param data + * the literal words + * @param start + * the starting point in the array + * @param number + * the number of literal words to add + */ + public void addStreamOfLiteralWords(final long[] data, final int start, + final int number); + + /** + * For experts: You want to add many zeroes or ones? This is the method you + * use. + * + * @param v + * true for ones, false for zeros + * @param number + * how many words to add + */ + public void addStreamOfEmptyWords(final boolean v, final long number); + + /** + * Like "addStreamOfLiteralWords" but negates the words being added. + * + * @param data + * the literal words + * @param start + * the starting point in the array + * @param number + * the number of literal words to add + */ + public void addStreamOfNegatedLiteralWords(long[] data, final int start, + final int number); + + /** + * Directly sets the sizeinbits field. + * + * @param bits + * number of bits + */ + public void setSizeInBits(final int bits); +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedIterator.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedIterator.java new file mode 100644 index 000000000..bf0e0c94a --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedIterator.java @@ -0,0 +1,151 @@ +package com.fr.third.googlecode.javaewah; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * This class can be used to iterate over blocks of bitmap data. + * + * @author Daniel Lemire + * + */ +public class BufferedIterator implements IteratingRLW { + /** + * Instantiates a new iterating buffered running length word.
+ * + * @param iterator source of the EWAHIterators to walk; if it has no element, nothing is loaded + */ + public BufferedIterator(final CloneableIterator iterator) { + this.masteriterator = iterator; + if(this.masteriterator.hasNext()) { + this.iterator = this.masteriterator.next(); + this.brlw = new BufferedRunningLengthWord(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + } + + + /** + * Discard first words, iterating to the next running length word if needed. + * + * @param x the number of words to be discarded + */ + @Override + public void discardFirstWords(long x) { + while (x > 0) { + if (this.brlw.RunningLength > x) { + this.brlw.RunningLength -= x; + return; + } + x -= this.brlw.RunningLength; + this.brlw.RunningLength = 0; + long toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; + + this.literalWordStartPosition += toDiscard; + this.brlw.NumberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.next()) { + break; + } + } + } + } + /** + * Move to the next RunningLengthWord, reloading from the master iterator when the current iterator is exhausted + * @return whether the move was possible + */ + @Override + public boolean next() { + if (!this.iterator.hasNext()) { + if(!reload()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } + private boolean reload() { + if(!this.masteriterator.hasNext()) { + return false; + } + this.iterator = this.masteriterator.next(); + this.buffer = this.iterator.buffer(); + return true; + } + + + /** + * Get the nth literal word for the current running length word + * @param index zero based index + * @return the literal word + */ + @Override + public long getLiteralWordAt(int index) { + return this.buffer[this.literalWordStartPosition + index]; + } + + /** + * Gets
the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.brlw.NumberOfLiteralWords; + } + + /** + * Gets the running bit of the current running length word. + * + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.brlw.RunningBit; + } + + /** + * Gets the running length of the current running length word. + * + * @return the running length + */ + @Override + public long getRunningLength() { + return this.brlw.RunningLength; + } + + /** + * Size in uncompressed words of the current running length word. + * + * @return the size, as reported by the buffered running length word + */ + @Override + public long size() { + return this.brlw.size(); + } + + + @Override + public BufferedIterator clone() throws CloneNotSupportedException { + BufferedIterator answer = (BufferedIterator) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + answer.masteriterator = this.masteriterator.clone(); + return answer; + } + + private BufferedRunningLengthWord brlw; + private long[] buffer; + private int literalWordStartPosition; + private EWAHIterator iterator; + private CloneableIterator masteriterator; + } \ No newline at end of file diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedRunningLengthWord.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedRunningLengthWord.java new file mode 100644 index 000000000..82afd2014 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedRunningLengthWord.java @@ -0,0 +1,175 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + + + +/** + * Mostly for internal use.
Similar to RunningLengthWord, but can + * be modified without access to the array, and has faster access. + * + * @author Daniel Lemire + * @since 0.1.0 + * + */ +public final class BufferedRunningLengthWord implements Cloneable { + + /** + * Instantiates a new buffered running length word. + * + * @param a the word (NOTE(review): the running length is cast to int before being stored in a long field - confirm it always fits) + */ + public BufferedRunningLengthWord(final long a) { + this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); + this.RunningBit = (a & 1) != 0; + this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); + } + + /** + * Instantiates a new buffered running length word. + * + * @param rlw the running length word; its word is read from its parent buffer at its position + */ + public BufferedRunningLengthWord(final RunningLengthWord rlw) { + this(rlw.parent.buffer[rlw.position]); + } + + /** + * Discard first words: the running length is consumed first, then literal words. + * + * @param x the number of words to discard + */ + public void discardFirstWords(long x) { + if (this.RunningLength >= x) { + this.RunningLength -= x; + return; + } + x -= this.RunningLength; + this.RunningLength = 0; + this.literalwordoffset += x; + this.NumberOfLiteralWords -= x; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return this.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return this.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public long getRunningLength() { + return this.RunningLength; + } + + /** + * Reset the values using the provided word.
+ * + * @param a the word (NOTE(review): the running length is cast to int before being stored in a long field - confirm it always fits) + */ + public void reset(final long a) { + this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); + this.RunningBit = (a & 1) != 0; + this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); + this.literalwordoffset = 0; + } + + /** + * Reset the values of this running length word so that it has the same values + * as the other running length word. + * + * @param rlw the other running length word + */ + public void reset(final RunningLengthWord rlw) { + reset(rlw.parent.buffer[rlw.position]); + } + + /** + * Sets the number of literal words. + * + * @param number the new number of literal words + */ + public void setNumberOfLiteralWords(final int number) { + this.NumberOfLiteralWords = number; + } + + /** + * Sets the running bit. + * + * @param b the new running bit + */ + public void setRunningBit(final boolean b) { + this.RunningBit = b; + } + + /** + * Sets the running length. + * + * @param number the new running length + */ + public void setRunningLength(final long number) { + this.RunningLength = number; + } + + /** + * Size in uncompressed words. + * + * @return the running length plus the number of literal words + */ + public long size() { + return this.RunningLength + this.NumberOfLiteralWords; + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + " running length = " + + getRunningLength() + " number of lit. words " + + getNumberOfLiteralWords(); + } + + @Override +public BufferedRunningLengthWord clone() throws CloneNotSupportedException { + BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone(); + answer.literalwordoffset = this.literalwordoffset; + answer.NumberOfLiteralWords = this.NumberOfLiteralWords; + answer.RunningBit = this.RunningBit; + answer.RunningLength = this.RunningLength; + return answer; + } + + + /** how many literal words have we read so far?
*/ + public int literalwordoffset = 0; + + /** The Number of literal words. */ + public int NumberOfLiteralWords; + + /** The Running bit. */ + public boolean RunningBit; + + /** The Running length. */ + public long RunningLength; + + +} \ No newline at end of file diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/CloneableIterator.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/CloneableIterator.java new file mode 100644 index 000000000..a17eded16 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/CloneableIterator.java @@ -0,0 +1,24 @@ +package com.fr.third.googlecode.javaewah; + +/** + * Like a standard Java iterator, except that you can clone it. + * + * @param <E> the data type of the iterator + */ +public interface CloneableIterator<E> extends Cloneable { + + /** + * @return whether there is at least one more element + */ + public boolean hasNext(); + /** + * @return the next element + */ + public E next(); + /** + * @return a copy of this iterator + * @throws CloneNotSupportedException this should never happen in practice + */ + public CloneableIterator<E> clone() throws CloneNotSupportedException; + +} \ No newline at end of file diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHCompressedBitmap.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHCompressedBitmap.java new file mode 100644 index 000000000..bc9397da9 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHCompressedBitmap.java @@ -0,0 +1,1631 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.*; +import java.io.*; + + + +/** + *

+ * This implements the patent-free(1) EWAH scheme. Roughly speaking, it is a + * 64-bit variant of the BBC compression scheme used by Oracle for its bitmap + * indexes. + *

+ * + *

+ * The objective of this compression type is to provide some compression, while + * reducing as much as possible the CPU cycle usage. + *

+ * + * + *

+ * This implementation being 64-bit, it assumes a 64-bit CPU together with a + * 64-bit Java Virtual Machine. This same code on a 32-bit machine may not be as + * fast. + *

+ * + *

+ * There is also a 32-bit version of this code in the class + * javaewah32.EWAHCompressedBitmap32 + *

+ * + * @see com.googlecode.javaewah32.EWAHCompressedBitmap32 + * + *

+ * For more details, see the following paper: + *

+ * + *
    + *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves + * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages + * 3-28, 2010. http://arxiv.org/abs/0901.3751
  • + *
+ * + *

+ * A 32-bit version of the compressed format was described by Wu et al. and + * named WBC: + *

+ * + *
    + *
  • K. Wu, E. J. Otoo, A. Shoshani, H. Nordberg, Notes on design and + * implementation of compressed bit vectors, Tech. Rep. LBNL/PUB-3161, + * Lawrence Berkeley National Laboratory (2001), available from + * http://crd.lbl.gov/~kewu/ps/PUB-3161.html
  • + *
+ * + *

+ * Probably, the best prior art is the Oracle bitmap compression scheme + * (BBC): + *

+ *
    + *
  • G. Antoshenkov, Byte-Aligned Bitmap Compression, DCC'95, 1995.
  • + *
+ * + *

+ * 1- The authors do not know of any patent infringed by the following + * implementation. However, similar schemes, like WAH are covered by + * patents. + *

+ * + * @since 0.1.0 + */ +public final class EWAHCompressedBitmap implements Cloneable, Externalizable, + Iterable, BitmapStorage, LogicalElement { + + /** + * Creates an empty bitmap (no bit set to true). + */ + public EWAHCompressedBitmap() { + this.buffer = new long[defaultbuffersize]; + this.rlw = new RunningLengthWord(this, 0); + } + + /** + * Sets explicitly the buffer size (in 64-bit words). The initial memory usage + * will be "buffersize * 64" bits. For large poorly compressible bitmaps, using + * large values may improve performance. + * + * @param buffersize + * number of 64-bit words reserved when the object is created + */ + public EWAHCompressedBitmap(final int buffersize) { + this.buffer = new long[buffersize]; + this.rlw = new RunningLengthWord(this, 0); + } + + /** + * Adding words directly to the bitmap (for expert use). + * + * This is normally how you add data to the array. So you add bits in streams + * of 8*8 bits. + * + * Example: if you add 321, you have added (in binary notation) + * 0b101000001, so you have effectively called set(0), set(6), set(8) + * in sequence. + * + * @param newdata + * the word + */ + @Override +public void add(final long newdata) { + add(newdata, wordinbits); + } + + /** + * Adding words directly to the bitmap (for expert use). + * + * @param newdata + * the word + * @param bitsthatmatter + * the number of significant bits (by default it should be 64) + */ + public void add(final long newdata, final int bitsthatmatter) { + this.sizeinbits += bitsthatmatter; + if (newdata == 0) { + addEmptyWord(false); + } else if (newdata == ~0l) { + addEmptyWord(true); + } else { + addLiteralWord(newdata); + } + } + + /** + * For internal use.
+ * + * @param v + * true for a word of all ones, false for a word of all zeros + */ + private void addEmptyWord(final boolean v) { + final boolean noliteralword = (this.rlw.getNumberOfLiteralWords() == 0); + final long runlen = this.rlw.getRunningLength(); + if ((noliteralword) && (runlen == 0)) { + this.rlw.setRunningBit(v); + } + if ((noliteralword) && (this.rlw.getRunningBit() == v) + && (runlen < RunningLengthWord.largestrunninglengthcount)) { + this.rlw.setRunningLength(runlen + 1); + return; + } + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(1); + return; + } + + /** + * For internal use: appends one literal word, starting a new running length word first when the current one is full. + * + * @param newdata + * the literal word + */ + private void addLiteralWord(final long newdata) { + int numbersofar = this.rlw.getNumberOfLiteralWords(); + if (numbersofar >= RunningLengthWord.largestliteralcount) { + // current running length word is full: open a fresh one and count from zero + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + numbersofar = 0; + } + this.rlw.setNumberOfLiteralWords(numbersofar + 1); + push_back(newdata); + } + + /** + * if you have several literal words to copy over, this might be faster. + * + * + * @param data + * the literal words + * @param start + * the starting point in the array (NOTE(review): not advanced between chunks if the literal counter fills up - confirm against push_back) + * @param number + * the number of literal words to add + */ + @Override +public void addStreamOfLiteralWords(final long[] data, final int start, + final int number) { + int leftovernumber = number; + while(leftovernumber > 0) { + final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); + final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount + - NumberOfLiteralWords ?
leftovernumber : RunningLengthWord.largestliteralcount + - NumberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd); + leftovernumber -= whatwecanadd; + push_back(data, start, whatwecanadd); + this.sizeinbits += whatwecanadd * wordinbits; + if (leftovernumber > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + } + } + } + + /** + * For experts: You want to add many zeroes or ones? This is the method you + * use. + * + * @param v + * true to add words of ones, false to add words of zeros + * @param number + * how many words to add; zero is a no-op + */ + @Override +public void addStreamOfEmptyWords(final boolean v, long number) { + if (number == 0) + return; + this.sizeinbits += number * wordinbits; + if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { + this.rlw.setRunningBit(v); + } else if ((this.rlw.getNumberOfLiteralWords() != 0) + || (this.rlw.getRunningBit() != v)) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + } + final long runlen = this.rlw.getRunningLength(); + final long whatwecanadd = number < RunningLengthWord.largestrunninglengthcount + - runlen ? number : RunningLengthWord.largestrunninglengthcount - runlen; + this.rlw.setRunningLength(runlen + whatwecanadd); + number -= whatwecanadd; + while (number >= RunningLengthWord.largestrunninglengthcount) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(RunningLengthWord.largestrunninglengthcount); + number -= RunningLengthWord.largestrunninglengthcount; + } + if (number > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(number); + } + } + + /** + * Same as addStreamOfLiteralWords, but the words are negated.
+ * + * @param data + * the literal words + * @param start + * the starting point in the array (NOTE(review): not advanced between chunks if the literal counter fills up - confirm against negative_push_back) + * @param number + * the number of literal words to add + */ + @Override + public void addStreamOfNegatedLiteralWords(final long[] data, + final int start, final int number) { + int leftovernumber = number; + while (leftovernumber > 0) { + final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); + final int whatwecanadd = leftovernumber < RunningLengthWord.largestliteralcount + - NumberOfLiteralWords ? leftovernumber + : RunningLengthWord.largestliteralcount + - NumberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + + whatwecanadd); + leftovernumber -= whatwecanadd; + negative_push_back(data, start, whatwecanadd); + this.sizeinbits += whatwecanadd * wordinbits; + if (leftovernumber > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + } + } + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @since 0.4.3 + * @param a + * the other bitmap + * @return a new bitmap holding the AND, reserved up front to the larger of the two compressed sizes + */ + @Override +public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) { + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + container + .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords + : a.actualsizeinwords); + andToContainer(a, container); + return container; + } + /** + * Computes new compressed bitmap containing the bitwise AND values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()).
+ * + * @since 0.4.0 + * @param a + * the other bitmap + * @param container + * where we store the result + */ + public void andToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { + final EWAHIterator i = a.getEWAHIterator(); + final EWAHIterator j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size()>0) && (rlwj.size()>0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final long index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + if(adjustContainerSizeWhenAggregating) { + final boolean i_remains = rlwi.size()>0; + final IteratingBufferedRunningLengthWord remaining = i_remains ? 
rlwi : rlwj; + remaining.dischargeAsEmpty(container); + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } + } + + + /** + * Returns the cardinality of the result of a bitwise AND of the values of the + * current bitmap with some other bitmap. Avoids needing to allocate an + * intermediate bitmap to hold the result of the OR. + * + * @since 0.4.0 + * @param a + * the other bitmap + * @return the cardinality + */ + public int andCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + andToContainer(a, counter); + return counter.getCount(); + } + + /** + * Returns a new compressed bitmap containing the bitwise AND NOT values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param a + * the other bitmap + * @return the EWAH compressed bitmap + */ + @Override +public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) { + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + container + .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords + : a.actualsizeinwords); + andNotToContainer(a, container); + return container; + } + + /** + * Returns a new compressed bitmap containing the bitwise AND NOT values of + * the current bitmap with some other bitmap. This method is expected to + * be faster than doing A.and(B.clone().not()). + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). 
+ * + * @since 0.4.0 + * @param a the other bitmap + * @param container where to store the result + */ + public void andNotToContainer(final EWAHCompressedBitmap a, + final BitmapStorage container) { + final EWAHIterator i = getEWAHIterator(); + final EWAHIterator j = a.getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size()>0) && (rlwj.size()>0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj + : rlwi; + if ( ((predator.getRunningBit() == true) && (i_is_prey)) + || ((predator.getRunningBit() == false) && (!i_is_prey))){ + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else if (i_is_prey) { + long index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + long index = prey.dischargeNegated(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size()>0; + final 
IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; + if(i_remains) + remaining.discharge(container); + else if(adjustContainerSizeWhenAggregating) + remaining.dischargeAsEmpty(container); + if(adjustContainerSizeWhenAggregating) + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise AND NOT of the values of + * the current bitmap with some other bitmap. Avoids needing to allocate an + * intermediate bitmap to hold the result of the OR. + * + * @since 0.4.0 + * @param a + * the other bitmap + * @return the cardinality + */ + public int andNotCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + andNotToContainer(a, counter); + return counter.getCount(); + } + + /** + * reports the number of bits set to true. Running time is proportional to + * compressed size (as reported by sizeInBytes). + * + * @return the number of bits set to true + */ + public int cardinality() { + int counter = 0; + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); + while (i.hasNext()) { + RunningLengthWord localrlw = i.next(); + if (localrlw.getRunningBit()) { + counter += wordinbits * localrlw.getRunningLength(); + } + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + counter += Long.bitCount(i.buffer()[i.literalWords() + j]); + } + } + return counter; + } + + /** + * Clear any set bits and set size in bits back to 0 + */ + public void clear() { + this.sizeinbits = 0; + this.actualsizeinwords = 1; + this.rlw.position = 0; + // buffer is not fully cleared but any new set operations should overwrite + // stale data + this.buffer[0] = 0; + } + + /* + * @see java.lang.Object#clone() + */ + @Override + public EWAHCompressedBitmap clone() throws CloneNotSupportedException { + final EWAHCompressedBitmap clone = (EWAHCompressedBitmap) super.clone(); + clone.buffer = this.buffer.clone(); + clone.rlw = new 
RunningLengthWord(clone, this.rlw.position); + clone.actualsizeinwords = this.actualsizeinwords; + clone.sizeinbits = this.sizeinbits; + return clone; + } + + /** + * Deserialize. + * + * @param in + * the DataInput stream + * @throws IOException + * Signals that an I/O exception has occurred. + */ + public void deserialize(DataInput in) throws IOException { + this.sizeinbits = in.readInt(); + this.actualsizeinwords = in.readInt(); + if (this.buffer.length < this.actualsizeinwords) { + this.buffer = new long[this.actualsizeinwords]; + } + for (int k = 0; k < this.actualsizeinwords; ++k) + this.buffer[k] = in.readLong(); + this.rlw = new RunningLengthWord(this, in.readInt()); + } + + /** + * Check to see whether the two compressed bitmaps contain the same set bits. + * + * @see Object#equals(Object) + */ + @Override + public boolean equals(Object o) { + if (o instanceof EWAHCompressedBitmap) { + try { + this.xorToContainer((EWAHCompressedBitmap) o, new NonEmptyVirtualStorage()); + return true; + } catch (NonEmptyVirtualStorage.NonEmptyException e) { + return false; + } + } + return false; + } + + /** + * For experts: You want to add many zeroes or ones faster? + * + * This method does not update sizeinbits. + * + * @param v + * the boolean value + * @param number + * the number (must be greater than 0) + */ + private void fastaddStreamOfEmptyWords(final boolean v, long number) { + if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { + this.rlw.setRunningBit(v); + } else if ((this.rlw.getNumberOfLiteralWords() != 0) + || (this.rlw.getRunningBit() != v)) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + } + + final long runlen = this.rlw.getRunningLength(); + final long whatwecanadd = number < RunningLengthWord.largestrunninglengthcount + - runlen ? 
number : RunningLengthWord.largestrunninglengthcount - runlen; + this.rlw.setRunningLength(runlen + whatwecanadd); + number -= whatwecanadd; + + while (number >= RunningLengthWord.largestrunninglengthcount) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(RunningLengthWord.largestrunninglengthcount); + number -= RunningLengthWord.largestrunninglengthcount; + } + if (number > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(number); + } + } + + /** + * Gets an EWAHIterator over the data. This is a customized iterator which + * iterates over run length word. For experts only. + * + * @return the EWAHIterator + */ + public EWAHIterator getEWAHIterator() { + return new EWAHIterator(this, this.actualsizeinwords); + } + + /** + * @return the IteratingRLW iterator corresponding to this bitmap + */ + public IteratingRLW getIteratingRLW() { + return new IteratingBufferedRunningLengthWord(this); + } + /** + * get the locations of the true values as one vector. 
(may use more memory + * than iterator()) + * + * @return the positions + */ + public List getPositions() { + final ArrayList v = new ArrayList(); + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); + int pos = 0; + while (i.hasNext()) { + RunningLengthWord localrlw = i.next(); + if (localrlw.getRunningBit()) { + for (int j = 0; j < localrlw.getRunningLength(); ++j) { + for (int c = 0; c < wordinbits; ++c) + v.add(new Integer(pos++)); + } + } else { + pos += wordinbits * localrlw.getRunningLength(); + } + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + long data = i.buffer()[i.literalWords() + j]; + while (data != 0) { + final int ntz = Long.numberOfTrailingZeros(data); + data ^= (1l << ntz); + v.add(new Integer(ntz + pos)); + } + pos += wordinbits; + } + } + while ((v.size() > 0) + && (v.get(v.size() - 1).intValue() >= this.sizeinbits)) + v.remove(v.size() - 1); + return v; + } + + /** + * Returns a customized hash code (based on Karp-Rabin). Naturally, if the + * bitmaps are equal, they will hash to the same value. + * + */ + @Override + public int hashCode() { + int karprabin = 0; + final int B = 31; + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); + while( i.hasNext() ) { + i.next(); + if (i.rlw.getRunningBit() == true) { + karprabin += B * karprabin + + (i.rlw.getRunningLength() & ((1l << 32) - 1)); + karprabin += B * karprabin + (i.rlw.getRunningLength() >>> 32); + } + for (int k = 0; k < i.rlw.getNumberOfLiteralWords(); ++k) { + karprabin += B * karprabin + (this.buffer[i.literalWords() + k] & ((1l << 32) - 1)); + karprabin += B * karprabin + (this.buffer[i.literalWords() + k] >>> 32); + } + } + return karprabin; + } + + /** + * Return true if the two EWAHCompressedBitmap have both at least one true bit + * in the same position. Equivalently, you could call "and" and check whether + * there is a set bit, but intersects will run faster if you don't need the + * result of the "and" operation. 
+ * + * @since 0.3.2 + * @param a + * the other bitmap + * @return whether they intersect + */ + public boolean intersects(final EWAHCompressedBitmap a) { + NonEmptyVirtualStorage nevs = new NonEmptyVirtualStorage(); + try { + this.andToContainer(a, nevs); + } catch (NonEmptyVirtualStorage.NonEmptyException nee) { + return true; + } + return false; + } + + /** + * Iterator over the set bits (this is what most people will want to use to + * browse the content if they want an iterator). The location of the set bits + * is returned, in increasing order. + * + * @return the int iterator + */ + public IntIterator intIterator() { + return new IntIteratorImpl( + new EWAHIterator(this, this.actualsizeinwords)); + } + + /** + * iterate over the positions of the true values. This is similar to + * intIterator(), but it uses Java generics. + * + * @return the iterator + */ + @Override +public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return new Integer(this.under.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("bitsets do not support remove"); + } + + final private IntIterator under = intIterator(); + }; + } + + /** + * For internal use. 
+ * + * @param data + * the array of words to be added + * @param start + * the starting point + * @param number + * the number of words to add + */ + private void negative_push_back(final long[] data, final int start, + final int number) { + while (this.actualsizeinwords + number >= this.buffer.length) { + final long oldbuffer[] = this.buffer; + if((this.actualsizeinwords + number) < 32768) + this.buffer = new long[ (this.actualsizeinwords + number) * 2]; + else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow + this.buffer = new long[Integer.MAX_VALUE]; + else + this.buffer = new long[(this.actualsizeinwords + number) * 3 / 2]; + System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); + this.rlw.parent.buffer = this.buffer; + } + for (int k = 0; k < number; ++k) + this.buffer[this.actualsizeinwords + k] = ~data[start + k]; + this.actualsizeinwords += number; + } + + /** + * Negate (bitwise) the current bitmap. To get a negated copy, do + * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); x.not(); + * + * The running time is proportional to the compressed size (as reported by + * sizeInBytes()). 
+ * + */ + @Override +public void not() { + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); + if (!i.hasNext()) + return; + + while (true) { + final RunningLengthWord rlw1 = i.next(); + rlw1.setRunningBit(!rlw1.getRunningBit()); + for (int j = 0; j < rlw1.getNumberOfLiteralWords(); ++j) { + i.buffer()[i.literalWords() + j] = ~i.buffer()[i.literalWords() + j]; + } + + if (!i.hasNext()) {// must potentially adjust the last literal word + final int usedbitsinlast = this.sizeinbits % wordinbits; + if (usedbitsinlast == 0) + return; + + if (rlw1.getNumberOfLiteralWords() == 0) { + if((rlw1.getRunningLength()>0) && (rlw1.getRunningBit())) { + rlw1.setRunningLength(rlw1.getRunningLength()-1); + this.addLiteralWord((~0l) >>> (wordinbits - usedbitsinlast)); + } + return; + } + i.buffer()[i.literalWords() + rlw1.getNumberOfLiteralWords() - 1] &= ((~0l) >>> (wordinbits - usedbitsinlast)); + return; + } + } + } + + /** + * Returns a new compressed bitmap containing the bitwise OR values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param a + * the other bitmap + * @return the EWAH compressed bitmap + */ + @Override +public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) { + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + container.reserve(this.actualsizeinwords + a.actualsizeinwords); + orToContainer(a, container); + return container; + } + + + + /** + * Computes the bitwise or between the current bitmap and the bitmap "a". + * Stores the result in the container. 
+ * + * @since 0.4.0 + * @param a + * the other bitmap + * @param container + * where we store the result + */ + public void orToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { + final EWAHIterator i = a.getEWAHIterator(); + final EWAHIterator j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size()>0) && (rlwj.size()>0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi + : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == true) { + container.addStreamOfEmptyWords(true, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + long index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) { + container.add(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)); + } + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size()>0; + final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; + remaining.discharge(container); + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise OR of the values of the + * current bitmap with some other bitmap. 
Avoids needing to allocate an + * intermediate bitmap to hold the result of the OR. + * + * @since 0.4.0 + * @param a + * the other bitmap + * @return the cardinality + */ + public int orCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + orToContainer(a, counter); + return counter.getCount(); + } + + /** + * For internal use. + * + * @param data + * the word to be added + */ + private void push_back(final long data) { + if (this.actualsizeinwords == this.buffer.length) { + final long oldbuffer[] = this.buffer; + if(oldbuffer.length < 32768) + this.buffer = new long[ oldbuffer.length * 2]; + else if(oldbuffer.length * 3 / 2 < oldbuffer.length) // overflow + this.buffer = new long[Integer.MAX_VALUE]; + else + this.buffer = new long[oldbuffer.length * 3 / 2]; + System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); + this.rlw.parent.buffer = this.buffer; + } + this.buffer[this.actualsizeinwords++] = data; + } + + /** + * For internal use. 
+ * + * @param data + * the array of words to be added + * @param start + * the starting point + * @param number + * the number of words to add + */ + private void push_back(final long[] data, final int start, final int number) { + if (this.actualsizeinwords + number >= this.buffer.length) { + final long oldbuffer[] = this.buffer; + if(this.actualsizeinwords + number < 32768) + this.buffer = new long[(this.actualsizeinwords + number) * 2]; + else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) // overflow + this.buffer = new long[Integer.MAX_VALUE]; + else + this.buffer = new long[( this.actualsizeinwords + number) * 3 / 2]; + System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); + this.rlw.parent.buffer = this.buffer; + } + System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); + this.actualsizeinwords += number; + } + + /* + * @see java.io.Externalizable#readExternal(java.io.ObjectInput) + */ + @Override +public void readExternal(ObjectInput in) throws IOException { + deserialize(in); + } + + /** + * For internal use (trading off memory for speed). + * + * @param size + * the number of words to allocate + * @return True if the operation was a success. + */ + private boolean reserve(final int size) { + if (size > this.buffer.length) { + final long oldbuffer[] = this.buffer; + this.buffer = new long[size]; + System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); + this.rlw.parent.buffer = this.buffer; + return true; + } + return false; + } + + /** + * Serialize. + * + * @param out + * the DataOutput stream + * @throws IOException + * Signals that an I/O exception has occurred. 
+ */ + public void serialize(DataOutput out) throws IOException { + out.writeInt(this.sizeinbits); + out.writeInt(this.actualsizeinwords); + for (int k = 0; k < this.actualsizeinwords; ++k) + out.writeLong(this.buffer[k]); + out.writeInt(this.rlw.position); + } + + /** + * Report the size required to serialize this bitmap + * + * @return the size in bytes + */ + public int serializedSizeInBytes() { + return this.sizeInBytes() + 3 * 4; + } + + + /** + * Query the value of a single bit. Relying on this method when speed is + * needed is discouraged. The complexity is linear with the size of the + * bitmap. + * + * (This implementation is based on zhenjl's Go version of JavaEWAH.) + * + * @param i + * the bit we are interested in + * @return whether the bit is set to true + */ + public boolean get(final int i) { + if ((i < 0) || (i >= this.sizeinbits)) + return false; + int WordChecked = 0; + final IteratingRLW j = getIteratingRLW(); + final int wordi = i/wordinbits; + while (WordChecked <= wordi ) { + WordChecked += j.getRunningLength(); + if (wordi < WordChecked) { + return j.getRunningBit(); + } + if (wordi < WordChecked + j.getNumberOfLiteralWords()) { + final long w = j.getLiteralWordAt(wordi - WordChecked); + return (w & (1l << i)) != 0; + } + WordChecked += j.getNumberOfLiteralWords(); + j.next(); + } + return false; + } + + /** + * Set the bit at position i to true, the bits must be set in (strictly) increasing + * order. For example, set(15) and then set(7) will fail. You must do set(7) + * and then set(15). + * + * @param i + * the index + * @return true if the value was set (always true when i greater or equal to sizeInBits()). 
+ * @throws IndexOutOfBoundsException + * if i is negative or greater than Integer.MAX_VALUE - 64 + */ + public boolean set(final int i) { + if ((i > Integer.MAX_VALUE - wordinbits) || (i < 0)) + throw new IndexOutOfBoundsException("Set values should be between 0 and " + + (Integer.MAX_VALUE - wordinbits)); + if (i < this.sizeinbits) + return false; + // distance in words: + final int dist = (i + wordinbits) / wordinbits + - (this.sizeinbits + wordinbits - 1) / wordinbits; + this.sizeinbits = i + 1; + if (dist > 0) {// easy + if (dist > 1) + fastaddStreamOfEmptyWords(false, dist - 1); + addLiteralWord(1l << (i % wordinbits)); + return true; + } + if (this.rlw.getNumberOfLiteralWords() == 0) { + this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); + addLiteralWord(1l << (i % wordinbits)); + return true; + } + this.buffer[this.actualsizeinwords - 1] |= 1l << (i % wordinbits); + if (this.buffer[this.actualsizeinwords - 1] == ~0l) { + this.buffer[this.actualsizeinwords - 1] = 0; + --this.actualsizeinwords; + this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); + // next we add one clean word + addEmptyWord(true); + } + return true; + } + + /** + * Set the size in bits. This does not change the compressed bitmap. + * + * @since 0.4.0 + */ + @Override +public void setSizeInBits(final int size) { + if((size+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap.wordinbits-1)/EWAHCompressedBitmap.wordinbits) + throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean)."); + this.sizeinbits = size; + } + + /** + * Change the reported size in bits of the *uncompressed* bitmap represented + * by this compressed bitmap. It may change the underlying compressed bitmap. + * It is not possible to reduce the sizeInBits, but + * it can be extended. 
The new bits are set to false or true depending on the + * value of defaultvalue. + * + * @param size + * the size in bits + * @param defaultvalue + * the default boolean value + * @return true if the update was possible + */ + public boolean setSizeInBits(final int size, final boolean defaultvalue) { + if (size < this.sizeinbits) + return false; + if (defaultvalue == false) + extendEmptyBits(this, this.sizeinbits, size); + else { + // next bit could be optimized + while (((this.sizeinbits % wordinbits) != 0) && (this.sizeinbits < size)) { + this.set(this.sizeinbits); + } + this.addStreamOfEmptyWords(defaultvalue, (size / wordinbits) + - this.sizeinbits / wordinbits); + // next bit could be optimized + while (this.sizeinbits < size) { + this.set(this.sizeinbits); + } + } + this.sizeinbits = size; + return true; + } + + /** + * Returns the size in bits of the *uncompressed* bitmap represented by this + * compressed bitmap. Initially, the sizeInBits is zero. It is extended + * automatically when you set bits to true. + * + * @return the size in bits + */ + @Override +public int sizeInBits() { + return this.sizeinbits; + } + + /** + * Report the *compressed* size of the bitmap (equivalent to memory usage, + * after accounting for some overhead). + * + * @return the size in bytes + */ + @Override +public int sizeInBytes() { + return this.actualsizeinwords * (wordinbits / 8); + } + + /** + * Populate an array of (sorted integers) corresponding to the location of the + * set bits. 
+ * + * @return the array containing the location of the set bits + */ + public int[] toArray() { + int[] ans = new int[this.cardinality()]; + int inanspos = 0; + int pos = 0; + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); + while (i.hasNext()) { + RunningLengthWord localrlw = i.next(); + if (localrlw.getRunningBit()) { + for (int j = 0; j < localrlw.getRunningLength(); ++j) { + for (int c = 0; c < wordinbits; ++c) { + ans[inanspos++] = pos++; + } + } + } else { + pos += wordinbits * localrlw.getRunningLength(); + } + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + long data = i.buffer()[i.literalWords() + j]; + if (!usetrailingzeros) { + for (int c = 0; c < wordinbits; ++c) { + if ((data & (1l << c)) != 0) + ans[inanspos++] = c + pos; + } + pos += wordinbits; + } else { + while (data != 0) { + final int ntz = Long.numberOfTrailingZeros(data); + data ^= (1l << ntz); + ans[inanspos++] = ntz + pos; + } + pos += wordinbits; + } + } + } + return ans; + + } + + /** + * A more detailed string describing the bitmap (useful for debugging). + * + * @return the string + */ + public String toDebugString() { + String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits + + " size in words = " + this.actualsizeinwords + "\n"; + final EWAHIterator i = new EWAHIterator(this, this.actualsizeinwords); + while (i.hasNext()) { + RunningLengthWord localrlw = i.next(); + if (localrlw.getRunningBit()) { + ans += localrlw.getRunningLength() + " 1x11\n"; + } else { + ans += localrlw.getRunningLength() + " 0x00\n"; + } + ans += localrlw.getNumberOfLiteralWords() + " dirties\n"; + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + long data = i.buffer()[i.literalWords() + j]; + ans += "\t" + data + "\n"; + } + } + return ans; + } + + /** + * A string describing the bitmap. 
+ * + * @return the string + */ + @Override + public String toString() { + StringBuffer answer = new StringBuffer(); + IntIterator i = this.intIterator(); + answer.append("{"); + if (i.hasNext()) + answer.append(i.next()); + while (i.hasNext()) { + answer.append(","); + answer.append(i.next()); + } + answer.append("}"); + return answer.toString(); + } + + /** + * swap the content of the bitmap with another. + * @param other bitmap to swap with + */ +public void swap(final EWAHCompressedBitmap other) { + long[] tmp = this.buffer; + this.buffer = other.buffer; + other.buffer = tmp; + + + int tmp2 = this.rlw.position; + this.rlw.position = other.rlw.position; + other.rlw.position = tmp2; + + int tmp3 = this.actualsizeinwords; + this.actualsizeinwords = other.actualsizeinwords; + other.actualsizeinwords = tmp3; + + int tmp4 = this.sizeinbits; + this.sizeinbits = other.sizeinbits; + other.sizeinbits = tmp4; + } + + /** + * Reduce the internal buffer to its minimal allowable size (given + * by this.actualsizeinwords). This can free memory. + */ + public void trim() { + this.buffer = Arrays.copyOf(this.buffer, this.actualsizeinwords); + } + + /* + * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) + */ + @Override +public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. 
+ * + * @param a + * the other bitmap + * @return the EWAH compressed bitmap + */ + @Override +public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) { + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + container.reserve(this.actualsizeinwords + a.actualsizeinwords); + xorToContainer(a, container); + return container; + } + + /** + * Computes a new compressed bitmap containing the bitwise XOR values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * @since 0.4.0 + * @param a + * the other bitmap + * @param container + * where we store the result + */ + public void xorToContainer(final EWAHCompressedBitmap a, final BitmapStorage container) { + final EWAHIterator i = a.getEWAHIterator(); + final EWAHIterator j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord rlwi = new IteratingBufferedRunningLengthWord(i); + final IteratingBufferedRunningLengthWord rlwj = new IteratingBufferedRunningLengthWord(j); + while ((rlwi.size()>0) && (rlwj.size()>0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj; + final IteratingBufferedRunningLengthWord predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + long index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + long index = prey.dischargeNegated(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size()>0; + final IteratingBufferedRunningLengthWord remaining = i_remains ? rlwi : rlwj; + remaining.discharge(container); + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise XOR of the values of the + * current bitmap with some other bitmap. Avoids needing to allocate an + * intermediate bitmap to hold the result of the OR. + * + * @since 0.4.0 + * @param a + * the other bitmap + * @return the cardinality + */ + public int xorCardinality(final EWAHCompressedBitmap a) { + final BitCounter counter = new BitCounter(); + xorToContainer(a, counter); + return counter.getCount(); + } + + /** + * For internal use. Computes the bitwise and of the provided bitmaps and + * stores the result in the container. + * + * @param container + * where the result is stored + * @param bitmaps + * bitmaps to AND + * @since 0.4.3 + */ + public static void andWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... 
bitmaps) { + if(bitmaps.length == 1) throw new IllegalArgumentException("Need at least one bitmap"); + if(bitmaps.length == 2) { + bitmaps[0].andToContainer(bitmaps[1],container); + return; + } + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length - 1; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + answer.andToContainer(bitmaps[bitmaps.length - 1], container); + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of the + * provided bitmaps. + * + * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @since 0.4.3 + * @param bitmaps + * bitmaps to AND together + * @return result of the AND + */ + public static EWAHCompressedBitmap and(final EWAHCompressedBitmap... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0]; + if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + EWAHCompressedBitmap tmp = new EWAHCompressedBitmap(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + return answer; + } + + /** + * Returns the cardinality of the result of a bitwise AND of the values of the + * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold + * the result of the AND. + * + * @since 0.4.3 + * @param bitmaps + * bitmaps to AND + * @return the cardinality + */ + public static int andCardinality(final EWAHCompressedBitmap... 
bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); + final BitCounter counter = new BitCounter(); + andWithContainer(counter, bitmaps); + return counter.getCount(); + } + + /** + * Return a bitmap with the bit set to true at the given + * positions. The positions should be given in sorted order. + * + * (This is a convenience method.) + * + * @since 0.4.5 + * @param setbits list of set bit positions + * @return the bitmap + */ + public static EWAHCompressedBitmap bitmapOf(int ... setbits) { + EWAHCompressedBitmap a = new EWAHCompressedBitmap(); + for (int k : setbits) + a.set(k); + return a; + } + + + + + /** + * For internal use. This simply adds a stream of words made of zeroes so that + * we pad to the desired size. + * + * @param storage + * bitmap to extend + * @param currentSize + * current size (in bits) + * @param newSize + * new desired size (in bits) + * @since 0.4.3 + */ + private static void extendEmptyBits(final BitmapStorage storage, + final int currentSize, final int newSize) { + final int currentLeftover = currentSize % wordinbits; + final int finalLeftover = newSize % wordinbits; + storage.addStreamOfEmptyWords(false, (newSize / wordinbits) - currentSize + / wordinbits + (finalLeftover != 0 ? 1 : 0) + + (currentLeftover != 0 ? -1 : 0)); + } + + + /** + * Uses an adaptive technique to compute the logical OR. + * Mostly for internal use. + * + * @param container where the aggregate is written. + * @param bitmaps to be aggregated + */ + public static void orWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... 
bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + long size = 0L; + long sinbits = 0L; + for (EWAHCompressedBitmap b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation.bufferedorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation.orToContainer(container, bitmaps); + } + } + + + /** + * Uses an adaptive technique to compute the logical XOR. + * Mostly for internal use. + * + * @param container where the aggregate is written. + * @param bitmaps to be aggregated + */ + public static void xorWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + long size = 0L; + long sinbits = 0L; + for (EWAHCompressedBitmap b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation.bufferedxorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation.xorToContainer(container, bitmaps); + } + } + /** + * Returns a new compressed bitmap containing the bitwise OR values of the + * provided bitmaps. This is typically faster than doing the aggregation + * two-by-two (A.or(B).or(C).or(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @since 0.4.0 + * @param bitmaps + * bitmaps to OR together + * @return result of the OR + */ + public static EWAHCompressedBitmap or(final EWAHCompressedBitmap... 
bitmaps) { + if(bitmaps.length == 1) + return bitmaps[0]; + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + int largestSize = 0; + for (EWAHCompressedBitmap bitmap : bitmaps) { + largestSize = Math.max(bitmap.actualsizeinwords, largestSize); + } + container.reserve((int) (largestSize * 1.5)); + orWithContainer(container, bitmaps); + return container; + } + /** + * Returns a new compressed bitmap containing the bitwise XOR values of the + * provided bitmaps. This is typically faster than doing the aggregation + * two-by-two (A.xor(B).xor(C).xor(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param bitmaps + * bitmaps to XOR together + * @return result of the XOR + */ + public static EWAHCompressedBitmap xor(final EWAHCompressedBitmap... bitmaps) { + if(bitmaps.length == 1) + return bitmaps[0]; + final EWAHCompressedBitmap container = new EWAHCompressedBitmap(); + int largestSize = 0; + for (EWAHCompressedBitmap bitmap : bitmaps) { + largestSize = Math.max(bitmap.actualsizeinwords, largestSize); + } + container.reserve((int) (largestSize * 1.5)); + xorWithContainer(container, bitmaps); + return container; + } + + /** + * Returns the cardinality of the result of a bitwise OR of the values of the + * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold + * the result of the OR. + * + * @since 0.4.0 + * @param bitmaps + * bitmaps to OR + * @return the cardinality + */ + public static int orCardinality(final EWAHCompressedBitmap... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); + final BitCounter counter = new BitCounter(); + orWithContainer(counter, bitmaps); + return counter.getCount(); + } + + /** The actual size in words. 
*/ + int actualsizeinwords = 1; + + /** The buffer (array of 64-bit words) */ + long buffer[] = null; + + /** The current (last) running length word. */ + RunningLengthWord rlw = null; + + /** sizeinbits: number of bits in the (uncompressed) bitmap. */ + int sizeinbits = 0; + + /** + * The Constant defaultbuffersize: default memory allocation when the object + * is constructed. + */ + static final int defaultbuffersize = 4; + + /** optimization option **/ + public static final boolean usetrailingzeros = true; + + /** whether we adjust after some aggregation by adding in zeroes **/ + public static final boolean adjustContainerSizeWhenAggregating = true; + + /** The Constant wordinbits represents the number of bits in a long. */ + public static final int wordinbits = 64; + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHIterator.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHIterator.java new file mode 100644 index 000000000..991698814 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHIterator.java @@ -0,0 +1,98 @@ +package com.fr.third.googlecode.javaewah; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * The class EWAHIterator represents a special type of + * efficient iterator iterating over (uncompressed) words of bits. + * It is not meant for end users. + * @author Daniel Lemire + * @since 0.1.0 + * + */ +public final class EWAHIterator implements Cloneable { + + /** + * Instantiates a new EWAH iterator. + * + * @param a the bitmap that holds the array of words + * @param sizeinwords the number of words that are significant in the array of words + */ + public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) { + this.rlw = new RunningLengthWord(a, 0); + this.size = sizeinwords; + this.pointer = 0; + } + + /** + * Allow expert developers to instantiate an EWAHIterator. 
+ * + * @param bitmap we want to iterate over + * @return an iterator + */ + public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) { + return bitmap.getEWAHIterator(); + } + + + /** + * Access to the array of words + * + * @return the long[] + */ + public long[] buffer() { + return this.rlw.parent.buffer; + } + + /** + * Position of the literal words represented by this running length word. + * + * @return the int + */ + public int literalWords() { + return this.pointer - this.rlw.getNumberOfLiteralWords(); + } + + /** + * Checks for next. + * + * @return true if the pointer has not yet reached the size in words + */ + public boolean hasNext() { + return this.pointer < this.size; + } + + /** + * Next running length word. + * + * @return the running length word + */ + public RunningLengthWord next() { + this.rlw.position = this.pointer; + this.pointer += this.rlw.getNumberOfLiteralWords() + 1; + return this.rlw; + } + + @Override + public EWAHIterator clone() throws CloneNotSupportedException { + EWAHIterator ans = (EWAHIterator) super.clone(); + ans.rlw = this.rlw.clone(); + ans.size = this.size; + ans.pointer = this.pointer; + return ans; + } + /** The pointer represents the location of the current running length + * word in the array of words (embedded in the rlw attribute). */ + int pointer; + + /** The current running length word. */ + RunningLengthWord rlw; + + /** The size in words. 
*/ + int size; + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/FastAggregation.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/FastAggregation.java new file mode 100644 index 000000000..80d378469 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/FastAggregation.java @@ -0,0 +1,436 @@ +package com.fr.third.googlecode.javaewah; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.PriorityQueue; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Fast algorithms to aggregate many bitmaps. These algorithms are just given as + * reference. They may not be faster than the corresponding methods in the + * EWAHCompressedBitmap class. + * + * @author Daniel Lemire + * + */ +public class FastAggregation { + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) + * @return the and aggregate. + */ + public static EWAHCompressedBitmap bufferedand(final int bufsize, + final EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedandWithContainer(answer,bufsize, bitmaps); + return answer; + } + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) + * @param bitmaps the source bitmaps + */ + public static void bufferedandWithContainer(final BitmapStorage container,final int bufsize, + final EWAHCompressedBitmap... 
bitmaps) { + + java.util.LinkedList al = new java.util.LinkedList(); + for (EWAHCompressedBitmap bitmap : bitmaps) { + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + + long[] hardbitmap = new long[bufsize*bitmaps.length]; + + for(IteratingRLW i : al) + if (i.size() == 0) { + al.clear(); + break; + } + + while (!al.isEmpty()) { + Arrays.fill(hardbitmap, ~0l); + long effective = Integer.MAX_VALUE; + for(IteratingRLW i : al) { + int eff = IteratorAggregation.inplaceand(hardbitmap, i); + if (eff < effective) + effective = eff; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + for(IteratingRLW i : al) + if (i.size() == 0) { + al.clear(); + break; + } + } + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words + * @return the or aggregate. + */ + public static EWAHCompressedBitmap bufferedor(final int bufsize, + final EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedorWithContainer(answer, bufsize, bitmaps); + return answer; + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize, + final EWAHCompressedBitmap... 
bitmaps) { + int range = 0; + EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + long[] hardbitmap = new long[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + long effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + + } + container.setSizeInBits(range); + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 64-bit words + * @return the xor aggregate. + */ + public static EWAHCompressedBitmap bufferedxor(final int bufsize, + final EWAHCompressedBitmap... bitmaps) { + EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); + bufferedxorWithContainer(answer, bufsize,bitmaps); + return answer; + } + + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 64-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize, + final EWAHCompressedBitmap... 
bitmaps) { + int range = 0; + EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList al = new java.util.ArrayList(); + for (EWAHCompressedBitmap bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord(bitmap)); + } + long[] hardbitmap = new long[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + long effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + } + container.setSizeInBits(range); + } + + /** + * Uses a priority queue to compute the or aggregate. + * @param a class extending LogicalElement (like a compressed bitmap) + * @param bitmaps + * bitmaps to be aggregated + * @return the or aggregate + */ + @SuppressWarnings({ "rawtypes", "unchecked" }) + public static T or(T... bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(T a, T b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (T x : bitmaps) { + pq.add(x); + } + while (pq.size() > 1) { + T x1 = pq.poll(); + T x2 = pq.poll(); + pq.add((T) x1.or(x2)); + } + return pq.poll(); + } + /** + * Uses a priority queue to compute the or aggregate. + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void orToContainer(final BitmapStorage container, + final EWAHCompressedBitmap ... 
bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.or(x2)); + } + pq.poll().orToContainer(pq.poll(), container); + } + + + /** + * Uses a priority queue to compute the xor aggregate. + * + * @param a class extending LogicalElement (like a compressed bitmap) + * @param bitmaps + * bitmaps to be aggregated + * @return the xor aggregate + */ + @SuppressWarnings({ "rawtypes", "unchecked" }) + public static T xor(T... bitmaps) { + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + + @Override + public int compare(T a, T b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (T x : bitmaps) + pq.add(x); + while (pq.size() > 1) { + T x1 = pq.poll(); + T x2 = pq.poll(); + pq.add((T) x1.xor(x2)); + } + return pq.poll(); + } + + /** + * Uses a priority queue to compute the xor aggregate. + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void xorToContainer(final BitmapStorage container, + final EWAHCompressedBitmap ... 
bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue pq = new PriorityQueue(bitmaps.length, + new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap x1 = pq.poll(); + EWAHCompressedBitmap x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + pq.poll().xorToContainer(pq.poll(), container); + } + + /** + * For internal use. Computes the bitwise or of the provided bitmaps and + * stores the result in the container. (This used to be the default.) + * + * @deprecated use EWAHCompressedBitmap.or instead + * @since 0.4.0 + * @param container where store the result + * @param bitmaps to be aggregated + */ + @Deprecated + public static void legacy_orWithContainer(final BitmapStorage container, + final EWAHCompressedBitmap... bitmaps) { + if (bitmaps.length == 2) { + // should be more efficient + bitmaps[0].orToContainer(bitmaps[1], container); + return; + } + + // Sort the bitmaps in descending order by sizeinbits. We will exhaust the + // sorted bitmaps from right to left. + final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); + Arrays.sort(sortedBitmaps, new Comparator() { + @Override + public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { + return a.sizeinbits < b.sizeinbits ? 1 + : a.sizeinbits == b.sizeinbits ? 0 : -1; + } + }); + + final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; + int maxAvailablePos = 0; + for (EWAHCompressedBitmap bitmap : sortedBitmaps) { + EWAHIterator iterator = bitmap.getEWAHIterator(); + if (iterator.hasNext()) { + rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord( + iterator); + } + } + + if (maxAvailablePos == 0) { // this never happens... 
+ container.setSizeInBits(0); + return; + } + + int maxSize = sortedBitmaps[0].sizeinbits; + + while (true) { + long maxOneRl = 0; + long minZeroRl = Long.MAX_VALUE; + long minSize = Long.MAX_VALUE; + int numEmptyRl = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + long size = rlw.size(); + if (size == 0) { + maxAvailablePos = i; + break; + } + minSize = Math.min(minSize, size); + + if (rlw.getRunningBit()) { + long rl = rlw.getRunningLength(); + maxOneRl = Math.max(maxOneRl, rl); + minZeroRl = 0; + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } else { + long rl = rlw.getRunningLength(); + minZeroRl = Math.min(minZeroRl, rl); + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } + } + + if (maxAvailablePos == 0) { + break; + } else if (maxAvailablePos == 1) { + // only one bitmap is left so just write the rest of it out + rlws[0].discharge(container); + break; + } + + if (maxOneRl > 0) { + container.addStreamOfEmptyWords(true, maxOneRl); + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + rlw.discardFirstWords(maxOneRl); + } + } else if (minZeroRl > 0) { + container.addStreamOfEmptyWords(false, minZeroRl); + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + rlw.discardFirstWords(minZeroRl); + } + } else { + int index = 0; + + if (numEmptyRl == 1) { + // if one rlw has literal words to process and the rest have a run of + // 0's we can write them out here + IteratingBufferedRunningLengthWord emptyRl = null; + long minNonEmptyRl = Long.MAX_VALUE; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + long rl = rlw.getRunningLength(); + if (rl == 0) { + assert emptyRl == null; + emptyRl = rlw; + } else { + minNonEmptyRl = Math.min(minNonEmptyRl, rl); + } + } + long wordsToWrite = minNonEmptyRl > minSize ? 
minSize : minNonEmptyRl; + if (emptyRl != null) + emptyRl.writeLiteralWords((int) wordsToWrite, container); + index += wordsToWrite; + } + + while (index < minSize) { + long word = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + if (rlw.getRunningLength() <= index) { + word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength()); + } + } + container.add(word); + index++; + } + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord rlw = rlws[i]; + rlw.discardFirstWords(minSize); + } + } + } + container.setSizeInBits(maxSize); + } + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIterator.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIterator.java new file mode 100644 index 000000000..2aa5ef020 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIterator.java @@ -0,0 +1,31 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * + * The IntIterator interface is used to iterate over a stream of integers. + * + * @author Daniel Lemire + * @since 0.2.0 + * + */ +public interface IntIterator { + + /** + * Is there more? + * + * @return true, if there is more, false otherwise + */ + public boolean hasNext(); + + /** + * Return the next integer + * + * @return the integer + */ + public int next(); +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorImpl.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorImpl.java new file mode 100644 index 000000000..0728ce643 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorImpl.java @@ -0,0 +1,87 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2012, Google Inc. + * Licensed under the Apache License, Version 2.0. 
+ */ + +import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; + +/** + * The IntIteratorImpl is the 64 bit implementation of the + * IntIterator interface, which efficiently returns the stream of integers + * represented by an EWAHIterator. + * + * @author Colby Ranger + * @since 0.5.6 + */ +final class IntIteratorImpl implements IntIterator { + + private final EWAHIterator ewahIter; + private final long[] ewahBuffer; + private int position; + private int runningLength; + private long word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasnext; + + IntIteratorImpl(EWAHIterator ewahIter) { + this.ewahIter = ewahIter; + this.ewahBuffer = ewahIter.buffer(); + this.hasnext = this.moveToNext(); + } + + public final boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (!this.ewahIter.hasNext()) { + return false; + } + setRunningLengthWord(this.ewahIter.next()); + } + return true; + } + + @Override +public boolean hasNext() { + return this.hasnext; + } + + @Override +public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int bit = Long.numberOfTrailingZeros(this.word); + this.word ^= (1l << bit); + answer = this.literalPosition + bit; + } + this.hasnext = this.moveToNext(); + return answer; + } + + private final void setRunningLengthWord(RunningLengthWord rlw) { + this.runningLength = wordinbits * (int) rlw.getRunningLength() + this.position; + if (!rlw.getRunningBit()) { + this.position = this.runningLength; + } + + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); + } + + private final boolean runningHasNext() { + return this.position < this.runningLength; + } + + private final boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.ewahBuffer[this.wordPosition++]; + 
this.literalPosition = this.position; + this.position += wordinbits; + } + return this.word != 0; + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorOverIteratingRLW.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorOverIteratingRLW.java new file mode 100644 index 000000000..c4bf20f63 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorOverIteratingRLW.java @@ -0,0 +1,89 @@ +package com.fr.third.googlecode.javaewah; + +import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Implementation of an IntIterator over an IteratingRLW. + * + * + */ +public class IntIteratorOverIteratingRLW implements IntIterator { + IteratingRLW parent; + private int position; + private int runningLength; + private long word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasnext; + + /** + * @param p iterator we wish to iterate over + */ + public IntIteratorOverIteratingRLW(final IteratingRLW p) { + this.parent = p; + this.position = 0; + setupForCurrentRunningLengthWord(); + this.hasnext = moveToNext(); + } + + /** + * @return whether we could find another set bit; don't move if there is an unprocessed value + */ + private final boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (this.parent.next()) + setupForCurrentRunningLengthWord(); + else return false; + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasnext; + } + + @Override + public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int bit = Long.numberOfTrailingZeros(this.word); + this.word ^= (1l << bit); + answer = this.literalPosition + bit; + } + this.hasnext = 
this.moveToNext(); + return answer; + } + + private final void setupForCurrentRunningLengthWord() { + this.runningLength = wordinbits * (int) this.parent.getRunningLength() + + this.position; + + if (!this.parent.getRunningBit()) { + this.position = this.runningLength; + } + this.wordPosition = 0; + this.wordLength = this.parent.getNumberOfLiteralWords(); + } + + private final boolean runningHasNext() { + return this.position < this.runningLength; + } + + private final boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.parent.getLiteralWordAt(this.wordPosition++); + this.literalPosition = this.position; + this.position += wordinbits; + } + return this.word != 0; + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingBufferedRunningLengthWord.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingBufferedRunningLengthWord.java new file mode 100644 index 000000000..bb8f2edc5 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingBufferedRunningLengthWord.java @@ -0,0 +1,276 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically + * advances to the next BufferedRunningLengthWord as words are discarded. + * + * @since 0.4.0 + * @author David McIntosh + */ +public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable{ + /** + * Instantiates a new iterating buffered running length word. 
+ * + * @param iterator iterator + */ + public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) { + this.iterator = iterator; + this.brlw = new BufferedRunningLengthWord(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + + + + /** + * Instantiates a new iterating buffered running length word. + * @param bitmap over which we want to iterate + * + */ +public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) { + this.iterator = EWAHIterator.getEWAHIterator(bitmap); + this.brlw = new BufferedRunningLengthWord(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + + + + + /** + * Discard first words, iterating to the next running length word if needed. + * + * @param x the number of words to be discarded + */ + @Override +public void discardFirstWords(long x) { + while (x > 0) { + if (this.brlw.RunningLength > x) { + this.brlw.RunningLength -= x; + return; + } + x -= this.brlw.RunningLength; + this.brlw.RunningLength = 0; + long toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; + + this.literalWordStartPosition += toDiscard; + this.brlw.NumberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.iterator.hasNext()) { + break; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + } + } + } + /** + * Move to the next RunningLengthWord + * @return whether the move was possible + */ + @Override +public boolean next() { + if (!this.iterator.hasNext()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } + + /** + * Write out up to max words, returns how many were written + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public long discharge(BitmapStorage container, long max) { + long index = 0; + while ((index < max) && (size() > 0)) { + // first run + long pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + container.addStreamOfEmptyWords(getRunningBit(), pl); + index += pl; + int pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = (int) (max - index); + } + writeLiteralWords(pd, container); + discardFirstWords(pl+pd); + index += pd; + } + return index; + } + + /** + * Write out up to max words (negated), returns how many were written + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public long dischargeNegated(BitmapStorage container, long max) { + long index = 0; + while ((index < max) && (size() > 0)) { + // first run + long pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + container.addStreamOfEmptyWords(!getRunningBit(), pl); + index += pl; + int pd = 
getNumberOfLiteralWords(); + if (pd + index > max) { + pd = (int) (max - index); + } + writeNegatedLiteralWords(pd, container); + discardFirstWords(pl+pd); + index += pd; + } + return index; + } + + + /** + * Write out the remain words, transforming them to zeroes. + * @param container target for writes + */ + public void dischargeAsEmpty(BitmapStorage container) { + while(size()>0) { + container.addStreamOfEmptyWords(false, size()); + discardFirstWords(size()); + } + } + + + + /** + * Write out the remaining words + * @param container target for writes + */ + public void discharge(BitmapStorage container) { + this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); + discharge(this.brlw, this.iterator, container); + } + + /** + * Get the nth literal word for the current running length word + * @param index zero based index + * @return the literal word + */ + @Override +public long getLiteralWordAt(int index) { + return this.buffer[this.literalWordStartPosition + index]; + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override +public int getNumberOfLiteralWords() { + return this.brlw.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + @Override +public boolean getRunningBit() { + return this.brlw.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override +public long getRunningLength() { + return this.brlw.RunningLength; + } + + /** + * Size in uncompressed words of the current running length word. + * + * @return the long + */ + @Override +public long size() { + return this.brlw.size(); + } + + /** + * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. 
+ * @param numWords number of words to be written + * @param container where we write + */ + public void writeLiteralWords(int numWords, BitmapStorage container) { + container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); + } + + /** + * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. + * @param numWords number of words to be written + * @param container where we write + */ + public void writeNegatedLiteralWords(int numWords, BitmapStorage container) { + container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); + } + + /** + * For internal use. (One could use the non-static discharge method instead, + * but we expect them to be slower.) + * + * @param initialWord + * the initial word + * @param iterator + * the iterator + * @param container + * the container + */ + private static void discharge(final BufferedRunningLengthWord initialWord, + final EWAHIterator iterator, final BitmapStorage container) { + BufferedRunningLengthWord runningLengthWord = initialWord; + for (;;) { + final long runningLength = runningLengthWord.getRunningLength(); + container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), + runningLength); + container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() + + runningLengthWord.literalwordoffset, + runningLengthWord.getNumberOfLiteralWords()); + if (!iterator.hasNext()) + break; + runningLengthWord = new BufferedRunningLengthWord(iterator.next()); + } + } + + + @Override + public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException { + IteratingBufferedRunningLengthWord answer = (IteratingBufferedRunningLengthWord) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + return answer; + } + + + private BufferedRunningLengthWord 
brlw; + private long[] buffer; + private int literalWordStartPosition; + private EWAHIterator iterator; +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingRLW.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingRLW.java new file mode 100644 index 000000000..868b93c5e --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingRLW.java @@ -0,0 +1,49 @@ +package com.fr.third.googlecode.javaewah; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * High-level iterator over a compressed bitmap. + * + */ +public interface IteratingRLW { + /** + * @return whether there is more + */ + public boolean next() ; + /** + * @param index where the literal word is + * @return the literal word at the given index. + */ + public long getLiteralWordAt(int index); + /** + * @return the number of literal (non-fill) words + */ + public int getNumberOfLiteralWords() ; + /** + * @return the bit used for the fill bits + */ + public boolean getRunningBit() ; + /** + * @return sum of getRunningLength() and getNumberOfLiteralWords() + */ + public long size() ; + /** + * @return length of the run of fill words + */ + public long getRunningLength() ; + /** + * @param x the number of words to discard + */ + public void discardFirstWords(long x); + + /** + * @return a copy of the iterator + * @throws CloneNotSupportedException this should not be thrown in theory + */ + public IteratingRLW clone() throws CloneNotSupportedException; +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorAggregation.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorAggregation.java new file mode 100644 index 000000000..87386625a --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorAggregation.java @@ -0,0 +1,616 @@ +package com.fr.third.googlecode.javaewah; + +import 
java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedList; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Set of helper functions to aggregate bitmaps. + * + */ +public class IteratorAggregation { + + /** + * @param x iterator to negate + * @return negated version of the iterator + */ + public static IteratingRLW not(final IteratingRLW x) { + return new IteratingRLW() { + + @Override + public boolean next() { + return x.next(); + } + + @Override + public long getLiteralWordAt(int index) { + return ~x.getLiteralWordAt(index); + } + + @Override + public int getNumberOfLiteralWords() { + return x.getNumberOfLiteralWords(); + } + + @Override + public boolean getRunningBit() { + return ! x.getRunningBit(); + } + + @Override + public long size() { + return x.size(); + } + + @Override + public long getRunningLength() { + return x.getRunningLength(); + } + + @Override + public void discardFirstWords(long y) { + x.discardFirstWords(y); + } + + @Override + public IteratingRLW clone() throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } + + + }; + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return and aggregate + */ + public static IteratingRLW bufferedand(final IteratingRLW... al) { + return bufferedand(DEFAULTMAXBUFSIZE,al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator) + * @return and aggregate + */ + public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... 
al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + final LinkedList basell = new LinkedList(); + for (IteratingRLW i : al) + basell.add(i); + return new BufferedIterator(new BufferedAndIterator(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return or aggregate + */ + public static IteratingRLW bufferedor(final IteratingRLW... al) { + return bufferedor(DEFAULTMAXBUFSIZE,al); + } + + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return or aggregate + */ + public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + for (IteratingRLW i : al) + basell.add(i); + return new BufferedIterator(new BufferedORIterator(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al set of iterators to aggregate + * @return xor aggregate + */ + public static IteratingRLW bufferedxor(final IteratingRLW... al) { + return bufferedxor(DEFAULTMAXBUFSIZE,al); + } + + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return xor aggregate + */ + public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... 
al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + + final LinkedList basell = new LinkedList(); + for (IteratingRLW i : al) + basell.add(i); + + return new BufferedIterator(new BufferedXORIterator(basell, bufsize)); + } + + + /** + * Write out the content of the iterator, but as if it were all zeros. + * + * @param container + * where we write + * @param i + * the iterator + */ + protected static void dischargeAsEmpty(final BitmapStorage container, + final IteratingRLW i) { + while (i.size() > 0) { + container.addStreamOfEmptyWords(false, i.size()); + i.next(); + + } + } + + /** + * Write out up to max words, returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + + protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) { + long counter = 0; + while (i.size() > 0 && counter < max) { + long L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(i.getRunningBit(), L1); + counter += L1; + } + long L = i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(i.getLiteralWordAt(k)); + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + + /** + * Write out up to max negated words, returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) { + long counter = 0; + while (i.size() > 0 && counter < max) { + long L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(!i.getRunningBit(), L1); + counter += L1; + } 
+ long L = i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(~i.getLiteralWordAt(k)); + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + static void andToContainer(final BitmapStorage container, + int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + static void andToContainer(final BitmapStorage container, + final IteratingRLW rlwi, IteratingRLW rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + + /** + * Compute the first few words of the XOR aggregate between two iterators. + * + * @param container where to write + * @param desiredrlwcount number of words to be written (max) + * @param rlwi first iterator to aggregate + * @param rlwj second iterator to aggregate + */ + public static void xorToContainer(final BitmapStorage container, + int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + long index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + long index = dischargeNegated(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + protected static int inplaceor(long[] bitmap, + IteratingRLW i) { + + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = (int) i.getRunningLength(); + if (i.getRunningBit()) + Arrays.fill(bitmap, pos, pos + L, ~0l); + pos += L; + final int LR = i.getNumberOfLiteralWords(); + + for (int k = 0; k < LR; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = (int) i.getRunningLength(); + + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + Arrays.fill(bitmap, pos, bitmap.length, ~0l); + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + Arrays.fill(bitmap, pos, pos + L, ~0l); + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + protected static int inplacexor(long[] bitmap, + IteratingRLW i) { + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = (int) 
i.getRunningLength(); + if (i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = (int) i.getRunningLength(); + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = ~bitmap[k]; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + protected static int inplaceand(long[] bitmap, + IteratingRLW i) { + int pos = 0; + long s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = (int) i.getRunningLength(); + if (!i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = (int) i.getRunningLength(); + if (pos + L > bitmap.length) { + if (!i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = 0; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (!i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + /** + * An optimization option. Larger values may improve speed, but at + * the expense of memory. 
+ */ + public final static int DEFAULTMAXBUFSIZE = 65536; +} +class BufferedORIterator implements CloneableIterator { + EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + long[] hardbitmap; + LinkedList ll; + int buffersize; + + BufferedORIterator(LinkedList basell, int bufsize) { + this.ll = basell; + this.hardbitmap = new long[bufsize]; + } + + @Override + public BufferedXORIterator clone() throws CloneNotSupportedException { + BufferedXORIterator answer = (BufferedXORIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + long effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) { + this.buffer.add(this.hardbitmap[k]); + } + + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + +class BufferedXORIterator implements CloneableIterator { + EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + long[] hardbitmap; + LinkedList ll; + int buffersize; + + BufferedXORIterator(LinkedList basell, int bufsize) { + this.ll = basell; + this.hardbitmap = new long[bufsize]; + } + + @Override + public BufferedXORIterator clone() throws CloneNotSupportedException { + BufferedXORIterator answer = (BufferedXORIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + long 
effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.add(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + + +class BufferedAndIterator implements CloneableIterator { + EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); + LinkedList ll; + int buffersize; + + public BufferedAndIterator(LinkedList basell, int bufsize) { + this.ll = basell; + this.buffersize = bufsize; + + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public BufferedAndIterator clone() throws CloneNotSupportedException { + BufferedAndIterator answer = (BufferedAndIterator) super.clone(); + answer.buffer = this.buffer.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public EWAHIterator next() { + this.buffer.clear(); + IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(), + this.ll.get(0), this.ll.get(1)); + if (this.ll.size() > 2) { + Iterator i = this.ll.iterator(); + i.next(); + i.next(); + EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap(); + while (i.hasNext() && this.buffer.sizeInBytes() > 0) { + IteratorAggregation.andToContainer(tmpbuffer, + this.buffer.getIteratingRLW(), i.next()); + this.buffer.swap(tmpbuffer); + tmpbuffer.clear(); + } + } + Iterator i = this.ll.iterator(); + while(i.hasNext()) { + if(i.next().size() == 0) { + this.ll.clear(); + break; + } + } + return this.buffer.getEWAHIterator(); + } + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorUtil.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorUtil.java new file mode 100644 index 000000000..628ae46a8 --- /dev/null +++ 
b/fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorUtil.java @@ -0,0 +1,132 @@ +package com.fr.third.googlecode.javaewah; + +import java.util.Iterator; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Convenience functions for working over iterators + * + */ +public class IteratorUtil { + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static IntIterator toSetBitsIntIterator(final IteratingRLW i) { + return new IntIteratorOverIteratingRLW(i); + } + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static Iterator toSetBitsIterator(final IteratingRLW i) { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return new Integer(this.under.next()); + } + + @Override + public void remove() { + } + + final private IntIterator under = toSetBitsIntIterator(i); + }; + + } + + /** + * Generate a bitmap from an iterator + * + * @param i iterator we wish to materialize + * @param c where we write + */ + public static void materialize(final IteratingRLW i, final BitmapStorage c) { + while (true) { + if (i.getRunningLength() > 0) { + c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); + } + for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) + c.add(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + } + + /** + * @param i iterator we wish to iterate over + * @return the cardinality (number of set bits) corresponding to the iterator + */ + public static int cardinality(final IteratingRLW i) { + int answer = 0; + while (true) { + if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits; + for (int k = 0; k < 
i.getNumberOfLiteralWords(); ++k) + answer += Long.bitCount(i.getLiteralWordAt(k)); + if(!i.next()) break; + } + return answer; + } + + /** + * @param x set of bitmaps + * @return an array of iterators corresponding to the array of bitmaps + */ + public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... x) { + IteratingRLW[] X = new IteratingRLW[x.length]; + for (int k = 0; k < X.length; ++k) { + X[k] = new IteratingBufferedRunningLengthWord(x[k]); + } + return X; + } + /** + * Turn an iterator into a bitmap. + * + * @param i iterator we wish to materialize + * @param c where we write + * @param Max maximum number of words we wish to materialize + * @return how many words were actually materialized + */ + public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) { + final long origMax = Max; + while (true) { + if (i.getRunningLength() > 0) { + long L = i.getRunningLength(); + if(L > Max) L = Max; + c.addStreamOfEmptyWords(i.getRunningBit(), L); + Max -= L; + } + long L = i.getNumberOfLiteralWords(); + for (int k = 0; k < L; ++k) + c.add(i.getLiteralWordAt(k)); + if(Max>0) { + if (!i.next()) + break; + } + else break; + } + return origMax - Max; + } + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @return materialized version of the iterator + */ + public static EWAHCompressedBitmap materialize(final IteratingRLW i) { + EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); + materialize(i, ewah); + return ewah; + } + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/LogicalElement.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/LogicalElement.java new file mode 100644 index 000000000..b6300410c --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/LogicalElement.java @@ -0,0 +1,61 @@ +package com.fr.third.googlecode.javaewah; + +/** + * A prototypical model for bitmaps. Used by the + * class FastAggregation. 
Users should probably not + * be concerned by this class. + * + * @author Daniel Lemire + * @param the type of element (e.g., a bitmap class) + * + */ +public interface LogicalElement { + /** + * Compute the bitwise logical and + * @param le element + * @return the result of the operation + */ + public T and(T le); + + /** + * Compute the bitwise logical and not + * @param le element + * @return the result of the operation + */ + public T andNot(T le); + + /** + * Compute the bitwise logical not (in place) + */ + public void not(); + + + @SuppressWarnings({ "rawtypes", "javadoc" }) + /** + * Compute the bitwise logical or + * @param le another element + * @return the result of the operation + */ + public LogicalElement or(T le); + + /** + * How many logical bits does this element represent? + * + * @return the number of bits represented by this element + */ + public int sizeInBits(); + + /** + * Should report the storage requirement + * @return How many bytes + * @since 0.6.2 + */ + public int sizeInBytes(); + + /** + * Compute the bitwise logical Xor + * @param le element + * @return the results of the operation + */ + public T xor(T le); +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/NonEmptyVirtualStorage.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/NonEmptyVirtualStorage.java new file mode 100644 index 000000000..96ea6cfea --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/NonEmptyVirtualStorage.java @@ -0,0 +1,92 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * This is a BitmapStorage that can be used to determine quickly if the result + * of an operation is non-trivial... that is, whether there will be at least on + * set bit. 
+ * + * @since 0.4.2 + * @author Daniel Lemire and Veronika Zenz + * + */ +public class NonEmptyVirtualStorage implements BitmapStorage { + static class NonEmptyException extends RuntimeException { + private static final long serialVersionUID = 1L; + + /** + * Do not fill in the stack trace for this exception + * for performance reasons. + * + * @return this instance + * @see Throwable#fillInStackTrace() + */ + @Override + public synchronized Throwable fillInStackTrace() { + return this; + } + } + + private static final NonEmptyException nonEmptyException = new NonEmptyException(); + + /** + * If the word to be added is non-zero, a NonEmptyException exception is + * thrown. + * + * @see com.googlecode.javaewah.BitmapStorage#add(long) + */ + @Override +public void add(long newdata) { + if (newdata != 0) + throw nonEmptyException; + return; + } + + /** + * throws a NonEmptyException exception when number is greater than 0 + * + */ + @Override +public void addStreamOfLiteralWords(long[] data, int start, int number) { + if(number>0){ + throw nonEmptyException; + } + } + + /** + * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, + * otherwise, nothing happens. + * + * @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, long) + */ + @Override +public void addStreamOfEmptyWords(boolean v, long number) { + if (v && (number>0)) + throw nonEmptyException; + return; + } + + /** + * throws a NonEmptyException exception when number is greater than 0 + * + */ + @Override +public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { + if(number>0){ + throw nonEmptyException; + } + } + + /** + * Does nothing. 
+ * + * @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) + */ + @Override +public void setSizeInBits(int bits) { + } + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/RunningLengthWord.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/RunningLengthWord.java new file mode 100644 index 000000000..969a78c48 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/RunningLengthWord.java @@ -0,0 +1,152 @@ +package com.fr.third.googlecode.javaewah; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Mostly for internal use. + * + * @since 0.1.0 + * @author Daniel Lemire + */ +public final class RunningLengthWord implements Cloneable { + + /** + * Instantiates a new running length word. + * + * @param a + * an array of 64-bit words + * @param p + * position in the array where the running length word is + * located. + */ + RunningLengthWord(final EWAHCompressedBitmap a, final int p) { + this.parent = a; + this.position = p; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return (int) (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return (this.parent.buffer[this.position] & 1) != 0; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public long getRunningLength() { + return (this.parent.buffer[this.position] >>> 1) + & largestrunninglengthcount; + } + + /** + * Sets the number of literal words. 
+ * + * @param number + * the new number of literal words + */ + public void setNumberOfLiteralWords(final long number) { + this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; + this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) + | runninglengthplusrunningbit; + } + + /** + * Sets the running bit. + * + * @param b + * the new running bit + */ + public void setRunningBit(final boolean b) { + if (b) + this.parent.buffer[this.position] |= 1l; + else + this.parent.buffer[this.position] &= ~1l; + } + + /** + * Sets the running length. + * + * @param number + * the new running length + */ + public void setRunningLength(final long number) { + this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; + this.parent.buffer[this.position] &= (number << 1) + | notshiftedlargestrunninglengthcount; + } + + /** + * Return the size in uncompressed words represented by this running + * length word. + * + * @return the size + */ + public long size() { + return getRunningLength() + getNumberOfLiteralWords(); + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public RunningLengthWord clone() throws CloneNotSupportedException { + RunningLengthWord answer; + answer = (RunningLengthWord) super.clone(); + answer.parent = this.parent; + answer.position = this.position; + return answer; + } + + /** The array of words. */ + public EWAHCompressedBitmap parent; + + /** The position in array. */ + public int position; + + /** + * number of bits dedicated to marking of the running length of clean + * words + */ + public static final int runninglengthbits = 32; + + private static final int literalbits = 64 - 1 - runninglengthbits; + + /** largest number of literal words in a run. 
*/ + public static final int largestliteralcount = (1 << literalbits) - 1; + + /** largest number of clean words in a run */ + public static final long largestrunninglengthcount = (1l << runninglengthbits) - 1; + + private static final long runninglengthplusrunningbit = (1l << (runninglengthbits + 1)) - 1; + + private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; + + private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; + + private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; + +} \ No newline at end of file diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark.java new file mode 100644 index 000000000..e2ffb8fe4 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark.java @@ -0,0 +1,284 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +import java.text.DecimalFormat; +import java.util.Arrays; +import java.util.List; +import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; +import com.fr.third.googlecode.javaewah.FastAggregation; +import com.fr.third.googlecode.javaewah.IntIterator; +import com.fr.third.googlecode.javaewah.IteratingRLW; +import com.fr.third.googlecode.javaewah.IteratorAggregation; +import com.fr.third.googlecode.javaewah.IteratorUtil; + +/** + * This class is used to benchmark the performance EWAH. 
+ * + * @author Daniel Lemire + */ +public class Benchmark { + + /** + * Compute the union between two sorted arrays + * @param set1 first sorted array + * @param set2 second sorted array + * @return merged array + */ + static public int[] unite2by2(final int[] set1, final int[] set2) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == set1.length) + return Arrays.copyOf(set2, set2.length); + if (0 == set2.length) + return Arrays.copyOf(set1, set1.length); + int[] buffer = new int[set1.length + set2.length]; + while (true) { + if (set1[k1] < set2[k2]) { + buffer[pos++] = set1[k1]; + ++k1; + if (k1 >= set1.length) { + for (; k2 < set2.length; ++k2) + buffer[pos++] = set2[k2]; + break; + } + } else if (set1[k1] == set2[k2]) { + buffer[pos++] = set1[k1]; + ++k1; + ++k2; + if (k1 >= set1.length) { + for (; k2 < set2.length; ++k2) + buffer[pos++] = set2[k2]; + break; + } + if (k2 >= set2.length) { + for (; k1 < set1.length; ++k1) + buffer[pos++] = set1[k1]; + break; + } + } else {// if (set1[k1]>set2[k2]) { + buffer[pos++] = set2[k2]; + ++k2; + if (k2 >= set2.length) { + for (; k1 < set1.length; ++k1) + buffer[pos++] = set1[k1]; + break; + } + } + } + return Arrays.copyOf(buffer, pos); + } + + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + //test(2, 24, 1); + test(100, 16, 1); + } + + @SuppressWarnings("javadoc") + public static void test(int N, int nbr, int repeat) { + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { + long bogus = 0; + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + System.out.println("# generating random data..."); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + System.out.println("# generating random data... 
ok."); + // building + bef = System.currentTimeMillis(); + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + int size = 0; + for (int r = 0; r < repeat; ++r) { + size = 0; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + size += ewah[k].sizeInBytes(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + size; + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = ewah[k].toArray(); + bogus += array.length; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = new int[ewah[k].cardinality()]; + int c = 0; + for (int x : ewah[k]) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + List L = ewah[k].getPositions(); + int[] array = new int[L.size()]; + int c = 0; + for (int x : L) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IntIterator iter = ewah[k].intIterator(); + while (iter.hasNext()) { + bogus += iter.next(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + bogus += ewahor.sizeInBits(); + } + 
aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + // run sanity check + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); + EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k+1)); + EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor); + if(!ewahorp.equals(mewahor)) throw new RuntimeException("bug"); + } + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); + bogus += IteratorUtil.materialize(ewahor).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical and + bef = System.currentTimeMillis(); + for (int r = 0; r 
< repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahand = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahand = ewahand.and(ewah[j]); + } + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahand = EWAHCompressedBitmap + .and(ewahcp); + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); + EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k+1)); + EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand); + if(!ewahandp.equals(mewahand)) throw new RuntimeException("bug"); + } + // fast logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); + bogus += IteratorUtil.materialize(ewahand).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + + System.out + .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); + System.out.println(line); + System.out.println("# bogus =" + bogus); + } + } +} diff 
--git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark32.java new file mode 100644 index 000000000..b221b911c --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark32.java @@ -0,0 +1,212 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +import java.text.DecimalFormat; +import java.util.List; +import com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32; +import com.fr.third.googlecode.javaewah.FastAggregation; +import com.fr.third.googlecode.javaewah.IntIterator; +import com.fr.third.googlecode.javaewah32.IteratingRLW32; +import com.fr.third.googlecode.javaewah32.IteratorAggregation32; +import com.fr.third.googlecode.javaewah32.IteratorUtil32; + +/** + * This class is used to benchmark the performance EWAH. + * + * @author Daniel Lemire + */ +public class Benchmark32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(100, 16, 1); +// test(2, 24, 1); + } + + @SuppressWarnings("javadoc") + public static void test(int N, int nbr, int repeat) { + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { + long bogus = 0; + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + System.out.println("# generating random data..."); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + System.out.println("# generating random data... 
ok."); + // building + bef = System.currentTimeMillis(); + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + int size = 0; + for (int r = 0; r < repeat; ++r) { + size = 0; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + size += ewah[k].sizeInBytes(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + size; + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = ewah[k].toArray(); + bogus += array.length; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + int[] array = new int[ewah[k].cardinality()]; + int c = 0; + for (int x : ewah[k]) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + List L = ewah[k].getPositions(); + int[] array = new int[L.size()]; + int c = 0; + for (int x : L) + array[c++] = x; + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // uncompressing + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IntIterator iter = ewah[k].intIterator(); + while (iter.hasNext()) { + bogus += iter.next(); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + bogus += 
ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp); + bogus += IteratorUtil32.materialize(ewahor).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + line += "\t\t\t"; + // logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahand = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahand = ewahand.and(ewah[j]); + } + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + 
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32 + .and(ewahcp); + bogus += ewahand.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical and + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j].getIteratingRLW(); + } + IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp); + bogus += IteratorUtil32.materialize(ewahand).sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + + System.out + .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); + System.out.println(line); + System.out.println("# bogus =" + bogus); + } + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection.java new file mode 100644 index 000000000..943c76bc6 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection.java @@ -0,0 +1,130 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.fr.third.googlecode.javaewah.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical and (intersection) aggregate. 
+ */ +public class BenchmarkIntersection { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc"}) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + // building + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap answer = ewah[0].and(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.and(ewah[k]); + + EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah); + if (!answer.equals(ewahand)) + throw new RuntimeException( + "bug EWAHCompressedBitmap.and"); + EWAHCompressedBitmap ewahand2 = FastAggregation + .bufferedand(65536,ewah); + if (!ewahand.equals(ewahand2)) + throw new RuntimeException( + "bug FastAggregation.bufferedand "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.and(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new 
EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap + .and(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .bufferedand(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord( + ewah[j]); + } + IteratingRLW ewahor = IteratorAggregation.bufferedand(ewahcp); + int wordcounter = IteratorUtil.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection32.java new file mode 100644 index 000000000..a36ba889b --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection32.java @@ -0,0 +1,130 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.fr.third.googlecode.javaewah32.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff 
Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical and (intersection) aggregate. + */ +public class BenchmarkIntersection32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + int[] inter = cdg.generateClustered(1 << (nbr/2), Max); + for (int k = 0; k < N; ++k) + data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); + // building + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.and(ewah[k]); + + EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah); + if (!answer.equals(ewahand)) + throw new RuntimeException( + "bug EWAHCompressedBitmap.and"); + EWAHCompressedBitmap32 ewahand2 = FastAggregation32 + .bufferedand(65536,ewah); + if (!ewahand.equals(ewahand2)) + throw new RuntimeException( + "bug FastAggregation.bufferedand "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.and(ewah[j]); + } + } + aft = 
System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .and(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation32 + .bufferedand(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord32( + ewah[j]); + } + IteratingRLW32 ewahor = IteratorAggregation32.bufferedand(ewahcp); + int wordcounter = IteratorUtil32.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion.java new file mode 100644 index 000000000..55453bbe8 --- /dev/null +++ 
b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion.java @@ -0,0 +1,164 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.fr.third.googlecode.javaewah.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical or (union) aggregate. + */ +public class BenchmarkUnion { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc", "deprecation" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap answer = ewah[0].or(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.or(ewah[k]); + + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah); + if (!answer.equals(ewahor)) + throw new RuntimeException( + "bug EWAHCompressedBitmap.or"); + EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah); + if (!ewahor.equals(ewahor3)) + throw new RuntimeException("bug FastAggregation.or"); + EWAHCompressedBitmap ewahor2 = FastAggregation + .bufferedor(65536,ewah); + if (!ewahor.equals(ewahor2)) + throw new RuntimeException( + 
"bug FastAggregation.bufferedor "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = EWAHCompressedBitmap + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .bufferedor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap x = new 
EWAHCompressedBitmap(); + FastAggregation.legacy_orWithContainer(x, ewahcp); + bogus += x.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord( + ewah[j]); + } + IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); + int wordcounter = IteratorUtil.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion32.java new file mode 100644 index 000000000..249354944 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion32.java @@ -0,0 +1,165 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; + +import com.fr.third.googlecode.javaewah.FastAggregation; +import com.fr.third.googlecode.javaewah32.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical or (union) aggregate. 
+ */ +public class BenchmarkUnion32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + } + + @SuppressWarnings({ "javadoc", "deprecation" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if(true){ + EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]); + for(int k = 2; k < ewah.length; ++k) + answer = answer.or(ewah[k]); + + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .or(ewah); + if(!answer.equals(ewahor)) throw new RuntimeException("bug EWAHCompressedBitmap.or"); + EWAHCompressedBitmap32 ewahor3 = FastAggregation + .or(ewah); + if(!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or"); + EWAHCompressedBitmap32 ewahor2 = FastAggregation32 + .bufferedor(65536,ewah); + if(!ewahor.equals(ewahor2)) throw new RuntimeException("bug FastAggregation.bufferedor "); + + } + + // logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.or(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + 
for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation + .or(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation32 + .bufferedor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); + FastAggregation32.legacy_orWithContainer(x, ewahcp); + bogus += x.sizeInBits(); + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical or + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new 
IteratingBufferedRunningLengthWord32(ewah[j]); + } + IteratingRLW32 ewahor = IteratorAggregation32 + .bufferedor(ewahcp); + int wordcounter = IteratorUtil32.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + + System.out + .println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR.java new file mode 100644 index 000000000..7f036d690 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR.java @@ -0,0 +1,134 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; +import com.fr.third.googlecode.javaewah.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical xor aggregate. 
+ */ +public class BenchmarkXOR { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + //test(10, 18, 1); + test(2, 22, 1); + } + + @SuppressWarnings({ "javadoc" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap answer = ewah[0].xor(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.xor(ewah[k]); + EWAHCompressedBitmap ewahor3 = FastAggregation.xor(ewah); + if (!answer.equals(ewahor3)) + throw new RuntimeException("bug FastAggregation.xor"); + EWAHCompressedBitmap ewahor2 = FastAggregation + .bufferedxor(65536,ewah); + if (!answer.equals(ewahor2)) + throw new RuntimeException( + "bug FastAggregation.bufferedxor "); + EWAHCompressedBitmap iwah = IteratorUtil.materialize(IteratorAggregation.bufferedxor(IteratorUtil.toIterators(ewah))); + if (!answer.equals(iwah)) + throw new RuntimeException( + "bug xor it "); + + + } + + // logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.xor(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical xor + bef = 
System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .xor(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap ewahor = FastAggregation + .bufferedxor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord( + ewah[j]); + } + IteratingRLW ewahor = IteratorAggregation.bufferedxor(ewahcp); + int wordcounter = IteratorUtil.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + System.out + .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR32.java new file mode 100644 index 000000000..4fb057fab --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR32.java @@ -0,0 +1,137 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +import java.text.DecimalFormat; + 
+import com.fr.third.googlecode.javaewah.FastAggregation; +import com.fr.third.googlecode.javaewah32.*; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * To benchmark the logical xor aggregate. + */ +public class BenchmarkXOR32 { + + @SuppressWarnings("javadoc") + public static void main(String args[]) { + test(10, 18, 1); + //test(2, 22, 1); + } + + @SuppressWarnings({ "javadoc" }) + public static void test(int N, int nbr, int repeat) { + long bogus = 0; + + DecimalFormat df = new DecimalFormat("0.###"); + ClusteredDataGenerator cdg = new ClusteredDataGenerator(); + for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { + for (int times = 0; times < 2; ++times) { + String line = ""; + long bef, aft; + line += sparsity; + int[][] data = new int[N][]; + int Max = (1 << (nbr + sparsity)); + for (int k = 0; k < N; ++k) + data[k] = cdg.generateClustered(1 << nbr, Max); + // building + EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; + for (int k = 0; k < N; ++k) { + ewah[k] = new EWAHCompressedBitmap32(); + for (int x = 0; x < data[k].length; ++x) { + ewah[k].set(data[k][x]); + } + data[k] = null; + } + // sanity check + if (true) { + EWAHCompressedBitmap32 answer = ewah[0].xor(ewah[1]); + for (int k = 2; k < ewah.length; ++k) + answer = answer.xor(ewah[k]); + EWAHCompressedBitmap32 ewahor3 = FastAggregation.xor(ewah); + if (!answer.equals(ewahor3)) + throw new RuntimeException("bug FastAggregation.xor"); + EWAHCompressedBitmap32 ewahor2 = FastAggregation32 + .bufferedxor(65536,ewah); + if (!answer.equals(ewahor2)) + throw new RuntimeException( + "bug FastAggregation.bufferedxor "); + EWAHCompressedBitmap32 iwah = IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(ewah))); + if (!answer.equals(iwah)) + throw new RuntimeException( + "bug xor it "); + + } + + // logical xor 
+ bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32 ewahor = ewah[0]; + for (int j = 1; j < k + 1; ++j) { + ewahor = ewahor.xor(ewah[j]); + } + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation + .xor(ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = ewah[j]; + } + EWAHCompressedBitmap32 ewahor = FastAggregation32 + .bufferedxor(65536,ewahcp); + bogus += ewahor.sizeInBits(); + } + aft = System.currentTimeMillis(); + line += "\t" + df.format((aft - bef) / 1000.0); + + // fast logical xor + bef = System.currentTimeMillis(); + for (int r = 0; r < repeat; ++r) + for (int k = 0; k < N; ++k) { + IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; + for (int j = 0; j < k + 1; ++j) { + ewahcp[j] = new IteratingBufferedRunningLengthWord32( + ewah[j]); + } + IteratingRLW32 ewahor = IteratorAggregation32.bufferedxor(ewahcp); + int wordcounter = IteratorUtil32.cardinality(ewahor); + bogus += wordcounter; + } + aft = System.currentTimeMillis(); + + line += "\t" + df.format((aft - bef) / 1000.0); + + + System.out + .println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); + + System.out.println(line); + } + System.out.println("# bogus =" + bogus); + + } + } +} diff --git 
a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/ClusteredDataGenerator.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/ClusteredDataGenerator.java new file mode 100644 index 000000000..29078ca21 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/ClusteredDataGenerator.java @@ -0,0 +1,78 @@ +package com.fr.third.googlecode.javaewah.benchmark; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + + +/** + * This class will generate lists of random integers with a "clustered" distribution. + * Reference: + * Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147. + * + * @author Daniel Lemire + */ +public class ClusteredDataGenerator { + + /** + * + */ +public ClusteredDataGenerator() { + this.unidg = new UniformDataGenerator(); + } + + /** + * @param seed random seed + */ +public ClusteredDataGenerator(final int seed) { + this.unidg = new UniformDataGenerator(seed); +} + +/** + * generates randomly N distinct integers from 0 to Max. + * @param N number of integers + * @param Max maximum integer value + * @return a randomly generated array + */ + public int[] generateClustered(int N, int Max) { + int[] array = new int[N]; + fillClustered(array, 0, N, 0, Max); + return array; + } + + void fillClustered(int[] array, int offset, int length, int Min, int Max) { + final int range = Max - Min; + if ((range == length) || (length <= 10)) { + fillUniform(array, offset, length, Min, Max); + return; + } + final int cut = length / 2 + + ((range - length - 1 > 0) ? 
this.unidg.rand.nextInt(range - length - 1) : 0); + final double p = this.unidg.rand.nextDouble(); + if (p < 0.25) { + fillUniform(array, offset, length / 2, Min, Min + cut); + fillClustered(array, offset + length / 2, length - length / 2, Min + cut, + Max); + } else if (p < 0.5) { + fillClustered(array, offset, length / 2, Min, Min + cut); + fillUniform(array, offset + length / 2, length - length / 2, Min + cut, + Max); + } else { + fillClustered(array, offset, length / 2, Min, Min + cut); + fillClustered(array, offset + length / 2, length - length / 2, Min + cut, + Max); + } + } + + void fillUniform(int[] array, int offset, int length, int Min, int Max) { + int[] v = this.unidg.generateUniform(length, Max - Min); + for (int k = 0; k < v.length; ++k) + array[k + offset] = Min + v[k]; + } + + UniformDataGenerator unidg; + +} + diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/UniformDataGenerator.java b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/UniformDataGenerator.java new file mode 100644 index 000000000..6c91ea67e --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/UniformDataGenerator.java @@ -0,0 +1,114 @@ +package com.fr.third.googlecode.javaewah.benchmark; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.Arrays; +import java.util.BitSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Random; + +/** + * This class will generate "uniform" lists of random integers. + * + * @author Daniel Lemire + */ +public class UniformDataGenerator { + /** + * construct generator of random arrays. 
+ */ + public UniformDataGenerator() { + this.rand = new Random(); + } + + /** + * @param seed random seed + */ + public UniformDataGenerator(final int seed) { + this.rand = new Random(seed); + } + + /** + * generates randomly N distinct integers from 0 to Max. + */ + int[] generateUniformHash(int N, int Max) { + if (N > Max) + throw new RuntimeException("not possible"); + int[] ans = new int[N]; + HashSet s = new HashSet(); + while (s.size() < N) + s.add(new Integer(this.rand.nextInt(Max))); + Iterator i = s.iterator(); + for (int k = 0; k < N; ++k) + ans[k] = i.next().intValue(); + Arrays.sort(ans); + return ans; + } + + /** + * output all integers from the range [0,Max) that are not + * in the array + */ + static int[] negate(int[] x, int Max) { + int[] ans = new int[Max - x.length]; + int i = 0; + int c = 0; + for (int j = 0; j < x.length; ++j) { + int v = x[j]; + for (; i < v; ++i) + ans[c++] = i; + ++i; + } + while (c < ans.length) + ans[c++] = i++; + return ans; + } + + + /** + * generates randomly N distinct integers from 0 to Max. + * @param N Number of integers to generate + * @param Max Maximum value of the integers + * @return array containing random integers + */ + public int[] generateUniform(int N, int Max) { + if(N * 2 > Max) { + return negate( generateUniform(Max - N, Max), Max ); + } + if (2048 * N > Max) + return generateUniformBitmap(N, Max); + return generateUniformHash(N, Max); + } + + /** + * generates randomly N distinct integers from 0 to Max using a bitmap. 
+ * @param N Number of integers to generate + * @param Max Maximum value of the integers + * @return array containing random integers + */ + int[] generateUniformBitmap(int N, int Max) { + if (N > Max) + throw new RuntimeException("not possible"); + int[] ans = new int[N]; + BitSet bs = new BitSet(Max); + int cardinality = 0; + while (cardinality < N) { + int v = this.rand.nextInt(Max); + if (!bs.get(v)) { + bs.set(v); + cardinality++; + } + } + int pos = 0; + for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { + ans[pos++] = i; + } + return ans; + } + + Random rand = new Random(); + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/BitCounter32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/BitCounter32.java new file mode 100644 index 000000000..53a9a0ee5 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/BitCounter32.java @@ -0,0 +1,102 @@ +package com.fr.third.googlecode.javaewah32; + + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz + * Licensed under the Apache License, Version 2.0. + */ +/** + * BitCounter is a fake bitset data structure. Instead of storing the actual data, + * it only records the number of set bits. + * + * @since 0.5.0 + * @author Daniel Lemire and David McIntosh + */ + +public final class BitCounter32 implements BitmapStorage32 { + + /** + * Virtually add words directly to the bitmap + * + * @param newdata the word + */ + // @Override : causes problems with Java 1.5 + @Override +public void add(final int newdata) { + this.oneBits += Integer.bitCount(newdata); + } + + + /** + * virtually add several literal words. 
+ * + * @param data the literal words + * @param start the starting point in the array + * @param number the number of literal words to add + */ + // @Override : causes problems with Java 1.5 + @Override +public void addStreamOfLiteralWords(int[] data, int start, int number) { + for(int i=start;i iterator) { + this.masteriterator = iterator; + if(this.masteriterator.hasNext()) { + this.iterator = this.masteriterator.next(); + this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + } + + + /** + * Discard first words, iterating to the next running length word if needed. + * + * @param x the number of words to be discarded + */ + @Override + public void discardFirstWords(int x) { + while (x > 0) { + if (this.brlw.RunningLength > x) { + this.brlw.RunningLength -= x; + return; + } + x -= this.brlw.RunningLength; + this.brlw.RunningLength = 0; + int toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; + + this.literalWordStartPosition += toDiscard; + this.brlw.NumberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.next()) { + break; + } + } + } + } + /** + * Move to the next RunningLengthWord + * @return whether the move was possible + */ + @Override + public boolean next() { + if (!this.iterator.hasNext()) { + if(!reload()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } + private boolean reload() { + if(!this.masteriterator.hasNext()) { + return false; + } + this.iterator = this.masteriterator.next(); + this.buffer = this.iterator.buffer(); + return true; + } + + + /** + * Get the nth literal word for the current running length word + * @param index zero based index + * @return the literal word + */ + @Override + public int getLiteralWordAt(int index) { + return this.buffer[this.literalWordStartPosition + index]; + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override + public int getNumberOfLiteralWords() { + return this.brlw.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + @Override + public boolean getRunningBit() { + return this.brlw.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override + public int getRunningLength() { + return this.brlw.RunningLength; + } + + /** + * Size in uncompressed words of the current running length word. 
+ * + * @return the size + */ + @Override + public int size() { + return this.brlw.size(); + } + + @Override + public BufferedIterator32 clone() throws CloneNotSupportedException { + BufferedIterator32 answer = (BufferedIterator32) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + answer.masteriterator = this.masteriterator.clone(); + return answer; + } + + private BufferedRunningLengthWord32 brlw; + private int[] buffer; + private int literalWordStartPosition; + private EWAHIterator32 iterator; + private CloneableIterator masteriterator; + } \ No newline at end of file diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/BufferedRunningLengthWord32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/BufferedRunningLengthWord32.java new file mode 100644 index 000000000..80b665e81 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/BufferedRunningLengthWord32.java @@ -0,0 +1,174 @@ +package com.fr.third.googlecode.javaewah32; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + + + +/** + * Mostly for internal use. Similar to RunningLengthWord, but can + * be modified without access to the array, and has faster access. + * + * @author Daniel Lemire + * @since 0.5.0 + * + */ +public final class BufferedRunningLengthWord32 implements Cloneable { + + /** + * Instantiates a new buffered running length word. + * + * @param a the word + */ + public BufferedRunningLengthWord32(final int a) { + this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); + this.RunningBit = (a & 1) != 0; + this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); + } + + /** + * Instantiates a new buffered running length word. 
+ * + * @param rlw the rlw + */ + public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) { + this(rlw.parent.buffer[rlw.position]); + } + + /** + * Discard first words. + * + * @param x the number of words to be discarded + */ + public void discardFirstWords(int x) { + if (this.RunningLength >= x) { + this.RunningLength -= x; + return; + } + x -= this.RunningLength; + this.RunningLength = 0; + this.literalwordoffset += x; + this.NumberOfLiteralWords -= x; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words + */ + public int getNumberOfLiteralWords() { + return this.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + public boolean getRunningBit() { + return this.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + public int getRunningLength() { + return this.RunningLength; + } + + /** + * Reset the values using the provided word. + * + * @param a the word + */ + public void reset(final int a) { + this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); + this.RunningBit = (a & 1) != 0; + this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); + this.literalwordoffset = 0; + } + + /** + * Reset the values of this running length word so that it has the same values + * as the other running length word. + * + * @param rlw the other running length word + */ + public void reset(final RunningLengthWord32 rlw) { + reset(rlw.parent.buffer[rlw.position]); + } + + /** + * Sets the number of literal words. + * + * @param number the new number of literal words + */ + public void setNumberOfLiteralWords(final int number) { + this.NumberOfLiteralWords = number; + } + + /** + * Sets the running bit. + * + * @param b the new running bit + */ + public void setRunningBit(final boolean b) { + this.RunningBit = b; + } + + /** + * Sets the running length. 
+ * + * @param number the new running length + */ + public void setRunningLength(final int number) { + this.RunningLength = number; + } + + /** + * Size in uncompressed words. + * + * @return the int + */ + public int size() { + return this.RunningLength + this.NumberOfLiteralWords; + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + " running length = " + + getRunningLength() + " number of lit. words " + + getNumberOfLiteralWords(); + } + + @Override +public BufferedRunningLengthWord32 clone() throws CloneNotSupportedException { + BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super.clone(); + answer.literalwordoffset = this.literalwordoffset; + answer.NumberOfLiteralWords = this.NumberOfLiteralWords; + answer.RunningBit = this.RunningBit; + answer.RunningLength = this.RunningLength; + return answer; + } + + /** how many literal words have we read so far? */ + public int literalwordoffset = 0; + + /** The Number of literal words. */ + public int NumberOfLiteralWords; + + /** The Running bit. */ + public boolean RunningBit; + + /** The Running length. */ + public int RunningLength; + + +} \ No newline at end of file diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHCompressedBitmap32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHCompressedBitmap32.java new file mode 100644 index 000000000..56259ffee --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHCompressedBitmap32.java @@ -0,0 +1,1608 @@ +package com.fr.third.googlecode.javaewah32; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +import java.util.*; +import java.io.*; + +import com.fr.third.googlecode.javaewah.IntIterator; +import com.fr.third.googlecode.javaewah.LogicalElement; + + +/** + *

+ * This implements the patent-free EWAH scheme. Roughly speaking, it is a 32-bit + * variant of the BBC compression scheme used by Oracle for its bitmap indexes. + *

+ * + *

+ In contrast with the 64-bit EWAH scheme (javaewah.EWAHCompressedBitmap), you + can expect this class to compress better, but to be slower at processing the + data. In effect, there is a trade-off between memory usage and performance. + *

+ * + * @see com.fr.third.googlecode.javaewah.EWAHCompressedBitmap + * + *

+ The objective of this compression type is to provide some compression, + while reducing CPU cycle usage as much as possible. + *

+ * + * + *

+ * For more details, see the following paper: + *

+ * + *
    + *
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves + * word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages + * 3-28, 2010. http://arxiv.org/abs/0901.3751
  • + *
+ * + * @since 0.5.0 + */ +public final class EWAHCompressedBitmap32 implements Cloneable, Externalizable, + Iterable, BitmapStorage32, LogicalElement { + + /** + * Creates an empty bitmap (no bit set to true). + */ + public EWAHCompressedBitmap32() { + this.buffer = new int[defaultbuffersize]; + this.rlw = new RunningLengthWord32(this, 0); + } + + /** + * Sets explicitly the buffer size (in 32-bit words). The initial memory usage + * will be "buffersize * 32". For large poorly compressible bitmaps, using + * large values may improve performance. + * + * @param buffersize + * number of 32-bit words reserved when the object is created) + */ + public EWAHCompressedBitmap32(final int buffersize) { + this.buffer = new int[buffersize]; + this.rlw = new RunningLengthWord32(this, 0); + } + + /** + * Adding words directly to the bitmap (for expert use). + * + * This is normally how you add data to the array. So you add bits in streams + * of 4*8 bits. + * + * Example: if you add 321, you are have added (in binary notation) + * 0b101000001, so you have effectively called set(0), set(6), set(8) + * in sequence. + * + * @param newdata + * the word + */ + @Override +public void add(final int newdata) { + add(newdata, wordinbits); + } + + /** + * Adding words directly to the bitmap (for expert use). + * + * @param newdata + * the word + * @param bitsthatmatter + * the number of significant bits (by default it should be 32) + */ + public void add(final int newdata, final int bitsthatmatter) { + this.sizeinbits += bitsthatmatter; + if (newdata == 0) { + addEmptyWord(false); + } else if (newdata == ~0) { + addEmptyWord(true); + } else { + addLiteralWord(newdata); + } + } + + /** + * For internal use. 
+ * + * @param v + * the boolean value + * @return the storage cost of the addition + */ + private int addEmptyWord(final boolean v) { + final boolean noliteralword = (this.rlw.getNumberOfLiteralWords() == 0); + final int runlen = this.rlw.getRunningLength(); + if ((noliteralword) && (runlen == 0)) { + this.rlw.setRunningBit(v); + } + if ((noliteralword) && (this.rlw.getRunningBit() == v) + && (runlen < RunningLengthWord32.largestrunninglengthcount)) { + this.rlw.setRunningLength(runlen + 1); + return 0; + } + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(1); + return 1; + } + + /** + * For internal use. + * + * @param newdata + * the literal word + * @return the storage cost of the addition + */ + private int addLiteralWord(final int newdata) { + final int numbersofar = this.rlw.getNumberOfLiteralWords(); + if (numbersofar >= RunningLengthWord32.largestliteralcount) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + this.rlw.setNumberOfLiteralWords(1); + push_back(newdata); + return 2; + } + this.rlw.setNumberOfLiteralWords(numbersofar + 1); + push_back(newdata); + return 1; + } + + /** + * if you have several literal words to copy over, this might be faster. + * + * + * @param data + * the literal words + * @param start + * the starting point in the array + * @param number + * the number of literal words to add + */ + @Override +public void addStreamOfLiteralWords(final int[] data, final int start, + final int number) { + int leftovernumber = number; + while (leftovernumber > 0) { + final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); + final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount + - NumberOfLiteralWords ? 
leftovernumber + : RunningLengthWord32.largestliteralcount + - NumberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + + whatwecanadd); + leftovernumber -= whatwecanadd; + push_back(data, start, whatwecanadd); + this.sizeinbits += whatwecanadd * wordinbits; + if (leftovernumber > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + } + } + } + + /** + * For experts: You want to add many zeroes or ones? This is the method you + * use. + * + * @param v + * the boolean value + * @param number + * the number + */ + @Override +public void addStreamOfEmptyWords(final boolean v, int number) { + if (number == 0) + return; + this.sizeinbits += number * wordinbits; + if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { + this.rlw.setRunningBit(v); + } else if ((this.rlw.getNumberOfLiteralWords() != 0) + || (this.rlw.getRunningBit() != v)) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + } + final int runlen = this.rlw.getRunningLength(); + final int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount + - runlen ? number : RunningLengthWord32.largestrunninglengthcount + - runlen; + this.rlw.setRunningLength(runlen + whatwecanadd); + number -= whatwecanadd; + while (number >= RunningLengthWord32.largestrunninglengthcount) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(RunningLengthWord32.largestrunninglengthcount); + number -= RunningLengthWord32.largestrunninglengthcount; + } + if (number > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(number); + } + } + + /** + * Same as addStreamOfLiteralWords, but the words are negated. 
+ * + * @param data + * the literal words + * @param start + * the starting point in the array + * @param number + * the number of literal words to add + */ + @Override +public void addStreamOfNegatedLiteralWords(final int[] data, final int start, + final int number) { + int leftovernumber = number; + while (leftovernumber > 0) { + final int NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords(); + final int whatwecanadd = leftovernumber < RunningLengthWord32.largestliteralcount + - NumberOfLiteralWords ? leftovernumber + : RunningLengthWord32.largestliteralcount + - NumberOfLiteralWords; + this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + + whatwecanadd); + leftovernumber -= whatwecanadd; + negative_push_back(data, start, whatwecanadd); + this.sizeinbits += whatwecanadd * wordinbits; + if (leftovernumber > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + } + } + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param a + * the other bitmap + * @return the EWAH compressed bitmap + */ + @Override +public EWAHCompressedBitmap32 and(final EWAHCompressedBitmap32 a) { + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + container + .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords + : a.actualsizeinwords); + andToContainer(a, container); + return container; + } + + /** + * Computes new compressed bitmap containing the bitwise AND values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). 
+   *
+   * The operand whose current run is longer (or equal) is called the
+   * "predator"; the other one is the "prey". A predator run of zeros forces
+   * zeros in the output, while a predator run of ones lets the prey's words
+   * through unchanged.
+   *
+   * @since 0.4.0
+   * @param a
+   *          the other bitmap
+   * @param container
+   *          where we store the result
+   */
+  public void andToContainer(final EWAHCompressedBitmap32 a, final BitmapStorage32 container) {
+    final EWAHIterator32 i = a.getEWAHIterator();
+    final EWAHIterator32 j = getEWAHIterator();
+    final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i);
+    final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j);
+    // Walk both bitmaps in lockstep until one of them is exhausted.
+    while ((rlwi.size()>0) && (rlwj.size()>0)) {
+      // First consume the run (clean-word) parts of the current markers.
+      while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
+        final boolean i_is_prey = rlwi.getRunningLength() < rlwj
+            .getRunningLength();
+        final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi
+            : rlwj;
+        final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj
+            : rlwi;
+        if (predator.getRunningBit() == false) {
+          // x AND 0 == 0: emit zeros for the whole predator run and skip the
+          // same number of words on both sides.
+          container.addStreamOfEmptyWords(false, predator.getRunningLength());
+          prey.discardFirstWords(predator.getRunningLength());
+          predator.discardFirstWords(predator.getRunningLength());
+        } else {
+          // x AND 1 == x: copy the prey's words for the length of the run.
+          // NOTE(review): discharge() presumably returns how many words it
+          // wrote; the shortfall is padded with zero words. Confirm against
+          // IteratingBufferedRunningLengthWord32.
+          final int index = prey.discharge(container, predator.getRunningLength());
+          container.addStreamOfEmptyWords(false, predator.getRunningLength()
+              - index);
+          predator.discardFirstWords(predator.getRunningLength());
+        }
+      }
+      // Both sides now expose literal words: AND them pairwise.
+      final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
+          rlwj.getNumberOfLiteralWords());
+      if (nbre_literal > 0) {
+        for (int k = 0; k < nbre_literal; ++k)
+          container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
+        rlwi.discardFirstWords(nbre_literal);
+        rlwj.discardFirstWords(nbre_literal);
+      }
+    }
+    // Whatever is left in the longer operand ANDs with nothing, so it is
+    // written out as empty words, and only when the container size must be
+    // adjusted to the larger of the two inputs.
+    if (adjustContainerSizeWhenAggregating) {
+      final boolean i_remains = rlwi.size() > 0;
+      final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi
+          : rlwj;
+      remaining.dischargeAsEmpty(container);
+      container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits()));
+    }
+  }
+
+
+  /**
+   * Returns the cardinality of the result of a bitwise AND of the values of the
+   * current bitmap with some other bitmap. Avoids needing to allocate an
+   * intermediate bitmap to hold the result of the AND.
+   *
+   * @param a
+   *          the other bitmap
+   * @return the cardinality
+   */
+  public int andCardinality(final EWAHCompressedBitmap32 a) {
+    // The counter is passed as the container of andToContainer and then
+    // queried for its count, so no result bitmap is built here.
+    final BitCounter32 counter = new BitCounter32();
+    andToContainer(a, counter);
+    return counter.getCount();
+  }
+
+  /**
+   * Returns a new compressed bitmap containing the bitwise AND NOT values of
+   * the current bitmap with some other bitmap.
+   *
+   * The running time is proportional to the sum of the compressed sizes (as
+   * reported by sizeInBytes()).
+   *
+   * If you are not planning on adding to the resulting bitmap, you may call the trim()
+   * method to reduce memory usage.
+ * + * @param a + * the other bitmap + * @return the EWAH compressed bitmap + */ + @Override +public EWAHCompressedBitmap32 andNot(final EWAHCompressedBitmap32 a) { + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + container + .reserve(this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords + : a.actualsizeinwords); + andNotToContainer(a, container); + return container; + } + + /** + * Returns a new compressed bitmap containing the bitwise AND NOT values of + * the current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * @param a the other bitmap + * @param container where we store the result + */ + public void andNotToContainer(final EWAHCompressedBitmap32 a, + final BitmapStorage32 container) { + final EWAHIterator32 i = getEWAHIterator(); + final EWAHIterator32 j = a.getEWAHIterator(); + final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); + final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); + while ((rlwi.size()>0) && (rlwj.size()>0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi : rlwj; + final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? 
rlwj + : rlwi; + if ( ((predator.getRunningBit() == true) && (i_is_prey)) + || ((predator.getRunningBit() == false) && (!i_is_prey))){ + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else if (i_is_prey) { + int index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + int index = prey.dischargeNegated(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & (~rlwj.getLiteralWordAt(k))); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size()>0; + final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi : rlwj; + if(i_remains) + remaining.discharge(container); + else if (adjustContainerSizeWhenAggregating) + remaining.dischargeAsEmpty(container); + if (adjustContainerSizeWhenAggregating) + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + + } + + /** + * Returns the cardinality of the result of a bitwise AND NOT of the values of + * the current bitmap with some other bitmap. Avoids needing to allocate an + * intermediate bitmap to hold the result of the OR. 
+ * + * @param a + * the other bitmap + * @return the cardinality + */ + public int andNotCardinality(final EWAHCompressedBitmap32 a) { + final BitCounter32 counter = new BitCounter32(); + andNotToContainer(a, counter); + return counter.getCount(); + } + + /** + * reports the number of bits set to true. Running time is proportional to + * compressed size (as reported by sizeInBytes). + * + * @return the number of bits set to true + */ + public int cardinality() { + int counter = 0; + final EWAHIterator32 i = new EWAHIterator32(this, + this.actualsizeinwords); + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + if (localrlw.getRunningBit()) { + counter += wordinbits * localrlw.getRunningLength(); + } + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + counter += Integer.bitCount(i.buffer()[i.literalWords() + j]); + } + } + return counter; + } + + /** + * Clear any set bits and set size in bits back to 0 + */ + public void clear() { + this.sizeinbits = 0; + this.actualsizeinwords = 1; + this.rlw.position = 0; + // buffer is not fully cleared but any new set operations should overwrite + // stale data + this.buffer[0] = 0; + } + + /* + * @see java.lang.Object#clone() + */ + @Override + public EWAHCompressedBitmap32 clone() throws CloneNotSupportedException { + final EWAHCompressedBitmap32 clone = (EWAHCompressedBitmap32) super.clone(); + clone.buffer = this.buffer.clone(); + clone.actualsizeinwords = this.actualsizeinwords; + clone.sizeinbits = this.sizeinbits; + return clone; + } + + /** + * Deserialize. + * + * @param in + * the DataInput stream + * @throws IOException + * Signals that an I/O exception has occurred. 
+ */ + public void deserialize(DataInput in) throws IOException { + this.sizeinbits = in.readInt(); + this.actualsizeinwords = in.readInt(); + if (this.buffer.length < this.actualsizeinwords) { + this.buffer = new int[this.actualsizeinwords]; + } + for (int k = 0; k < this.actualsizeinwords; ++k) + this.buffer[k] = in.readInt(); + this.rlw = new RunningLengthWord32(this, in.readInt()); + } + + /** + * Check to see whether the two compressed bitmaps contain the same set bits. + * + * @see Object#equals(Object) + */ + @Override + public boolean equals(Object o) { + if (o instanceof EWAHCompressedBitmap32) { + try { + this.xorToContainer((EWAHCompressedBitmap32) o, new NonEmptyVirtualStorage32()); + return true; + } catch (NonEmptyVirtualStorage32.NonEmptyException e) { + return false; + } + } + return false; + } + + /** + * For experts: You want to add many zeroes or ones faster? + * + * This method does not update sizeinbits. + * + * @param v + * the boolean value + * @param number + * the number (must be greater than 0) + */ + private void fastaddStreamOfEmptyWords(final boolean v, int number) { + if ((this.rlw.getRunningBit() != v) && (this.rlw.size() == 0)) { + this.rlw.setRunningBit(v); + } else if ((this.rlw.getNumberOfLiteralWords() != 0) + || (this.rlw.getRunningBit() != v)) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + } + final int runlen = this.rlw.getRunningLength(); + final int whatwecanadd = number < RunningLengthWord32.largestrunninglengthcount + - runlen ? 
number : RunningLengthWord32.largestrunninglengthcount + - runlen; + this.rlw.setRunningLength(runlen + whatwecanadd); + number -= whatwecanadd; + while (number >= RunningLengthWord32.largestrunninglengthcount) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(RunningLengthWord32.largestrunninglengthcount); + number -= RunningLengthWord32.largestrunninglengthcount; + } + if (number > 0) { + push_back(0); + this.rlw.position = this.actualsizeinwords - 1; + if (v) + this.rlw.setRunningBit(v); + this.rlw.setRunningLength(number); + } + } + + /** + * Gets an EWAHIterator over the data. This is a customized iterator which + * iterates over run length word. For experts only. + * + * @return the EWAHIterator + */ + public EWAHIterator32 getEWAHIterator() { + return new EWAHIterator32(this, this.actualsizeinwords); + } + + /** + * @return the IteratingRLW iterator corresponding to this bitmap + */ + public IteratingRLW32 getIteratingRLW() { + return new IteratingBufferedRunningLengthWord32(this); + } + + /** + * get the locations of the true values as one vector. 
(may use more memory + * than iterator()) + * + * @return the positions + */ + public List getPositions() { + final ArrayList v = new ArrayList(); + final EWAHIterator32 i = new EWAHIterator32(this, + this.actualsizeinwords); + int pos = 0; + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + if (localrlw.getRunningBit()) { + for (int j = 0; j < localrlw.getRunningLength(); ++j) { + for (int c = 0; c < wordinbits; ++c) + v.add(new Integer(pos++)); + } + } else { + pos += wordinbits * localrlw.getRunningLength(); + } + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + int data = i.buffer()[i.literalWords() + j]; + while (data != 0) { + final int ntz = Integer.numberOfTrailingZeros(data); + data ^= (1 << ntz); + v.add(new Integer(ntz + pos)); + } + pos += wordinbits; + } + } + while ((v.size() > 0) + && (v.get(v.size() - 1).intValue() >= this.sizeinbits)) + v.remove(v.size() - 1); + return v; + } + + /** + * Returns a customized hash code (based on Karp-Rabin). Naturally, if the + * bitmaps are equal, they will hash to the same value. + * + */ + @Override + public int hashCode() { + int karprabin = 0; + final int B = 31; + final EWAHIterator32 i = new EWAHIterator32(this, + this.actualsizeinwords); + while( i.hasNext() ) { + i.next(); + if (i.rlw.getRunningBit() == true) { + karprabin += B * karprabin + i.rlw.getRunningLength(); + } + for (int k = 0; k < i.rlw.getNumberOfLiteralWords(); ++k) { + karprabin += B * karprabin + this.buffer[k + i.literalWords()]; + } + } + return karprabin; + } + + /** + * Return true if the two EWAHCompressedBitmap have both at least one true bit + * in the same position. Equivalently, you could call "and" and check whether + * there is a set bit, but intersects will run faster if you don't need the + * result of the "and" operation. 
+ * + * @param a + * the other bitmap + * @return whether they intersect + */ + public boolean intersects(final EWAHCompressedBitmap32 a) { + NonEmptyVirtualStorage32 nevs = new NonEmptyVirtualStorage32(); + try { + this.andToContainer(a, nevs); + } catch (NonEmptyVirtualStorage32.NonEmptyException nee) { + return true; + } + return false; + } + + /** + * Iterator over the set bits (this is what most people will want to use to + * browse the content if they want an iterator). The location of the set bits + * is returned, in increasing order. + * + * @return the int iterator + */ + public IntIterator intIterator() { + return new IntIteratorImpl32( + new EWAHIterator32(this, this.actualsizeinwords)); + } + + /** + * iterate over the positions of the true values. This is similar to + * intIterator(), but it uses Java generics. + * + * @return the iterator + */ + @Override +public Iterator iterator() { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return new Integer(this.under.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("bitsets do not support remove"); + } + + final private IntIterator under = intIterator(); + }; + } + + /** + * For internal use. 
+   *
+   * Appends the bitwise complement of {@code number} words taken from
+   * {@code data}, beginning at index {@code start}, to the end of the buffer,
+   * enlarging the buffer first when it is too small. Only the buffer and
+   * actualsizeinwords are updated here.
+   *
+   * @param data
+   *          the array of words to be added
+   * @param start
+   *          the starting point
+   * @param number
+   *          the number of words to add
+   */
+  private void negative_push_back(final int[] data, final int start,
+      final int number) {
+    // Grow until the buffer is strictly larger than the required word count.
+    while (this.actualsizeinwords + number >= this.buffer.length) {
+      final int oldbuffer[] = this.buffer;
+      // Below 32768 words the required size is doubled; above that it grows
+      // by half.
+      if(this.actualsizeinwords + number < 32768)
+        this.buffer = new int[(this.actualsizeinwords + number) * 2];
+      // If the 1.5x computation wrapped around (result smaller than the
+      // required size), fall back to the largest int value.
+      else if ((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number)
+        this.buffer = new int[Integer.MAX_VALUE];
+      else
+        this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2];
+      System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length);
+      // Keep the buffer reachable through the running-length word's parent
+      // pointing at the new array.
+      this.rlw.parent.buffer = this.buffer;
+    }
+    // Store the complemented words right after the words already in use.
+    for (int k = 0; k < number; ++k)
+      this.buffer[this.actualsizeinwords + k] = ~data[start + k];
+    this.actualsizeinwords += number;
+  }
+
+  /**
+   * Negate (bitwise) the current bitmap. To get a negated copy, do
+   * EWAHCompressedBitmap x= ((EWAHCompressedBitmap) mybitmap.clone()); x.not();
+   *
+   * The running time is proportional to the compressed size (as reported by
+   * sizeInBytes()).
+ * + */ + @Override +public void not() { + final EWAHIterator32 i = new EWAHIterator32(this, + this.actualsizeinwords); + if (!i.hasNext()) + return; + while (true) { + final RunningLengthWord32 rlw1 = i.next(); + rlw1.setRunningBit(!rlw1.getRunningBit()); + for (int j = 0; j < rlw1.getNumberOfLiteralWords(); ++j) { + i.buffer()[i.literalWords() + j] = ~i.buffer()[i.literalWords() + j]; + } + if (!i.hasNext()) {// must potentially adjust the last literal word + final int usedbitsinlast = this.sizeinbits % wordinbits; + if (usedbitsinlast == 0) + return; + + if (rlw1.getNumberOfLiteralWords() == 0) { + if((rlw1.getRunningLength()>0) && (rlw1.getRunningBit())) { + rlw1.setRunningLength(rlw1.getRunningLength()-1); + this.addLiteralWord((~0) >>> (wordinbits - usedbitsinlast)); + } + return; + } + i.buffer()[i.literalWords() + rlw1.getNumberOfLiteralWords() - 1] &= ((~0) >>> (wordinbits - usedbitsinlast)); + return; + } + + } + } + + + /** + * Returns a new compressed bitmap containing the bitwise OR values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param a + * the other bitmap + * @return the EWAH compressed bitmap + */ + @Override +public EWAHCompressedBitmap32 or(final EWAHCompressedBitmap32 a) { + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + container.reserve(this.actualsizeinwords + a.actualsizeinwords); + orToContainer(a, container); + return container; + } + + /** + * Computes the bitwise or between the current bitmap and the bitmap "a". + * Stores the result in the container. 
+ * + * @param a + * the other bitmap + * @param container + * where we store the result + */ + public void orToContainer(final EWAHCompressedBitmap32 a, final BitmapStorage32 container) { + final EWAHIterator32 i = a.getEWAHIterator(); + final EWAHIterator32 j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); + final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); + while ((rlwi.size()>0) && (rlwj.size()>0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? rlwi + : rlwj; + final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == true) { + container.addStreamOfEmptyWords(true, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + int index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) { + container.add(rlwi.getLiteralWordAt(k) | rlwj.getLiteralWordAt(k)); + } + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size()>0; + final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi : rlwj; + remaining.discharge(container); + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } + /** + * Returns the cardinality of the result of a bitwise OR of the values of the + * current bitmap with some other bitmap. 
Avoids needing to allocate an + * intermediate bitmap to hold the result of the OR. + * + * @param a + * the other bitmap + * @return the cardinality + */ + public int orCardinality(final EWAHCompressedBitmap32 a) { + final BitCounter32 counter = new BitCounter32(); + orToContainer(a, counter); + return counter.getCount(); + } + + /** + * For internal use. + * + * @param data + * the word to be added + */ + private void push_back(final int data) { + if (this.actualsizeinwords == this.buffer.length) { + final int oldbuffer[] = this.buffer; + if(oldbuffer.length < 32768) + this.buffer = new int[oldbuffer.length * 2]; + else if (oldbuffer.length * 3 / 2 < oldbuffer.length) + this.buffer = new int[Integer.MAX_VALUE]; + else + this.buffer = new int[oldbuffer.length * 3 / 2]; + System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); + this.rlw.parent.buffer = this.buffer; + } + this.buffer[this.actualsizeinwords++] = data; + } + + /** + * For internal use. + * + * @param data + * the array of words to be added + * @param start + * the starting point + * @param number + * the number of words to add + */ + private void push_back(final int[] data, final int start, final int number) { + if (this.actualsizeinwords + number >= this.buffer.length) { + final int oldbuffer[] = this.buffer; + if(this.actualsizeinwords + number < 32768) + this.buffer = new int[(this.actualsizeinwords + number) * 2]; + else if((this.actualsizeinwords + number) * 3 / 2 < this.actualsizeinwords + number) //overflow + this.buffer = new int[Integer.MAX_VALUE]; + else + this.buffer = new int[(this.actualsizeinwords + number) * 3 / 2]; + System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); + this.rlw.parent.buffer = this.buffer; + } + System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number); + this.actualsizeinwords += number; + } + + /* + * @see java.io.Externalizable#readExternal(java.io.ObjectInput) + */ + @Override +public void readExternal(ObjectInput in) 
throws IOException { + deserialize(in); + } + + /** + * For internal use (trading off memory for speed). + * + * @param size + * the number of words to allocate + * @return True if the operation was a success. + */ + private boolean reserve(final int size) { + if (size > this.buffer.length) { + final int oldbuffer[] = this.buffer; + this.buffer = new int[size]; + System.arraycopy(oldbuffer, 0, this.buffer, 0, oldbuffer.length); + this.rlw.parent.buffer = this.buffer; + return true; + } + return false; + } + + /** + * Serialize. + * + * @param out + * the DataOutput stream + * @throws IOException + * Signals that an I/O exception has occurred. + */ + public void serialize(DataOutput out) throws IOException { + out.writeInt(this.sizeinbits); + out.writeInt(this.actualsizeinwords); + for (int k = 0; k < this.actualsizeinwords; ++k) + out.writeInt(this.buffer[k]); + out.writeInt(this.rlw.position); + } + + /** + * Report the size required to serialize this bitmap + * + * @return the size in bytes + */ + public int serializedSizeInBytes() { + return this.sizeInBytes() + 3 * 4; + } + + /** + * Query the value of a single bit. Relying on this method when speed is + * needed is discouraged. The complexity is linear with the size of the + * bitmap. + * + * (This implementation is based on zhenjl's Go version of JavaEWAH.) 
+ * + * @param i + * the bit we are interested in + * @return whether the bit is set to true + */ + public boolean get(final int i) { + if ((i < 0) || (i >= this.sizeinbits)) + return false; + int WordChecked = 0; + final IteratingRLW32 j = getIteratingRLW(); + final int wordi = i / wordinbits; + while (WordChecked <= wordi) { + WordChecked += j.getRunningLength(); + if (wordi < WordChecked) { + return j.getRunningBit(); + } + if (wordi < WordChecked + j.getNumberOfLiteralWords()) { + final int w = j.getLiteralWordAt(wordi + - WordChecked); + return (w & (1 << i)) != 0; + } + WordChecked += j.getNumberOfLiteralWords(); + j.next(); + } + return false; + } + + /** + * Set the bit at position i to true, the bits must be set in (strictly) increasing + * order. For example, set(15) and then set(7) will fail. You must do set(7) + * and then set(15). + * + * @param i + * the index + * @return true if the value was set (always true when i is greater or equal to sizeInBits()). + * @throws IndexOutOfBoundsException + * if i is negative or greater than Integer.MAX_VALUE - 32 + */ + + public boolean set(final int i) { + if ((i > Integer.MAX_VALUE - wordinbits) || (i < 0)) + throw new IndexOutOfBoundsException("Set values should be between 0 and " + + (Integer.MAX_VALUE - wordinbits)); + if (i < this.sizeinbits) + return false; + // distance in words: + final int dist = (i + wordinbits) / wordinbits + - (this.sizeinbits + wordinbits - 1) / wordinbits; + this.sizeinbits = i + 1; + if (dist > 0) {// easy + if (dist > 1) + fastaddStreamOfEmptyWords(false, dist - 1); + addLiteralWord(1 << (i % wordinbits)); + return true; + } + if (this.rlw.getNumberOfLiteralWords() == 0) { + this.rlw.setRunningLength(this.rlw.getRunningLength() - 1); + addLiteralWord(1 << (i % wordinbits)); + return true; + } + this.buffer[this.actualsizeinwords - 1] |= 1 << (i % wordinbits); + if (this.buffer[this.actualsizeinwords - 1] == ~0) { + this.buffer[this.actualsizeinwords - 1] = 0; + 
--this.actualsizeinwords; + this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1); + // next we add one clean word + addEmptyWord(true); + } + return true; + } + + /** + * Set the size in bits. This does not change the compressed bitmap. + * + */ + @Override +public void setSizeInBits(final int size) { + if((size+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits!= (this.sizeinbits+EWAHCompressedBitmap32.wordinbits-1)/EWAHCompressedBitmap32.wordinbits) + throw new RuntimeException("You can only reduce the size of the bitmap within the scope of the last word. To extend the bitmap, please call setSizeInbits(int,boolean): "+size+" "+this.sizeinbits); + this.sizeinbits = size; + } + + /** + * Change the reported size in bits of the *uncompressed* bitmap represented + * by this compressed bitmap. It may change the underlying compressed bitmap. + * It is not possible to reduce the sizeInBits, but + * it can be extended. The new bits are set to false or true depending on the + * value of defaultvalue. + * + * @param size + * the size in bits + * @param defaultvalue + * the default boolean value + * @return true if the update was possible + */ + public boolean setSizeInBits(final int size, final boolean defaultvalue) { + if (size < this.sizeinbits) + return false; + if (defaultvalue == false) + extendEmptyBits(this, this.sizeinbits, size); + else { + // next bit could be optimized + while (((this.sizeinbits % wordinbits) != 0) && (this.sizeinbits < size)) { + this.set(this.sizeinbits); + } + this.addStreamOfEmptyWords(defaultvalue, (size / wordinbits) + - this.sizeinbits / wordinbits); + // next bit could be optimized + while (this.sizeinbits < size) { + this.set(this.sizeinbits); + } + } + this.sizeinbits = size; + return true; + } + + /** + * Returns the size in bits of the *uncompressed* bitmap represented by this + * compressed bitmap. Initially, the sizeInBits is zero. It is extended + * automatically when you set bits to true. 
+ * + * @return the size in bits + */ + @Override +public int sizeInBits() { + return this.sizeinbits; + } + + /** + * Report the *compressed* size of the bitmap (equivalent to memory usage, + * after accounting for some overhead). + * + * @return the size in bytes + */ + @Override +public int sizeInBytes() { + return this.actualsizeinwords * (wordinbits / 8); + } + + /** + * Populate an array of (sorted integers) corresponding to the location of the + * set bits. + * + * @return the array containing the location of the set bits + */ + public int[] toArray() { + int[] ans = new int[this.cardinality()]; + int inanspos = 0; + int pos = 0; + final EWAHIterator32 i = new EWAHIterator32(this, + this.actualsizeinwords); + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + if (localrlw.getRunningBit()) { + for (int j = 0; j < localrlw.getRunningLength(); ++j) { + for (int c = 0; c < wordinbits; ++c) { + ans[inanspos++] = pos++; + } + } + } else { + pos += wordinbits * localrlw.getRunningLength(); + } + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + int data = i.buffer()[i.literalWords() + j]; + if (!usetrailingzeros) { + for (int c = 0; c < wordinbits; ++c) { + if ((data & (1 << c)) != 0) + ans[inanspos++] = c + pos; + } + pos += wordinbits; + } else { + while (data != 0) { + final int ntz = Integer.numberOfTrailingZeros(data); + data ^= (1l << ntz); + ans[inanspos++] = ntz + pos; + } + pos += wordinbits; + } + } + } + return ans; + + } + + /** + * A more detailed string describing the bitmap (useful for debugging). 
+ * + * @return the string + */ + public String toDebugString() { + String ans = " EWAHCompressedBitmap, size in bits = " + this.sizeinbits + + " size in words = " + this.actualsizeinwords + "\n"; + final EWAHIterator32 i = new EWAHIterator32(this, + this.actualsizeinwords); + while (i.hasNext()) { + RunningLengthWord32 localrlw = i.next(); + if (localrlw.getRunningBit()) { + ans += localrlw.getRunningLength() + " 1x11\n"; + } else { + ans += localrlw.getRunningLength() + " 0x00\n"; + } + ans += localrlw.getNumberOfLiteralWords() + " dirties\n"; + for (int j = 0; j < localrlw.getNumberOfLiteralWords(); ++j) { + int data = i.buffer()[i.literalWords() + j]; + ans += "\t" + data + "\n"; + } + } + return ans; + } + + /** + * A string describing the bitmap. + * + * @return the string + */ + @Override + public String toString() { + StringBuffer answer = new StringBuffer(); + IntIterator i = this.intIterator(); + answer.append("{"); + if (i.hasNext()) + answer.append(i.next()); + while (i.hasNext()) { + answer.append(","); + answer.append(i.next()); + } + answer.append("}"); + return answer.toString(); + } + /** + * swap the content of the bitmap with another. + * + * @param other + * bitmap to swap with + */ + public void swap(final EWAHCompressedBitmap32 other) { + int[] tmp = this.buffer; + this.buffer = other.buffer; + other.buffer = tmp; + + int tmp2 = this.rlw.position; + this.rlw.position = other.rlw.position; + other.rlw.position = tmp2; + + int tmp3 = this.actualsizeinwords; + this.actualsizeinwords = other.actualsizeinwords; + other.actualsizeinwords = tmp3; + + int tmp4 = this.sizeinbits; + this.sizeinbits = other.sizeinbits; + other.sizeinbits = tmp4; + } + /** + * Reduce the internal buffer to its minimal allowable size (given + * by this.actualsizeinwords). This can free memory. 
+ */ + public void trim() { + this.buffer = Arrays.copyOf(this.buffer, this.actualsizeinwords); + } + + /* + * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput) + */ + @Override +public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param a + * the other bitmap + * @return the EWAH compressed bitmap + */ + @Override +public EWAHCompressedBitmap32 xor(final EWAHCompressedBitmap32 a) { + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + container.reserve(this.actualsizeinwords + a.actualsizeinwords); + xorToContainer(a, container); + return container; + } + + /** + * Computes a new compressed bitmap containing the bitwise XOR values of the + * current bitmap with some other bitmap. + * + * The running time is proportional to the sum of the compressed sizes (as + * reported by sizeInBytes()). + * + * @param a + * the other bitmap + * @param container + * where we store the result + */ + public void xorToContainer(final EWAHCompressedBitmap32 a, + final BitmapStorage32 container) { + final EWAHIterator32 i = a.getEWAHIterator(); + final EWAHIterator32 j = getEWAHIterator(); + final IteratingBufferedRunningLengthWord32 rlwi = new IteratingBufferedRunningLengthWord32(i); + final IteratingBufferedRunningLengthWord32 rlwj = new IteratingBufferedRunningLengthWord32(j); + while ((rlwi.size()>0) && (rlwj.size()>0)) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingBufferedRunningLengthWord32 prey = i_is_prey ? 
rlwi : rlwj; + final IteratingBufferedRunningLengthWord32 predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == false) { + int index = prey.discharge(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + int index = prey.dischargeNegated(container, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + final boolean i_remains = rlwi.size()>0; + final IteratingBufferedRunningLengthWord32 remaining = i_remains ? rlwi : rlwj; + remaining.discharge(container); + container.setSizeInBits(Math.max(sizeInBits(), a.sizeInBits())); + } + + /** + * Returns the cardinality of the result of a bitwise XOR of the values of the + * current bitmap with some other bitmap. Avoids needing to allocate an + * intermediate bitmap to hold the result of the XOR. + * + * @param a + * the other bitmap + * @return the cardinality + */ + public int xorCardinality(final EWAHCompressedBitmap32 a) { + final BitCounter32 counter = new BitCounter32(); + xorToContainer(a, counter); + return counter.getCount(); + } + + /** + * For internal use. Computes the bitwise and of the provided bitmaps and + * stores the result in the container. + * + * @param container + * where the result is stored + * @param bitmaps + * bitmaps to AND + */ + public static void andWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32...
bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + if(bitmaps.length == 2) { + bitmaps[0].andToContainer(bitmaps[1],container); + return; + } + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length - 1; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + answer.andToContainer(bitmaps[bitmaps.length - 1], container); + } + + /** + * Returns a new compressed bitmap containing the bitwise AND values of the + * provided bitmaps. + * + * It may or may not be faster than doing the aggregation two-by-two (A.and(B).and(C)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param bitmaps + * bitmaps to AND together + * @return result of the AND + */ + public static EWAHCompressedBitmap32 and( + final EWAHCompressedBitmap32... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0]; + if(bitmaps.length == 2) return bitmaps[0].and(bitmaps[1]); + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + EWAHCompressedBitmap32 tmp = new EWAHCompressedBitmap32(); + bitmaps[0].andToContainer(bitmaps[1], answer); + for(int k = 2; k < bitmaps.length; ++k) { + answer.andToContainer(bitmaps[k], tmp); + tmp.swap(answer); + tmp.clear(); + } + return answer; + } + + /** + * Returns the cardinality of the result of a bitwise AND of the values of the + * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold + * the result of the AND. + * + * @param bitmaps + * bitmaps to AND + * @return the cardinality + */ + public static int andCardinality(final EWAHCompressedBitmap32...
bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); + final BitCounter32 counter = new BitCounter32(); + andWithContainer(counter, bitmaps); + return counter.getCount(); + } + + + /** + * Return a bitmap with the bit set to true at the given + * positions. The positions should be given in sorted order. + * + * (This is a convenience method.) + * + * @since 0.4.5 + * @param setbits list of set bit positions + * @return the bitmap + */ + public static EWAHCompressedBitmap32 bitmapOf(int ... setbits) { + EWAHCompressedBitmap32 a = new EWAHCompressedBitmap32(); + for (int k : setbits) + a.set(k); + return a; + } + + + + + /** + * For internal use. This simply adds a stream of words made of zeroes so that + * we pad to the desired size. + * + * @param storage + * bitmap to extend + * @param currentSize + * current size (in bits) + * @param newSize + * new desired size (in bits) + */ + private static void extendEmptyBits(final BitmapStorage32 storage, + final int currentSize, final int newSize) { + final int currentLeftover = currentSize % wordinbits; + final int finalLeftover = newSize % wordinbits; + storage.addStreamOfEmptyWords(false, (newSize / wordinbits) - currentSize + / wordinbits + (finalLeftover != 0 ? 1 : 0) + + (currentLeftover != 0 ? -1 : 0)); + } + + /** + * For internal use. Computes the bitwise or of the provided bitmaps and + * stores the result in the container. + * @param container where store the result + * @param bitmaps to be aggregated + */ + public static void orWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... 
bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + int size = 0; + int sinbits = 0; + for (EWAHCompressedBitmap32 b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation32.bufferedorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation32.orToContainer(container, bitmaps); + } + } + + /** + * For internal use. Computes the bitwise xor of the provided bitmaps and + * stores the result in the container. + * @param container where store the result + * @param bitmaps to be aggregated + */ + public static void xorWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32... bitmaps) { + if (bitmaps.length < 2) + throw new IllegalArgumentException("You should provide at least two bitmaps, provided "+bitmaps.length); + int size = 0; + int sinbits = 0; + for (EWAHCompressedBitmap32 b : bitmaps) { + size += b.sizeInBytes(); + if (sinbits < b.sizeInBits()) + sinbits = b.sizeInBits(); + } + if (size * 8 > sinbits) { + FastAggregation32.bufferedxorWithContainer(container, 65536, bitmaps); + } else { + FastAggregation32.xorToContainer(container, bitmaps); + } + } + + /** + * Returns a new compressed bitmap containing the bitwise OR values of the + * provided bitmaps. This is typically faster than doing the aggregation + * two-by-two (A.or(B).or(C).or(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param bitmaps + * bitmaps to OR together + * @return result of the OR + */ + public static EWAHCompressedBitmap32 or( + final EWAHCompressedBitmap32... 
bitmaps) { + if(bitmaps.length == 1) return bitmaps[0]; + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + int largestSize = 0; + for (EWAHCompressedBitmap32 bitmap : bitmaps) { + largestSize = Math.max(bitmap.actualsizeinwords, largestSize); + } + container.reserve((int) (largestSize * 1.5)); + orWithContainer(container, bitmaps); + return container; + } + + + /** + * Returns a new compressed bitmap containing the bitwise XOR values of the + * provided bitmaps. This is typically faster than doing the aggregation + * two-by-two (A.xor(B).xor(C).xor(D)). + * + * If only one bitmap is provided, it is returned as is. + * + * If you are not planning on adding to the resulting bitmap, you may call the trim() + * method to reduce memory usage. + * + * @param bitmaps + * bitmaps to XOR together + * @return result of the XOR + */ + public static EWAHCompressedBitmap32 xor( + final EWAHCompressedBitmap32... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0]; + final EWAHCompressedBitmap32 container = new EWAHCompressedBitmap32(); + int largestSize = 0; + for (EWAHCompressedBitmap32 bitmap : bitmaps) { + largestSize = Math.max(bitmap.actualsizeinwords, largestSize); + } + container.reserve((int) (largestSize * 1.5)); + xorWithContainer(container, bitmaps); + return container; + } + + /** + * Returns the cardinality of the result of a bitwise OR of the values of the + * provided bitmaps. Avoids needing to allocate an intermediate bitmap to hold + * the result of the OR. + * + * @param bitmaps + * bitmaps to OR + * @return the cardinality + */ + public static int orCardinality(final EWAHCompressedBitmap32... bitmaps) { + if(bitmaps.length == 1) return bitmaps[0].cardinality(); + final BitCounter32 counter = new BitCounter32(); + orWithContainer(counter, bitmaps); + return counter.getCount(); + } + + /** The actual size in words. 
*/ + int actualsizeinwords = 1; + + /** The buffer (array of 32-bit words) */ + int buffer[] = null; + + /** The current (last) running length word. */ + RunningLengthWord32 rlw = null; + + /** sizeinbits: number of bits in the (uncompressed) bitmap. */ + int sizeinbits = 0; + + /** + * The Constant defaultbuffersize: default memory allocation when the object + * is constructed. + */ + static final int defaultbuffersize = 4; + + /** optimization option **/ + public static final boolean usetrailingzeros = true; + + /** whether we adjust after some aggregation by adding in zeroes **/ + public static final boolean adjustContainerSizeWhenAggregating = true; + + /** The Constant wordinbits represents the number of bits in a int. */ + public static final int wordinbits = 32; + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHIterator32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHIterator32.java new file mode 100644 index 000000000..dee08341d --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHIterator32.java @@ -0,0 +1,98 @@ +package com.fr.third.googlecode.javaewah32; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * The class EWAHIterator represents a special type of + * efficient iterator iterating over (uncompressed) words of bits. + * + * @author Daniel Lemire + * @since 0.5.0 + * + */ +public final class EWAHIterator32 implements Cloneable { + + /** + * Instantiates a new eWAH iterator. + * + * @param a the array of words + * @param sizeinwords the number of words that are significant in the array of words + */ + public EWAHIterator32(final EWAHCompressedBitmap32 a, final int sizeinwords) { + this.rlw = new RunningLengthWord32(a, 0); + this.size = sizeinwords; + this.pointer = 0; + } + + /** + * Allow expert developers to instantiate an EWAHIterator. 
+ * + * @param bitmap we want to iterate over + * @return an iterator + */ + public static EWAHIterator32 getEWAHIterator(EWAHCompressedBitmap32 bitmap) { + return bitmap.getEWAHIterator(); + } + + /** + * Access to the array of words + * + * @return the int[] + */ + public int[] buffer() { + return this.rlw.parent.buffer; + } + + /** + * Position of the literal words represented by this running length word. + * + * @return the int + */ + public int literalWords() { + return this.pointer - this.rlw.getNumberOfLiteralWords(); + } + + /** + * Checks for next. + * + * @return true, if successful + */ + public boolean hasNext() { + return this.pointer < this.size; + } + + /** + * Next running length word. + * + * @return the running length word + */ + public RunningLengthWord32 next() { + this.rlw.position = this.pointer; + this.pointer += this.rlw.getNumberOfLiteralWords() + 1; + return this.rlw; + } + + @Override + public EWAHIterator32 clone() throws CloneNotSupportedException { + EWAHIterator32 ans = (EWAHIterator32) super.clone(); + ans.rlw = this.rlw.clone(); + ans.size = this.size; + ans.pointer = this.pointer; + return ans; + } + + /** The pointer represent the location of the current running length + * word in the array of words (embedded in the rlw attribute). */ + int pointer; + + /** The current running length word. */ + RunningLengthWord32 rlw; + + /** The size in words. */ + int size; + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/FastAggregation32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/FastAggregation32.java new file mode 100644 index 000000000..7ecd45fcd --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/FastAggregation32.java @@ -0,0 +1,377 @@ +package com.fr.third.googlecode.javaewah32; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.PriorityQueue; + + +/** + * Fast algorithms to aggregate many bitmaps. These algorithms are just given as + * reference. 
They may not be faster than the corresponding methods in the + * EWAHCompressedBitmap class. + * + * @author Daniel Lemire + * + */ +public class FastAggregation32 { + + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 32-bit words (per input bitmap) + * @return the and aggregate. + */ + public static EWAHCompressedBitmap32 bufferedand(final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedandWithContainer(answer,bufsize, bitmaps); + return answer; + } + /** + * Compute the and aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 32-bit words (per input bitmap) + * @param bitmaps the source bitmaps + */ + public static void bufferedandWithContainer(final BitmapStorage32 container,final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + + java.util.LinkedList<IteratingBufferedRunningLengthWord32> al = new java.util.LinkedList<IteratingBufferedRunningLengthWord32>(); + for (EWAHCompressedBitmap32 bitmap : bitmaps) { + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufsize*bitmaps.length]; + + for(IteratingRLW32 i : al) + if (i.size() == 0) { + al.clear(); + break; + } + + while (!al.isEmpty()) { + Arrays.fill(hardbitmap, ~0); + int effective = Integer.MAX_VALUE; + for(IteratingRLW32 i : al) { + int eff = IteratorAggregation32.inplaceand(hardbitmap, i); + if (eff < effective) + effective = eff; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + for(IteratingRLW32 i : al) + if (i.size() == 0) { + al.clear(); + break; + } + } + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 32-bit words + * @return the or aggregate.
+ */ + public static EWAHCompressedBitmap32 bufferedor(final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedorWithContainer(answer, bufsize, bitmaps); + return answer; + } + + /** + * Compute the or aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 32-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedorWithContainer(final BitmapStorage32 container,final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + int range = 0; + EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList<IteratingBufferedRunningLengthWord32> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord32>(); + for (EWAHCompressedBitmap32 bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + int effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation32.inplaceor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + + } + container.setSizeInBits(range); + } + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * @param bitmaps the source bitmaps + * @param bufsize buffer size used during the computation in 32-bit words + * @return the xor aggregate. + */ + public static EWAHCompressedBitmap32 bufferedxor(final int bufsize, + final EWAHCompressedBitmap32...
bitmaps) { + EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); + bufferedxorWithContainer(answer, bufsize, bitmaps); + return answer; + } + + + /** + * Compute the xor aggregate using a temporary uncompressed bitmap. + * + * @param container where the aggregate is written + * @param bufsize buffer size used during the computation in 32-bit words + * @param bitmaps the source bitmaps + */ + public static void bufferedxorWithContainer(final BitmapStorage32 container,final int bufsize, + final EWAHCompressedBitmap32... bitmaps) { + int range = 0; + EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); + Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return b.sizeinbits - a.sizeinbits; + } + }); + + java.util.ArrayList<IteratingBufferedRunningLengthWord32> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord32>(); + for (EWAHCompressedBitmap32 bitmap : sbitmaps) { + if (bitmap.sizeinbits > range) + range = bitmap.sizeinbits; + al.add(new IteratingBufferedRunningLengthWord32(bitmap)); + } + int[] hardbitmap = new int[bufsize]; + int maxr = al.size(); + while (maxr > 0) { + int effective = 0; + for (int k = 0; k < maxr; ++k) { + if (al.get(k).size() > 0) { + int eff = IteratorAggregation32.inplacexor(hardbitmap, al.get(k)); + if (eff > effective) + effective = eff; + } else + maxr = k; + } + for (int k = 0; k < effective; ++k) + container.add(hardbitmap[k]); + Arrays.fill(hardbitmap, 0); + } + container.setSizeInBits(range); + } + + /** + * Uses a priority queue to compute the or aggregate. + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void orToContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32 ...
bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue<EWAHCompressedBitmap32> pq = new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, + new Comparator<EWAHCompressedBitmap32>() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap32 x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.or(x2)); + } + pq.poll().orToContainer(pq.poll(), container); + } + + + /** + * Uses a priority queue to compute the xor aggregate. + * @param container where we write the result + * @param bitmaps to be aggregated + */ + public static void xorToContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32 ... bitmaps) { + if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); + PriorityQueue<EWAHCompressedBitmap32> pq = new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, + new Comparator<EWAHCompressedBitmap32>() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeInBytes() - b.sizeInBytes(); + } + }); + for (EWAHCompressedBitmap32 x : bitmaps) { + pq.add(x); + } + while (pq.size() > 2) { + EWAHCompressedBitmap32 x1 = pq.poll(); + EWAHCompressedBitmap32 x2 = pq.poll(); + pq.add(x1.xor(x2)); + } + pq.poll().xorToContainer(pq.poll(), container); + } + + /** + * For internal use. Computes the bitwise or of the provided bitmaps and + * stores the result in the container. (This used to be the default.) + * + * @deprecated use EWAHCompressedBitmap32.or instead + * @since 0.4.0 + * @param container where store the result + * @param bitmaps to be aggregated + */ + @Deprecated + public static void legacy_orWithContainer(final BitmapStorage32 container, + final EWAHCompressedBitmap32...
bitmaps) { + if (bitmaps.length == 2) { + // should be more efficient + bitmaps[0].orToContainer(bitmaps[1], container); + return; + } + + // Sort the bitmaps in descending order by sizeinbits. We will exhaust the + // sorted bitmaps from right to left. + final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); + Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap32>() { + @Override + public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { + return a.sizeinbits < b.sizeinbits ? 1 + : a.sizeinbits == b.sizeinbits ? 0 : -1; + } + }); + + final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; + int maxAvailablePos = 0; + for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { + EWAHIterator32 iterator = bitmap.getEWAHIterator(); + if (iterator.hasNext()) { + rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( + iterator); + } + } + + if (maxAvailablePos == 0) { // this never happens... + container.setSizeInBits(0); + return; + } + + int maxSize = sortedBitmaps[0].sizeinbits; + + while (true) { + int maxOneRl = 0; + int minZeroRl = Integer.MAX_VALUE; + int minSize = Integer.MAX_VALUE; + int numEmptyRl = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + int size = rlw.size(); + if (size == 0) { + maxAvailablePos = i; + break; + } + minSize = Math.min(minSize, size); + + if (rlw.getRunningBit()) { + int rl = rlw.getRunningLength(); + maxOneRl = Math.max(maxOneRl, rl); + minZeroRl = 0; + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } else { + int rl = rlw.getRunningLength(); + minZeroRl = Math.min(minZeroRl, rl); + if (rl == 0 && size > 0) { + numEmptyRl++; + } + } + } + + if (maxAvailablePos == 0) { + break; + } else if (maxAvailablePos == 1) { + // only one bitmap is left so just write the rest of it out + rlws[0].discharge(container); + break; + } + + if (maxOneRl > 0) { + container.addStreamOfEmptyWords(true, maxOneRl); + for (int
i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + rlw.discardFirstWords(maxOneRl); + } + } else if (minZeroRl > 0) { + container.addStreamOfEmptyWords(false, minZeroRl); + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + rlw.discardFirstWords(minZeroRl); + } + } else { + int index = 0; + + if (numEmptyRl == 1) { + // if one rlw has literal words to process and the rest have a run of + // 0's we can write them out here + IteratingBufferedRunningLengthWord32 emptyRl = null; + int minNonEmptyRl = Integer.MAX_VALUE; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + int rl = rlw.getRunningLength(); + if (rl == 0) { + assert emptyRl == null; + emptyRl = rlw; + } else { + minNonEmptyRl = Math.min(minNonEmptyRl, rl); + } + } + int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; + if (emptyRl != null) + emptyRl.writeLiteralWords(wordsToWrite, container); + index += wordsToWrite; + } + + while (index < minSize) { + int word = 0; + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + if (rlw.getRunningLength() <= index) { + word |= rlw.getLiteralWordAt(index - rlw.getRunningLength()); + } + } + container.add(word); + index++; + } + for (int i = 0; i < maxAvailablePos; i++) { + IteratingBufferedRunningLengthWord32 rlw = rlws[i]; + rlw.discardFirstWords(minSize); + } + } + } + container.setSizeInBits(maxSize); + } + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorImpl32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorImpl32.java new file mode 100644 index 000000000..041b5d8ff --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorImpl32.java @@ -0,0 +1,90 @@ +package com.fr.third.googlecode.javaewah32; + +/* + * Copyright 2012, Google Inc. + * Licensed under the Apache License, Version 2.0. 
+ */ + +import static com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits; + +import com.fr.third.googlecode.javaewah.IntIterator; + +/** + * The IntIteratorImpl32 is the 32 bit implementation of the IntIterator + * interface, which efficiently returns the stream of integers represented by an + * EWAHIterator32. + * + * @author Colby Ranger + * @since 0.5.6 + */ +final class IntIteratorImpl32 implements IntIterator { + + private final EWAHIterator32 ewahIter; + private final int[] ewahBuffer; + private int position; + private int runningLength; + private int word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasnext; + + IntIteratorImpl32(EWAHIterator32 ewahIter) { + this.ewahIter = ewahIter; + this.ewahBuffer = ewahIter.buffer(); + this.hasnext = this.moveToNext(); + } + + public final boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (!this.ewahIter.hasNext()) { + return false; + } + setRunningLengthWord(this.ewahIter.next()); + } + return true; + } + + @Override + public final boolean hasNext() { + return this.hasnext; + } + + @Override + public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { + final int bit = Long.numberOfTrailingZeros(this.word); + this.word ^= (1l << bit); + answer = this.literalPosition + bit; + } + this.hasnext = this.moveToNext(); + return answer; + } + + private final void setRunningLengthWord(RunningLengthWord32 rlw) { + this.runningLength = wordinbits * rlw.getRunningLength() + + this.position; + if (!rlw.getRunningBit()) { + this.position = this.runningLength; + } + + this.wordPosition = this.ewahIter.literalWords(); + this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); + } + + private final boolean runningHasNext() { + return this.position < this.runningLength; + } + + private final boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < 
this.wordLength) { + this.word = this.ewahBuffer[this.wordPosition++]; + this.literalPosition = this.position; + this.position += wordinbits; + } + return this.word != 0; + } +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java new file mode 100644 index 000000000..6ad8c47a2 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java @@ -0,0 +1,91 @@ +package com.fr.third.googlecode.javaewah32; + +import static com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits; + +import com.fr.third.googlecode.javaewah.IntIterator; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Implementation of an IntIterator over an IteratingRLW. + * Bit positions are computed with the word size declared by EWAHCompressedBitmap32 + * (wordinbits), matching the int-sized words held in this iterator's word field. + */ +public class IntIteratorOverIteratingRLW32 implements IntIterator { + IteratingRLW32 parent; + private int position; + private int runningLength; + private int word; + private int wordPosition; + private int wordLength; + private int literalPosition; + private boolean hasnext; + + /** + * @param p iterator we wish to iterate over + */ + public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) { + this.parent = p; + this.position = 0; + setupForCurrentRunningLengthWord(); + this.hasnext = moveToNext(); + } + + /** + * @return whether we could find another set bit; don't move if there is an unprocessed value + */ + private final boolean moveToNext() { + while (!runningHasNext() && !literalHasNext()) { + if (this.parent.next()) + setupForCurrentRunningLengthWord(); + else return false; + } + return true; + } + + @Override + public boolean hasNext() { + return this.hasnext; + } + + @Override + public final int next() { + final int answer; + if (runningHasNext()) { + answer = this.position++; + } else { +
final int bit = Long.numberOfTrailingZeros(this.word); + this.word ^= (1l << bit); + answer = this.literalPosition + bit; + } + this.hasnext = this.moveToNext(); + return answer; + } + + private final void setupForCurrentRunningLengthWord() { + this.runningLength = wordinbits * this.parent.getRunningLength() + + this.position; + + if (!this.parent.getRunningBit()) { + this.position = this.runningLength; + } + this.wordPosition = 0; + this.wordLength = this.parent.getNumberOfLiteralWords(); + } + + private final boolean runningHasNext() { + return this.position < this.runningLength; + } + + private final boolean literalHasNext() { + while (this.word == 0 && this.wordPosition < this.wordLength) { + this.word = this.parent.getLiteralWordAt(this.wordPosition++); + this.literalPosition = this.position; + this.position += wordinbits; + } + return this.word != 0; + } +} + diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java new file mode 100644 index 000000000..f09906274 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java @@ -0,0 +1,274 @@ +package com.fr.third.googlecode.javaewah32; + + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Mostly for internal use. Similar to BufferedRunningLengthWord32, but automatically + * advances to the next BufferedRunningLengthWord32 as words are discarded. + * + * @since 0.5.0 + * @author Daniel Lemire and David McIntosh + */ +public final class IteratingBufferedRunningLengthWord32 implements IteratingRLW32, Cloneable { + /** + * Instantiates a new iterating buffered running length word. 
+ * + * @param iterator iterator + */ + public IteratingBufferedRunningLengthWord32(final EWAHIterator32 iterator) { + this.iterator = iterator; + this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; + this.buffer = this.iterator.buffer(); + } + + + /** + * Instantiates a new iterating buffered running length word. + * @param bitmap over which we want to iterate + * + */ + public IteratingBufferedRunningLengthWord32(final EWAHCompressedBitmap32 bitmap) { + this(EWAHIterator32.getEWAHIterator(bitmap)); + } + + + /** + * Discard first words, iterating to the next running length word if needed. + * + * @param x the x + */ + @Override +public void discardFirstWords(int x) { + + while (x > 0) { + if (this.brlw.RunningLength > x) { + this.brlw.RunningLength -= x; + return; + } + x -= this.brlw.RunningLength; + this.brlw.RunningLength = 0; + int toDiscard = x > this.brlw.NumberOfLiteralWords ? 
this.brlw.NumberOfLiteralWords : x; + + this.literalWordStartPosition += toDiscard; + this.brlw.NumberOfLiteralWords -= toDiscard; + x -= toDiscard; + if ((x > 0) || (this.brlw.size() == 0)) { + if (!this.iterator.hasNext()) { + break; + } + this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset == 0; + } + } + } + /** + * Write out up to max words, returns how many were written + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public int discharge(BitmapStorage32 container, int max) { + int index = 0; + while ((index < max) && (size() > 0)) { + // first run + int pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + container.addStreamOfEmptyWords(getRunningBit(), pl); + index += pl; + int pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = max - index; + } + writeLiteralWords(pd, container); + discardFirstWords(pl+pd); + index += pd; + } + return index; + } + + /** + * Write out up to max words (negated), returns how many were written + * @param container target for writes + * @param max maximal number of writes + * @return how many written + */ + public int dischargeNegated(BitmapStorage32 container, int max) { + int index = 0; + while ((index < max) && (size() > 0)) { + // first run + int pl = getRunningLength(); + if (index + pl > max) { + pl = max - index; + } + container.addStreamOfEmptyWords(!getRunningBit(), pl); + index += pl; + int pd = getNumberOfLiteralWords(); + if (pd + index > max) { + pd = max - index; + } + writeNegatedLiteralWords(pd, container); + discardFirstWords(pl+pd); + index += pd; + } + return index; + } + + /** + * Move to the next RunningLengthWord + * @return whether the move was possible + */ + @Override +public boolean next() { + if (!this.iterator.hasNext()) { + this.brlw.NumberOfLiteralWords = 0; + this.brlw.RunningLength = 0; + return false; + } + 
this.brlw.reset(this.iterator.next()); + this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0 + return true; + } + + /** + * Write out the remain words, transforming them to zeroes. + * @param container target for writes + */ + public void dischargeAsEmpty(BitmapStorage32 container) { + while(size()>0) { + container.addStreamOfEmptyWords(false, size()); + discardFirstWords(size()); + } + } + + /** + * Write out the remaining words + * @param container target for writes + */ + public void discharge(BitmapStorage32 container) { + // fix the offset + this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); + discharge(this.brlw, this.iterator, container); + } + + /** + * Get the nth literal word for the current running length word + * @param index zero based index + * @return the literal word + */ + @Override +public int getLiteralWordAt(int index) { + return this.buffer[this.literalWordStartPosition + index]; + } + + /** + * Gets the number of literal words for the current running length word. + * + * @return the number of literal words + */ + @Override +public int getNumberOfLiteralWords() { + return this.brlw.NumberOfLiteralWords; + } + + /** + * Gets the running bit. + * + * @return the running bit + */ + @Override +public boolean getRunningBit() { + return this.brlw.RunningBit; + } + + /** + * Gets the running length. + * + * @return the running length + */ + @Override +public int getRunningLength() { + return this.brlw.RunningLength; + } + + /** + * Size in uncompressed words of the current running length word. + * + * @return the int + */ + @Override +public int size() { + return this.brlw.size(); + } + + /** + * write the first N literal words to the target bitmap. Does not discard the words or perform iteration. 
+ * @param numWords number of words to be written + * @param container where we write the data + */ + public void writeLiteralWords(int numWords, BitmapStorage32 container) { + container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); + } + + + /** + * write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. + * @param numWords number of words to be written + * @param container where we write the data + */ + public void writeNegatedLiteralWords(int numWords, BitmapStorage32 container) { + container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); + } + + + /** + * For internal use. (One could use the non-static discharge method instead, + * but we expect them to be slower.) + * + * @param initialWord + * the initial word + * @param iterator + * the iterator + * @param container + * the container + */ + protected static void discharge( + final BufferedRunningLengthWord32 initialWord, + final EWAHIterator32 iterator, final BitmapStorage32 container) { + BufferedRunningLengthWord32 runningLengthWord = initialWord; + for (;;) { + final int runningLength = runningLengthWord.getRunningLength(); + container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), + runningLength); + container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() + + runningLengthWord.literalwordoffset, + runningLengthWord.getNumberOfLiteralWords()); + if (!iterator.hasNext()) + break; + runningLengthWord = new BufferedRunningLengthWord32(iterator.next()); + } + } + + + + @Override +public IteratingBufferedRunningLengthWord32 clone() throws CloneNotSupportedException { + IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super.clone(); + answer.brlw = this.brlw.clone(); + answer.buffer = this.buffer; + answer.iterator = this.iterator.clone(); + answer.literalWordStartPosition = this.literalWordStartPosition; + return 
answer; + } + + private BufferedRunningLengthWord32 brlw; + private int[] buffer; + private int literalWordStartPosition; + private EWAHIterator32 iterator; +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingRLW32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingRLW32.java new file mode 100644 index 000000000..1ebeb2f62 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingRLW32.java @@ -0,0 +1,42 @@ +package com.fr.third.googlecode.javaewah32; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * High-level iterator over a compressed bitmap. + * + */ +public interface IteratingRLW32 { + /** + * @return whether there is more + */ + public boolean next() ; + /** + * @param index where the literal word is + * @return the literal word at the given index. + */ + public int getLiteralWordAt(int index); + /** + * @return the number of literal (non-fill) words + */ + public int getNumberOfLiteralWords() ; + /** + * @return the bit used for the fill bits + */ + public boolean getRunningBit() ; + /** + * @return sum of getRunningLength() and getNumberOfLiteralWords() + */ + public int size() ; + /** + * @return length of the run of fill words + */ + public int getRunningLength() ; + /** + * @param x the number of words to discard + */ + public void discardFirstWords(int x); +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorAggregation32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorAggregation32.java new file mode 100644 index 000000000..cfd5a0adc --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorAggregation32.java @@ -0,0 +1,601 @@ +package com.fr.third.googlecode.javaewah32; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedList; + +import 
com.fr.third.googlecode.javaewah.CloneableIterator; + + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Set of helper functions to aggregate bitmaps. + * + */ +public class IteratorAggregation32 { + /** + * @param x iterator to negate + * @return negated version of the iterator + */ + public static IteratingRLW32 not(final IteratingRLW32 x) { + return new IteratingRLW32() { + + @Override + public boolean next() { + return x.next(); + } + + @Override + public int getLiteralWordAt(int index) { + return ~x.getLiteralWordAt(index); + } + + @Override + public int getNumberOfLiteralWords() { + return x.getNumberOfLiteralWords(); + } + + @Override + public boolean getRunningBit() { + return ! x.getRunningBit(); + } + + @Override + public int size() { + return x.size(); + } + + @Override + public int getRunningLength() { + return x.getRunningLength(); + } + + @Override + public void discardFirstWords(int y) { + x.discardFirstWords(y); + } + + }; + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return and aggregate + */ + public static IteratingRLW32 bufferedand(final IteratingRLW32... al) { + return bufferedand (DEFAULTMAXBUFSIZE,al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return and aggregate + */ + public static IteratingRLW32 bufferedand(final int bufsize, final IteratingRLW32... 
al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + final LinkedList basell = new LinkedList(); + for (IteratingRLW32 i : al) + basell.add(i); + return new BufferedIterator32(new AndIt(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return or aggregate + */ + public static IteratingRLW32 bufferedor(final IteratingRLW32... al) { + return bufferedor(DEFAULTMAXBUFSIZE,al); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return or aggregate + */ + public static IteratingRLW32 bufferedor(final int bufsize, final IteratingRLW32... al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + for (IteratingRLW32 i : al) + basell.add(i); + return new BufferedIterator32(new ORIt(basell,bufsize)); + } + + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @return xor aggregate + */ + public static IteratingRLW32 bufferedxor(final IteratingRLW32... al) { + return bufferedxor (DEFAULTMAXBUFSIZE,al); + } + /** + * Aggregate the iterators using a bitmap buffer. + * + * @param al iterators to aggregate + * @param bufsize size of the internal buffer used by the iterator in 64-bit words + * @return xor aggregate + */ + public static IteratingRLW32 bufferedxor(final int bufsize, final IteratingRLW32... 
al) { + if (al.length == 0) + throw new IllegalArgumentException("Need at least one iterator"); + if (al.length == 1) + return al[0]; + + final LinkedList basell = new LinkedList(); + for (IteratingRLW32 i : al) + basell.add(i); + return new BufferedIterator32(new XORIt(basell,bufsize)); + } + /** + * Write out the content of the iterator, but as if it were all zeros. + * + * @param container + * where we write + * @param i + * the iterator + */ + protected static void dischargeAsEmpty(final BitmapStorage32 container, + final IteratingRLW32 i) { + while (i.size() > 0) { + container.addStreamOfEmptyWords(false, i.size()); + i.next(); + + } + } + + /** + * Write out up to max words, returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + protected static int discharge(final BitmapStorage32 container, IteratingRLW32 i, int max) { + int counter = 0; + while (i.size() > 0 && counter < max) { + int L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(i.getRunningBit(), L1); + counter += L1; + } + int L = i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(i.getLiteralWordAt(k)); + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + /** + * Write out up to max words in negated form (fill bit flipped, literal words complemented), returns how many were written + * @param container target for writes + * @param i source of data + * @param max maximal number of writes + * @return how many written + */ + protected static int dischargeNegated(final BitmapStorage32 container, IteratingRLW32 i, int max) { + int counter = 0; + while (i.size() > 0 && counter < max) { + int L1 = i.getRunningLength(); + if (L1 > 0) { + if (L1 + counter > max) + L1 = max - counter; + container.addStreamOfEmptyWords(!i.getRunningBit(), L1); /* negated run: flip the fill bit */ + counter += L1; + } + int L =
i.getNumberOfLiteralWords(); + if(L + counter > max) L = max - counter; + for (int k = 0; k < L; ++k) { + container.add(~i.getLiteralWordAt(k)); /* negated literal: bitwise complement */ + } + counter += L; + i.discardFirstWords(L+L1); + } + return counter; + } + + static void andToContainer(final BitmapStorage32 container, + int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW32 predator = i_is_prey ? rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final int index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + static void andToContainer(final BitmapStorage32 container, + final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW32 predator = i_is_prey ?
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + container.addStreamOfEmptyWords(false, predator.getRunningLength()); + prey.discardFirstWords(predator.getRunningLength()); + predator.discardFirstWords(predator.getRunningLength()); + } else { + final int index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + + /** + * Compute the first few words of the XOR aggregate between two iterators. + * + * @param container where to write + * @param desiredrlwcount number of words to be written (max) + * @param rlwi first iterator to aggregate + * @param rlwj second iterator to aggregate + */ + public static void xorToContainer(final BitmapStorage32 container, + int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { + while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { + while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { + final boolean i_is_prey = rlwi.getRunningLength() < rlwj + .getRunningLength(); + final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; + final IteratingRLW32 predator = i_is_prey ? 
rlwj + : rlwi; + if (predator.getRunningBit() == false) { + int index = discharge(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(false, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } else { + int index = dischargeNegated(container, prey, predator.getRunningLength()); + container.addStreamOfEmptyWords(true, predator.getRunningLength() + - index); + predator.discardFirstWords(predator.getRunningLength()); + } + } + final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), + rlwj.getNumberOfLiteralWords()); + if (nbre_literal > 0) { + desiredrlwcount -= nbre_literal; + for (int k = 0; k < nbre_literal; ++k) + container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); + rlwi.discardFirstWords(nbre_literal); + rlwj.discardFirstWords(nbre_literal); + } + } + } + + protected static int inplaceor(int[] bitmap, + IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = i.getRunningLength(); + if (i.getRunningBit()) + Arrays.fill(bitmap, pos, pos + L, ~0); + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = i.getRunningLength(); + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + Arrays.fill(bitmap, pos, bitmap.length, ~0); + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + Arrays.fill(bitmap, pos, pos + L, ~0); + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] |= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + + protected static int inplacexor(int[] bitmap, + IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = i.getRunningLength(); + if 
(i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = i.getRunningLength(); + if (pos + L > bitmap.length) { + if (i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = ~bitmap[k]; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = ~bitmap[k]; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] ^= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + protected static int inplaceand(int[] bitmap, + IteratingRLW32 i) { + int pos = 0; + int s; + while ((s = i.size()) > 0) { + if (pos + s < bitmap.length) { + final int L = i.getRunningLength(); + if (!i.getRunningBit()) { + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + } + pos += L; + final int LR = i.getNumberOfLiteralWords(); + for (int k = 0; k < LR; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + if (!i.next()) { + return pos; + } + } else { + int howmany = bitmap.length - pos; + int L = i.getRunningLength(); + if (pos + L > bitmap.length) { + if (!i.getRunningBit()) { + for(int k = pos ; k < bitmap.length; ++k) + bitmap[k] = 0; + } + i.discardFirstWords(howmany); + return bitmap.length; + } + if (!i.getRunningBit()) + for(int k = pos ; k < pos + L; ++k) + bitmap[k] = 0; + pos += L; + for (int k = 0; pos < bitmap.length; ++k) + bitmap[pos++] &= i.getLiteralWordAt(k); + i.discardFirstWords(howmany); + return pos; + } + } + return pos; + } + + /** + * An optimization option. Larger values may improve speed, but at + * the expense of memory. 
+ */ + public final static int DEFAULTMAXBUFSIZE = 65536; + + +} + + +class ORIt implements CloneableIterator { + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + int[] hardbitmap; + LinkedList ll; + + ORIt(LinkedList basell, final int bufsize) { + this.ll = basell; + this.hardbitmap = new int[bufsize]; + } + + @Override + public ORIt clone() throws CloneNotSupportedException { + ORIt answer = (ORIt) super.clone(); /* must be ORIt: casting this object to XORIt always threw ClassCastException */ + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + int effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW32 rlw = i.next(); + if (rlw.size() > 0) { + int eff = IteratorAggregation32.inplaceor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.add(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + +class XORIt implements CloneableIterator { + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + int[] hardbitmap; + LinkedList ll; + + XORIt(LinkedList basell, final int bufsize) { + this.ll = basell; + this.hardbitmap = new int[bufsize]; + + } + + @Override + public XORIt clone() throws CloneNotSupportedException { + XORIt answer = (XORIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.hardbitmap = this.hardbitmap.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + int effective = 0; + Iterator i = this.ll.iterator(); + while (i.hasNext()) { + IteratingRLW32 rlw = i.next(); + if (rlw.size() > 0) { + int eff =
IteratorAggregation32.inplacexor(this.hardbitmap, rlw); + if (eff > effective) + effective = eff; + } else + i.remove(); + } + for (int k = 0; k < effective; ++k) + this.buffer.add(this.hardbitmap[k]); + Arrays.fill(this.hardbitmap, 0); + return this.buffer.getEWAHIterator(); + } +} + +class AndIt implements CloneableIterator { + EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); + LinkedList ll; + int buffersize; + + public AndIt(LinkedList basell, final int bufsize) { + this.ll = basell; + this.buffersize = bufsize; + } + + @Override + public boolean hasNext() { + return !this.ll.isEmpty(); + } + + @Override + public AndIt clone() throws CloneNotSupportedException { + AndIt answer = (AndIt) super.clone(); + answer.buffer = this.buffer.clone(); + answer.ll = (LinkedList) this.ll.clone(); + return answer; + } + + @Override + public EWAHIterator32 next() { + this.buffer.clear(); + IteratorAggregation32.andToContainer(this.buffer, this.buffersize * this.ll.size(), + this.ll.get(0), this.ll.get(1)); + if (this.ll.size() > 2) { + Iterator i = this.ll.iterator(); + i.next(); + i.next(); + EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32(); + while (i.hasNext() && this.buffer.sizeInBytes() > 0) { + IteratorAggregation32.andToContainer(tmpbuffer, + this.buffer.getIteratingRLW(), i.next()); + this.buffer.swap(tmpbuffer); + tmpbuffer.clear(); + } + } + Iterator i = this.ll.iterator(); + while(i.hasNext()) { + if(i.next().size() == 0) { + this.ll.clear(); + break; + } + } + return this.buffer.getEWAHIterator(); + } + +} \ No newline at end of file diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorUtil32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorUtil32.java new file mode 100644 index 000000000..d8e415101 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorUtil32.java @@ -0,0 +1,135 @@ +package com.fr.third.googlecode.javaewah32; + +import java.util.Iterator; + +import 
com.fr.third.googlecode.javaewah.IntIterator; + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ +/** + * Convenience functions for working over iterators + * + */ +public class IteratorUtil32 { + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) { + return new IntIteratorOverIteratingRLW32(i); + } + + /** + * @param i iterator we wish to iterate over + * @return an iterator over the set bits corresponding to the iterator + */ + public static Iterator toSetBitsIterator(final IteratingRLW32 i) { + return new Iterator() { + @Override + public boolean hasNext() { + return this.under.hasNext(); + } + + @Override + public Integer next() { + return Integer.valueOf(this.under.next()); + } + + @Override + public void remove() { + } + + final private IntIterator under = toSetBitsIntIterator(i); + }; + + } + + /** + * Turn an iterator into a bitmap + * @param i iterator we wish to materialize + * @param c where we write + */ + public static void materialize(final IteratingRLW32 i, final BitmapStorage32 c) { + while (true) { + if (i.getRunningLength() > 0) { + c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); + } + for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) + c.add(i.getLiteralWordAt(k)); + if (!i.next()) + break; + } + } + + /** + * @param i iterator we wish to iterate over (it is advanced with next() until exhausted) + * @return the cardinality (number of set bits) corresponding to the iterator + */ + public static int cardinality(final IteratingRLW32 i) { + int answer = 0; + while (true) { + if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap32.wordinbits; + for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) + answer += Integer.bitCount(i.getLiteralWordAt(k)); /* words are 32-bit: Long.bitCount would sign-extend and add 32 phantom bits when bit 31 is set */ + if(!i.next()) break; + }
+ return answer; + } + + /** + * + * @param x set of bitmaps we wish to iterate over + * @return an array of iterators corresponding to the array of bitmaps + */ + public static IteratingRLW32[] toIterators(final EWAHCompressedBitmap32... x) { + IteratingRLW32[] X = new IteratingRLW32[x.length]; + for (int k = 0; k < X.length; ++k) { + X[k] = new IteratingBufferedRunningLengthWord32(x[k]); + } + return X; + } + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @param c where we write + * @param Max maximum number of words to materialize + * @return how many words were actually materialized + */ + public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int Max) { + final int origMax = Max; + while (true) { + if (i.getRunningLength() > 0) { + int L = i.getRunningLength(); + if(L > Max) L = Max; + c.addStreamOfEmptyWords(i.getRunningBit(), L); + Max -= L; + } + long L = i.getNumberOfLiteralWords(); + for (int k = 0; k < L; ++k) + c.add(i.getLiteralWordAt(k)); + if(Max>0) { + if (!i.next()) + break; + } + else break; + } + return origMax - Max; + } + /** + * Turn an iterator into a bitmap + * + * @param i iterator we wish to materialize + * @return materialized version of the iterator + */ + public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) { + EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); + materialize(i, ewah); + return ewah; + } + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/NonEmptyVirtualStorage32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/NonEmptyVirtualStorage32.java new file mode 100644 index 000000000..94e8bd96a --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/NonEmptyVirtualStorage32.java @@ -0,0 +1,87 @@ +package com.fr.third.googlecode.javaewah32; + + + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the
Apache License, Version 2.0. + */ +/** + * This is a BitmapStorage that can be used to determine quickly + * if the result of an operation is non-trivial... that is, whether + * there will be at least one set bit. + * + * @since 0.5.0 + * @author Daniel Lemire and Veronika Zenz + * + */ +public class NonEmptyVirtualStorage32 implements BitmapStorage32 { + static class NonEmptyException extends RuntimeException { + private static final long serialVersionUID = 1L; + + /** + * Do not fill in the stack trace for this exception + * for performance reasons. + * + * @return this instance + * @see Throwable#fillInStackTrace() + */ + @Override + public synchronized Throwable fillInStackTrace() { + return this; + } + } + + private static final NonEmptyException nonEmptyException = new NonEmptyException(); + + + /** + * If the word to be added is non-zero, the shared NonEmptyException instance is thrown; a zero word is ignored. + */ + @Override +public void add(int newdata) { + if(newdata!=0) throw nonEmptyException; + } + + /** + * Throws the shared NonEmptyException instance when number is greater than 0; the contents of data are not inspected. + * + */ + @Override +public void addStreamOfLiteralWords(int[] data, int start, int number) { + if (number > 0){ + throw nonEmptyException; + } + } + + /** + * If the boolean value is true and number is greater than 0, then it throws the shared NonEmptyException instance, + * otherwise, nothing happens. + * + */ + @Override +public void addStreamOfEmptyWords(boolean v, int number) { + if(v && (number>0)) throw nonEmptyException; + } + + /** + * Throws the shared NonEmptyException instance when number is greater than 0; the contents of data are not inspected. + * + */ + @Override +public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) { + if (number > 0){ + throw nonEmptyException; + } + } + + /** + * Does nothing; the requested size in bits is ignored.
+ * + * @see BitmapStorage32#setSizeInBits(int) + */ + @Override +public void setSizeInBits(int bits) { + } + +} diff --git a/fine-jgit/src/com/fr/third/googlecode/javaewah32/RunningLengthWord32.java b/fine-jgit/src/com/fr/third/googlecode/javaewah32/RunningLengthWord32.java new file mode 100644 index 000000000..37f52ee83 --- /dev/null +++ b/fine-jgit/src/com/fr/third/googlecode/javaewah32/RunningLengthWord32.java @@ -0,0 +1,152 @@ +package com.fr.third.googlecode.javaewah32; + +/* + * Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser + * Licensed under the Apache License, Version 2.0. + */ + +/** + * Mostly for internal use. Reads and writes the running bit, the running length and the literal-word count packed into one word of the parent bitmap's buffer. + * + * @since 0.5.0 + * @author Daniel Lemire + */ +public final class RunningLengthWord32 implements Cloneable { + + /** + * Instantiates a new running length word. + * + * @param a + * the bitmap whose buffer contains the running length word + * @param p + * position in that buffer where the running length word is + * located. + */ + RunningLengthWord32(final EWAHCompressedBitmap32 a, final int p) { + this.parent = a; + this.position = p; + } + + /** + * Gets the number of literal words. + * + * @return the number of literal words (the bits above the running bit and the running length field) + */ + public int getNumberOfLiteralWords() { + return (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); + } + + /** + * Gets the running bit. + * + * @return the running bit (true when the lowest bit of the word is set) + */ + public boolean getRunningBit() { + return (this.parent.buffer[this.position] & 1) != 0; + } + + /** + * Gets the running length. + * + * @return the running length (the runninglengthbits bits just above the running bit) + */ + public int getRunningLength() { + return (this.parent.buffer[this.position] >>> 1) + & largestrunninglengthcount; + } + + /** + * Sets the number of literal words, leaving the running bit and running length untouched.
+ * + * @param number + * the new number of literal words; it replaces the previous count + */ + public void setNumberOfLiteralWords(final int number) { + this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; + this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) + | runninglengthplusrunningbit; + } + + /** + * Sets the running bit (the lowest bit of the word). + * + * @param b + * the new running bit + */ + public void setRunningBit(final boolean b) { + if (b) + this.parent.buffer[this.position] |= 1; + else + this.parent.buffer[this.position] &= ~1; + } + + /** + * Sets the running length, leaving the running bit and the literal-word count untouched. + * + * @param number + * the new running length; it replaces the previous value + */ + public void setRunningLength(final int number) { + this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; + this.parent.buffer[this.position] &= (number << 1) + | notshiftedlargestrunninglengthcount; + } + + /** + * Return the size in uncompressed words represented by this running + * length word. + * + * @return the running length plus the number of literal words + */ + public int size() { + return getRunningLength() + getNumberOfLiteralWords(); + } + + /* + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "running bit = " + getRunningBit() + + " running length = " + getRunningLength() + + " number of lit. words " + getNumberOfLiteralWords(); + } + + @Override + public RunningLengthWord32 clone() throws CloneNotSupportedException { + RunningLengthWord32 answer; + answer = (RunningLengthWord32) super.clone(); + answer.parent = this.parent; + answer.position = this.position; + return answer; + } + + /** The bitmap whose buffer holds this word. */ + public EWAHCompressedBitmap32 parent; + + /** The position of this word in the parent's buffer. */ + public int position; + + /** + * number of bits dedicated to marking of the running length of clean + * words + */ + public static final int runninglengthbits = 16; + + private static final int literalbits = 32 - 1 - runninglengthbits; + + /** largest number of literal words in a run.
*/ + public static final int largestliteralcount = (1 << literalbits) - 1; + + /** largest number of clean words in a run */ + public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1; + + private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1; + + private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; + + private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; + + private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; + +} \ No newline at end of file