Browse Source
* commit '33ec2b51c4e783618d8e3b5cef1e9a7db24eaf45': REPORT-20981 删除自己写的多余类 无JIRA任务、jgit gc 需要googlecode、之前没有打包它research/11.0
neil
5 years ago
55 changed files with 9884 additions and 15 deletions
Binary file not shown.
@ -0,0 +1,106 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* BitCounter is a fake bitset data structure. Instead of storing the actual |
||||
* data, it only records the number of set bits. |
||||
* |
||||
* @since 0.4.0 |
||||
* @author David McIntosh |
||||
*/ |
||||
|
||||
public final class BitCounter implements BitmapStorage { |
||||
|
||||
/** |
||||
* Virtually add words directly to the bitmap |
||||
* |
||||
* @param newdata |
||||
* the word |
||||
*/ |
||||
@Override |
||||
public void add(final long newdata) { |
||||
this.oneBits += Long.bitCount(newdata); |
||||
return; |
||||
} |
||||
|
||||
/** |
||||
* virtually add several literal words. |
||||
* |
||||
* @param data |
||||
* the literal words |
||||
* @param start |
||||
* the starting point in the array |
||||
* @param number |
||||
* the number of literal words to add |
||||
*/ |
||||
@Override |
||||
public void addStreamOfLiteralWords(long[] data, int start, int number) { |
||||
for (int i = start; i < start + number; i++) { |
||||
add(data[i]); |
||||
} |
||||
return; |
||||
} |
||||
|
||||
/** |
||||
* virtually add many zeroes or ones. |
||||
* |
||||
* @param v |
||||
* zeros or ones |
||||
* @param number |
||||
* how many to words add |
||||
*/ |
||||
@Override |
||||
public void addStreamOfEmptyWords(boolean v, long number) { |
||||
if (v) { |
||||
this.oneBits += number * EWAHCompressedBitmap.wordinbits; |
||||
} |
||||
return; |
||||
} |
||||
|
||||
/** |
||||
* virtually add several negated literal words. |
||||
* |
||||
* @param data |
||||
* the literal words |
||||
* @param start |
||||
* the starting point in the array |
||||
* @param number |
||||
* the number of literal words to add |
||||
*/ |
||||
// @Override : causes problems with Java 1.5
|
||||
@Override |
||||
public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { |
||||
for (int i = start; i < start + number; i++) { |
||||
add(~data[i]); |
||||
} |
||||
return; |
||||
} |
||||
|
||||
/** |
||||
* As you act on this class, it records the number of set (true) bits. |
||||
* |
||||
* @return number of set bits |
||||
*/ |
||||
public int getCount() { |
||||
return this.oneBits; |
||||
} |
||||
|
||||
/** |
||||
* should directly set the sizeinbits field, but is effectively ignored in |
||||
* this class. |
||||
* |
||||
* @param bits |
||||
* number of bits |
||||
*/ |
||||
// @Override : causes problems with Java 1.5
|
||||
@Override |
||||
public void setSizeInBits(int bits) { |
||||
// no action
|
||||
} |
||||
|
||||
private int oneBits; |
||||
|
||||
} |
@ -0,0 +1,71 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/**
 * Low level bitset writing methods: the operations a container must offer
 * so that bitmap data can be streamed into it word by word.
 *
 * @since 0.4.0
 * @author David McIntosh
 */
public interface BitmapStorage {

    /**
     * Appends one word directly to the bitmap (for expert use).
     *
     * This is the normal way of feeding data to the container: bits arrive
     * in chunks of 8*8 bits.
     *
     * @param newdata
     *          the word
     */
    void add(long newdata);

    /**
     * Appends several literal words in one call, which might be faster than
     * adding them one at a time.
     *
     * @param data
     *          the literal words
     * @param start
     *          the starting point in the array
     * @param number
     *          the number of literal words to add
     */
    void addStreamOfLiteralWords(long[] data, int start, int number);

    /**
     * For experts: appends a run of words that are all zeroes or all ones.
     *
     * @param v
     *          zeros or ones
     * @param number
     *          how many words to add
     */
    void addStreamOfEmptyWords(boolean v, long number);

    /**
     * Same as {@link #addStreamOfLiteralWords(long[], int, int)}, except
     * that every word is negated before being added.
     *
     * @param data
     *          the literal words
     * @param start
     *          the starting point in the array
     * @param number
     *          the number of literal words to add
     */
    void addStreamOfNegatedLiteralWords(long[] data, int start, int number);

    /**
     * Directly sets the size-in-bits value of the container.
     *
     * @param bits
     *          number of bits
     */
    void setSizeInBits(int bits);
}
@ -0,0 +1,151 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* This class can be used to iterate over blocks of bitmap data. |
||||
* |
||||
* @author Daniel Lemire |
||||
* |
||||
*/ |
||||
public class BufferedIterator implements IteratingRLW { |
||||
/** |
||||
* Instantiates a new iterating buffered running length word. |
||||
* |
||||
* @param iterator iterator |
||||
*/ |
||||
public BufferedIterator(final CloneableIterator<EWAHIterator> iterator) { |
||||
this.masteriterator = iterator; |
||||
if(this.masteriterator.hasNext()) { |
||||
this.iterator = this.masteriterator.next(); |
||||
this.brlw = new BufferedRunningLengthWord(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||
this.buffer = this.iterator.buffer(); |
||||
} |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Discard first words, iterating to the next running length word if needed. |
||||
* |
||||
* @param x the number of words to be discarded |
||||
*/ |
||||
@Override |
||||
public void discardFirstWords(long x) { |
||||
while (x > 0) { |
||||
if (this.brlw.RunningLength > x) { |
||||
this.brlw.RunningLength -= x; |
||||
return; |
||||
} |
||||
x -= this.brlw.RunningLength; |
||||
this.brlw.RunningLength = 0; |
||||
long toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||
|
||||
this.literalWordStartPosition += toDiscard; |
||||
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||
x -= toDiscard; |
||||
if ((x > 0) || (this.brlw.size() == 0)) { |
||||
if (!this.next()) { |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
/** |
||||
* Move to the next RunningLengthWord |
||||
* @return whether the move was possible |
||||
*/ |
||||
@Override |
||||
public boolean next() { |
||||
if (!this.iterator.hasNext()) { |
||||
if(!reload()) { |
||||
this.brlw.NumberOfLiteralWords = 0; |
||||
this.brlw.RunningLength = 0; |
||||
return false; |
||||
} |
||||
} |
||||
this.brlw.reset(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||
return true; |
||||
} |
||||
private boolean reload() { |
||||
if(!this.masteriterator.hasNext()) { |
||||
return false; |
||||
} |
||||
this.iterator = this.masteriterator.next(); |
||||
this.buffer = this.iterator.buffer(); |
||||
return true; |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Get the nth literal word for the current running length word |
||||
* @param index zero based index |
||||
* @return the literal word |
||||
*/ |
||||
@Override |
||||
public long getLiteralWordAt(int index) { |
||||
return this.buffer[this.literalWordStartPosition + index]; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words for the current running length word. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
@Override |
||||
public int getNumberOfLiteralWords() { |
||||
return this.brlw.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
@Override |
||||
public boolean getRunningBit() { |
||||
return this.brlw.RunningBit; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
@Override |
||||
public long getRunningLength() { |
||||
return this.brlw.RunningLength; |
||||
} |
||||
|
||||
/** |
||||
* Size in uncompressed words of the current running length word. |
||||
* |
||||
* @return the size |
||||
*/ |
||||
@Override |
||||
public long size() { |
||||
return this.brlw.size(); |
||||
} |
||||
|
||||
|
||||
@Override |
||||
public BufferedIterator clone() throws CloneNotSupportedException { |
||||
BufferedIterator answer = (BufferedIterator) super.clone(); |
||||
answer.brlw = this.brlw.clone(); |
||||
answer.buffer = this.buffer; |
||||
answer.iterator = this.iterator.clone(); |
||||
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||
answer.masteriterator = this.masteriterator.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
private BufferedRunningLengthWord brlw; |
||||
private long[] buffer; |
||||
private int literalWordStartPosition; |
||||
private EWAHIterator iterator; |
||||
private CloneableIterator<EWAHIterator> masteriterator; |
||||
} |
@ -0,0 +1,175 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
|
||||
|
||||
/** |
||||
* Mostly for internal use. Similar to RunningLengthWord, but can |
||||
* be modified without access to the array, and has faster access. |
||||
* |
||||
* @author Daniel Lemire |
||||
* @since 0.1.0 |
||||
* |
||||
*/ |
||||
public final class BufferedRunningLengthWord implements Cloneable { |
||||
|
||||
/** |
||||
* Instantiates a new buffered running length word. |
||||
* |
||||
* @param a the word |
||||
*/ |
||||
public BufferedRunningLengthWord(final long a) { |
||||
this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); |
||||
this.RunningBit = (a & 1) != 0; |
||||
this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); |
||||
} |
||||
|
||||
/** |
||||
* Instantiates a new buffered running length word. |
||||
* |
||||
* @param rlw the rlw |
||||
*/ |
||||
public BufferedRunningLengthWord(final RunningLengthWord rlw) { |
||||
this(rlw.parent.buffer[rlw.position]); |
||||
} |
||||
|
||||
/** |
||||
* Discard first words. |
||||
* |
||||
* @param x the x |
||||
*/ |
||||
public void discardFirstWords(long x) { |
||||
if (this.RunningLength >= x) { |
||||
this.RunningLength -= x; |
||||
return; |
||||
} |
||||
x -= this.RunningLength; |
||||
this.RunningLength = 0; |
||||
this.literalwordoffset += x; |
||||
this.NumberOfLiteralWords -= x; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
public int getNumberOfLiteralWords() { |
||||
return this.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
public boolean getRunningBit() { |
||||
return this.RunningBit; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
public long getRunningLength() { |
||||
return this.RunningLength; |
||||
} |
||||
|
||||
/** |
||||
* Reset the values using the provided word. |
||||
* |
||||
* @param a the word |
||||
*/ |
||||
public void reset(final long a) { |
||||
this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); |
||||
this.RunningBit = (a & 1) != 0; |
||||
this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); |
||||
this.literalwordoffset = 0; |
||||
} |
||||
|
||||
/** |
||||
* Reset the values of this running length word so that it has the same values |
||||
* as the other running length word. |
||||
* |
||||
* @param rlw the other running length word |
||||
*/ |
||||
public void reset(final RunningLengthWord rlw) { |
||||
reset(rlw.parent.buffer[rlw.position]); |
||||
} |
||||
|
||||
/** |
||||
* Sets the number of literal words. |
||||
* |
||||
* @param number the new number of literal words |
||||
*/ |
||||
public void setNumberOfLiteralWords(final int number) { |
||||
this.NumberOfLiteralWords = number; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running bit. |
||||
* |
||||
* @param b the new running bit |
||||
*/ |
||||
public void setRunningBit(final boolean b) { |
||||
this.RunningBit = b; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running length. |
||||
* |
||||
* @param number the new running length |
||||
*/ |
||||
public void setRunningLength(final long number) { |
||||
this.RunningLength = number; |
||||
} |
||||
|
||||
/** |
||||
* Size in uncompressed words. |
||||
* |
||||
* @return the long |
||||
*/ |
||||
public long size() { |
||||
return this.RunningLength + this.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/* |
||||
* @see java.lang.Object#toString() |
||||
*/ |
||||
@Override |
||||
public String toString() { |
||||
return "running bit = " + getRunningBit() + " running length = " |
||||
+ getRunningLength() + " number of lit. words " |
||||
+ getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
@Override |
||||
public BufferedRunningLengthWord clone() throws CloneNotSupportedException { |
||||
BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone(); |
||||
answer.literalwordoffset = this.literalwordoffset; |
||||
answer.NumberOfLiteralWords = this.NumberOfLiteralWords; |
||||
answer.RunningBit = this.RunningBit; |
||||
answer.RunningLength = this.RunningLength; |
||||
return answer; |
||||
} |
||||
|
||||
|
||||
/** how many literal words have we read so far? */ |
||||
public int literalwordoffset = 0; |
||||
|
||||
/** The Number of literal words. */ |
||||
public int NumberOfLiteralWords; |
||||
|
||||
/** The Running bit. */ |
||||
public boolean RunningBit; |
||||
|
||||
/** The Running length. */ |
||||
public long RunningLength; |
||||
|
||||
|
||||
} |
@ -0,0 +1,24 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/**
 * An iterator in the style of the standard Java one, with the addition
 * that it can be cloned.
 *
 * @param <E> the data type of the iterator
 */
public interface CloneableIterator<E> extends Cloneable {

    /**
     * Tells whether another element is available.
     *
     * @return whether there is more
     */
    boolean hasNext();

    /**
     * Hands out the next element.
     *
     * @return the next element
     */
    E next();

    /**
     * Duplicates this iterator.
     *
     * @return a copy
     * @throws CloneNotSupportedException this should never happen in practice
     */
    CloneableIterator<E> clone() throws CloneNotSupportedException;

}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,98 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* The class EWAHIterator represents a special type of |
||||
* efficient iterator iterating over (uncompressed) words of bits. |
||||
* It is not meant for end users. |
||||
* @author Daniel Lemire |
||||
* @since 0.1.0 |
||||
* |
||||
*/ |
||||
public final class EWAHIterator implements Cloneable { |
||||
|
||||
/** |
||||
* Instantiates a new EWAH iterator. |
||||
* |
||||
* @param a the array of words |
||||
* @param sizeinwords the number of words that are significant in the array of words |
||||
*/ |
||||
public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) { |
||||
this.rlw = new RunningLengthWord(a, 0); |
||||
this.size = sizeinwords; |
||||
this.pointer = 0; |
||||
} |
||||
|
||||
/** |
||||
* Allow expert developers to instantiate an EWAHIterator. |
||||
* |
||||
* @param bitmap we want to iterate over |
||||
* @return an iterator |
||||
*/ |
||||
public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) { |
||||
return bitmap.getEWAHIterator(); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Access to the array of words |
||||
* |
||||
* @return the long[] |
||||
*/ |
||||
public long[] buffer() { |
||||
return this.rlw.parent.buffer; |
||||
} |
||||
|
||||
/** |
||||
* Position of the literal words represented by this running length word. |
||||
* |
||||
* @return the int |
||||
*/ |
||||
public int literalWords() { |
||||
return this.pointer - this.rlw.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
/** |
||||
* Checks for next. |
||||
* |
||||
* @return true, if successful |
||||
*/ |
||||
public boolean hasNext() { |
||||
return this.pointer < this.size; |
||||
} |
||||
|
||||
/** |
||||
* Next running length word. |
||||
* |
||||
* @return the running length word |
||||
*/ |
||||
public RunningLengthWord next() { |
||||
this.rlw.position = this.pointer; |
||||
this.pointer += this.rlw.getNumberOfLiteralWords() + 1; |
||||
return this.rlw; |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator clone() throws CloneNotSupportedException { |
||||
EWAHIterator ans = (EWAHIterator) super.clone(); |
||||
ans.rlw = this.rlw.clone(); |
||||
ans.size = this.size; |
||||
ans.pointer = this.pointer; |
||||
return ans; |
||||
} |
||||
/** The pointer represent the location of the current running length |
||||
* word in the array of words (embedded in the rlw attribute). */ |
||||
int pointer; |
||||
|
||||
/** The current running length word. */ |
||||
RunningLengthWord rlw; |
||||
|
||||
/** The size in words. */ |
||||
int size; |
||||
|
||||
} |
@ -0,0 +1,436 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
import java.util.Arrays; |
||||
import java.util.Comparator; |
||||
import java.util.PriorityQueue; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* Fast algorithms to aggregate many bitmaps. These algorithms are just given as |
||||
* reference. They may not be faster than the corresponding methods in the |
||||
* EWAHCompressedBitmap class. |
||||
* |
||||
* @author Daniel Lemire |
||||
* |
||||
*/ |
||||
public class FastAggregation { |
||||
/** |
||||
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||
* @param bitmaps the source bitmaps |
||||
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||
* @return the or aggregate. |
||||
*/ |
||||
public static EWAHCompressedBitmap bufferedand(final int bufsize, |
||||
final EWAHCompressedBitmap... bitmaps) { |
||||
EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); |
||||
bufferedandWithContainer(answer,bufsize, bitmaps); |
||||
return answer; |
||||
} |
||||
/** |
||||
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||
* |
||||
* @param container where the aggregate is written |
||||
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||
* @param bitmaps the source bitmaps |
||||
*/ |
||||
public static void bufferedandWithContainer(final BitmapStorage container,final int bufsize, |
||||
final EWAHCompressedBitmap... bitmaps) { |
||||
|
||||
java.util.LinkedList<IteratingBufferedRunningLengthWord> al = new java.util.LinkedList<IteratingBufferedRunningLengthWord>(); |
||||
for (EWAHCompressedBitmap bitmap : bitmaps) { |
||||
al.add(new IteratingBufferedRunningLengthWord(bitmap)); |
||||
} |
||||
|
||||
long[] hardbitmap = new long[bufsize*bitmaps.length]; |
||||
|
||||
for(IteratingRLW i : al) |
||||
if (i.size() == 0) { |
||||
al.clear(); |
||||
break; |
||||
} |
||||
|
||||
while (!al.isEmpty()) { |
||||
Arrays.fill(hardbitmap, ~0l); |
||||
long effective = Integer.MAX_VALUE; |
||||
for(IteratingRLW i : al) { |
||||
int eff = IteratorAggregation.inplaceand(hardbitmap, i); |
||||
if (eff < effective) |
||||
effective = eff; |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
container.add(hardbitmap[k]); |
||||
for(IteratingRLW i : al) |
||||
if (i.size() == 0) { |
||||
al.clear(); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||
* @param bitmaps the source bitmaps |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @return the or aggregate. |
||||
*/ |
||||
public static EWAHCompressedBitmap bufferedor(final int bufsize, |
||||
final EWAHCompressedBitmap... bitmaps) { |
||||
EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); |
||||
bufferedorWithContainer(answer, bufsize, bitmaps); |
||||
return answer; |
||||
} |
||||
|
||||
/** |
||||
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||
* |
||||
* @param container where the aggregate is written |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @param bitmaps the source bitmaps |
||||
*/ |
||||
public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize, |
||||
final EWAHCompressedBitmap... bitmaps) { |
||||
int range = 0; |
||||
EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); |
||||
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||
return b.sizeinbits - a.sizeinbits; |
||||
} |
||||
}); |
||||
|
||||
java.util.ArrayList<IteratingBufferedRunningLengthWord> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord>(); |
||||
for (EWAHCompressedBitmap bitmap : sbitmaps) { |
||||
if (bitmap.sizeinbits > range) |
||||
range = bitmap.sizeinbits; |
||||
al.add(new IteratingBufferedRunningLengthWord(bitmap)); |
||||
} |
||||
long[] hardbitmap = new long[bufsize]; |
||||
int maxr = al.size(); |
||||
while (maxr > 0) { |
||||
long effective = 0; |
||||
for (int k = 0; k < maxr; ++k) { |
||||
if (al.get(k).size() > 0) { |
||||
int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k)); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
maxr = k; |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
container.add(hardbitmap[k]); |
||||
Arrays.fill(hardbitmap, 0); |
||||
|
||||
} |
||||
container.setSizeInBits(range); |
||||
} |
||||
|
||||
/** |
||||
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||
* @param bitmaps the source bitmaps |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @return the xor aggregate. |
||||
*/ |
||||
public static EWAHCompressedBitmap bufferedxor(final int bufsize, |
||||
final EWAHCompressedBitmap... bitmaps) { |
||||
EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); |
||||
bufferedxorWithContainer(answer, bufsize,bitmaps); |
||||
return answer; |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||
* |
||||
* @param container where the aggregate is written |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @param bitmaps the source bitmaps |
||||
*/ |
||||
public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize, |
||||
final EWAHCompressedBitmap... bitmaps) { |
||||
int range = 0; |
||||
EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); |
||||
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||
return b.sizeinbits - a.sizeinbits; |
||||
} |
||||
}); |
||||
|
||||
java.util.ArrayList<IteratingBufferedRunningLengthWord> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord>(); |
||||
for (EWAHCompressedBitmap bitmap : sbitmaps) { |
||||
if (bitmap.sizeinbits > range) |
||||
range = bitmap.sizeinbits; |
||||
al.add(new IteratingBufferedRunningLengthWord(bitmap)); |
||||
} |
||||
long[] hardbitmap = new long[bufsize]; |
||||
int maxr = al.size(); |
||||
while (maxr > 0) { |
||||
long effective = 0; |
||||
for (int k = 0; k < maxr; ++k) { |
||||
if (al.get(k).size() > 0) { |
||||
int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k)); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
maxr = k; |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
container.add(hardbitmap[k]); |
||||
Arrays.fill(hardbitmap, 0); |
||||
} |
||||
container.setSizeInBits(range); |
||||
} |
||||
|
||||
/** |
||||
* Uses a priority queue to compute the or aggregate. |
||||
* @param <T> a class extending LogicalElement (like a compressed bitmap) |
||||
* @param bitmaps |
||||
* bitmaps to be aggregated |
||||
* @return the or aggregate |
||||
*/ |
||||
@SuppressWarnings({ "rawtypes", "unchecked" }) |
||||
public static <T extends LogicalElement> T or(T... bitmaps) { |
||||
PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length, |
||||
new Comparator<T>() { |
||||
@Override |
||||
public int compare(T a, T b) { |
||||
return a.sizeInBytes() - b.sizeInBytes(); |
||||
} |
||||
}); |
||||
for (T x : bitmaps) { |
||||
pq.add(x); |
||||
} |
||||
while (pq.size() > 1) { |
||||
T x1 = pq.poll(); |
||||
T x2 = pq.poll(); |
||||
pq.add((T) x1.or(x2)); |
||||
} |
||||
return pq.poll(); |
||||
} |
||||
/** |
||||
* Uses a priority queue to compute the or aggregate. |
||||
* @param container where we write the result |
||||
* @param bitmaps to be aggregated |
||||
*/ |
||||
public static void orToContainer(final BitmapStorage container, |
||||
final EWAHCompressedBitmap ... bitmaps) { |
||||
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||
PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length, |
||||
new Comparator<EWAHCompressedBitmap>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||
return a.sizeInBytes() - b.sizeInBytes(); |
||||
} |
||||
}); |
||||
for (EWAHCompressedBitmap x : bitmaps) { |
||||
pq.add(x); |
||||
} |
||||
while (pq.size() > 2) { |
||||
EWAHCompressedBitmap x1 = pq.poll(); |
||||
EWAHCompressedBitmap x2 = pq.poll(); |
||||
pq.add(x1.or(x2)); |
||||
} |
||||
pq.poll().orToContainer(pq.poll(), container); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Uses a priority queue to compute the xor aggregate. |
||||
* |
||||
* @param <T> a class extending LogicalElement (like a compressed bitmap) |
||||
* @param bitmaps |
||||
* bitmaps to be aggregated |
||||
* @return the xor aggregate |
||||
*/ |
||||
@SuppressWarnings({ "rawtypes", "unchecked" }) |
||||
public static <T extends LogicalElement> T xor(T... bitmaps) { |
||||
PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length, |
||||
new Comparator<T>() { |
||||
|
||||
@Override |
||||
public int compare(T a, T b) { |
||||
return a.sizeInBytes() - b.sizeInBytes(); |
||||
} |
||||
}); |
||||
for (T x : bitmaps) |
||||
pq.add(x); |
||||
while (pq.size() > 1) { |
||||
T x1 = pq.poll(); |
||||
T x2 = pq.poll(); |
||||
pq.add((T) x1.xor(x2)); |
||||
} |
||||
return pq.poll(); |
||||
} |
||||
|
||||
/** |
||||
* Uses a priority queue to compute the xor aggregate. |
||||
* @param container where we write the result |
||||
* @param bitmaps to be aggregated |
||||
*/ |
||||
public static void xorToContainer(final BitmapStorage container, |
||||
final EWAHCompressedBitmap ... bitmaps) { |
||||
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||
PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length, |
||||
new Comparator<EWAHCompressedBitmap>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||
return a.sizeInBytes() - b.sizeInBytes(); |
||||
} |
||||
}); |
||||
for (EWAHCompressedBitmap x : bitmaps) { |
||||
pq.add(x); |
||||
} |
||||
while (pq.size() > 2) { |
||||
EWAHCompressedBitmap x1 = pq.poll(); |
||||
EWAHCompressedBitmap x2 = pq.poll(); |
||||
pq.add(x1.xor(x2)); |
||||
} |
||||
pq.poll().xorToContainer(pq.poll(), container); |
||||
} |
||||
|
||||
/** |
||||
* For internal use. Computes the bitwise or of the provided bitmaps and |
||||
* stores the result in the container. (This used to be the default.) |
||||
* |
||||
* @deprecated use EWAHCompressedBitmap.or instead |
||||
* @since 0.4.0 |
||||
* @param container where store the result |
||||
* @param bitmaps to be aggregated |
||||
*/ |
||||
@Deprecated |
||||
public static void legacy_orWithContainer(final BitmapStorage container, |
||||
final EWAHCompressedBitmap... bitmaps) { |
||||
if (bitmaps.length == 2) { |
||||
// should be more efficient
|
||||
bitmaps[0].orToContainer(bitmaps[1], container); |
||||
return; |
||||
} |
||||
|
||||
// Sort the bitmaps in descending order by sizeinbits. We will exhaust the
|
||||
// sorted bitmaps from right to left.
|
||||
final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone(); |
||||
Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||
return a.sizeinbits < b.sizeinbits ? 1 |
||||
: a.sizeinbits == b.sizeinbits ? 0 : -1; |
||||
} |
||||
}); |
||||
|
||||
final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length]; |
||||
int maxAvailablePos = 0; |
||||
for (EWAHCompressedBitmap bitmap : sortedBitmaps) { |
||||
EWAHIterator iterator = bitmap.getEWAHIterator(); |
||||
if (iterator.hasNext()) { |
||||
rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord( |
||||
iterator); |
||||
} |
||||
} |
||||
|
||||
if (maxAvailablePos == 0) { // this never happens...
|
||||
container.setSizeInBits(0); |
||||
return; |
||||
} |
||||
|
||||
int maxSize = sortedBitmaps[0].sizeinbits; |
||||
|
||||
while (true) { |
||||
long maxOneRl = 0; |
||||
long minZeroRl = Long.MAX_VALUE; |
||||
long minSize = Long.MAX_VALUE; |
||||
int numEmptyRl = 0; |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord rlw = rlws[i]; |
||||
long size = rlw.size(); |
||||
if (size == 0) { |
||||
maxAvailablePos = i; |
||||
break; |
||||
} |
||||
minSize = Math.min(minSize, size); |
||||
|
||||
if (rlw.getRunningBit()) { |
||||
long rl = rlw.getRunningLength(); |
||||
maxOneRl = Math.max(maxOneRl, rl); |
||||
minZeroRl = 0; |
||||
if (rl == 0 && size > 0) { |
||||
numEmptyRl++; |
||||
} |
||||
} else { |
||||
long rl = rlw.getRunningLength(); |
||||
minZeroRl = Math.min(minZeroRl, rl); |
||||
if (rl == 0 && size > 0) { |
||||
numEmptyRl++; |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (maxAvailablePos == 0) { |
||||
break; |
||||
} else if (maxAvailablePos == 1) { |
||||
// only one bitmap is left so just write the rest of it out
|
||||
rlws[0].discharge(container); |
||||
break; |
||||
} |
||||
|
||||
if (maxOneRl > 0) { |
||||
container.addStreamOfEmptyWords(true, maxOneRl); |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord rlw = rlws[i]; |
||||
rlw.discardFirstWords(maxOneRl); |
||||
} |
||||
} else if (minZeroRl > 0) { |
||||
container.addStreamOfEmptyWords(false, minZeroRl); |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord rlw = rlws[i]; |
||||
rlw.discardFirstWords(minZeroRl); |
||||
} |
||||
} else { |
||||
int index = 0; |
||||
|
||||
if (numEmptyRl == 1) { |
||||
// if one rlw has literal words to process and the rest have a run of
|
||||
// 0's we can write them out here
|
||||
IteratingBufferedRunningLengthWord emptyRl = null; |
||||
long minNonEmptyRl = Long.MAX_VALUE; |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord rlw = rlws[i]; |
||||
long rl = rlw.getRunningLength(); |
||||
if (rl == 0) { |
||||
assert emptyRl == null; |
||||
emptyRl = rlw; |
||||
} else { |
||||
minNonEmptyRl = Math.min(minNonEmptyRl, rl); |
||||
} |
||||
} |
||||
long wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; |
||||
if (emptyRl != null) |
||||
emptyRl.writeLiteralWords((int) wordsToWrite, container); |
||||
index += wordsToWrite; |
||||
} |
||||
|
||||
while (index < minSize) { |
||||
long word = 0; |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord rlw = rlws[i]; |
||||
if (rlw.getRunningLength() <= index) { |
||||
word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength()); |
||||
} |
||||
} |
||||
container.add(word); |
||||
index++; |
||||
} |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord rlw = rlws[i]; |
||||
rlw.discardFirstWords(minSize); |
||||
} |
||||
} |
||||
} |
||||
container.setSizeInBits(maxSize); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,31 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* |
||||
* The IntIterator interface is used to iterate over a stream of integers. |
||||
* |
||||
* @author Daniel Lemire |
||||
* @since 0.2.0 |
||||
* |
||||
*/ |
||||
public interface IntIterator {

  /**
   * Reports whether the stream still has integers to deliver.
   *
   * @return {@code true} if there is more, {@code false} otherwise
   */
  boolean hasNext();

  /**
   * Delivers the next integer of the stream.
   *
   * @return the integer
   */
  int next();
}
@ -0,0 +1,87 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2012, Google Inc. |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; |
||||
|
||||
/** |
||||
* The IntIteratorImpl is the 64 bit implementation of the |
||||
* IntIterator interface, which efficiently returns the stream of integers |
||||
* represented by an EWAHIterator. |
||||
* |
||||
* @author Colby Ranger |
||||
* @since 0.5.6 |
||||
*/ |
||||
final class IntIteratorImpl implements IntIterator { |
||||
|
||||
private final EWAHIterator ewahIter; |
||||
private final long[] ewahBuffer; |
||||
private int position; |
||||
private int runningLength; |
||||
private long word; |
||||
private int wordPosition; |
||||
private int wordLength; |
||||
private int literalPosition; |
||||
private boolean hasnext; |
||||
|
||||
IntIteratorImpl(EWAHIterator ewahIter) { |
||||
this.ewahIter = ewahIter; |
||||
this.ewahBuffer = ewahIter.buffer(); |
||||
this.hasnext = this.moveToNext(); |
||||
} |
||||
|
||||
public final boolean moveToNext() { |
||||
while (!runningHasNext() && !literalHasNext()) { |
||||
if (!this.ewahIter.hasNext()) { |
||||
return false; |
||||
} |
||||
setRunningLengthWord(this.ewahIter.next()); |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return this.hasnext; |
||||
} |
||||
|
||||
@Override |
||||
public final int next() { |
||||
final int answer; |
||||
if (runningHasNext()) { |
||||
answer = this.position++; |
||||
} else { |
||||
final int bit = Long.numberOfTrailingZeros(this.word); |
||||
this.word ^= (1l << bit); |
||||
answer = this.literalPosition + bit; |
||||
} |
||||
this.hasnext = this.moveToNext(); |
||||
return answer; |
||||
} |
||||
|
||||
private final void setRunningLengthWord(RunningLengthWord rlw) { |
||||
this.runningLength = wordinbits * (int) rlw.getRunningLength() + this.position; |
||||
if (!rlw.getRunningBit()) { |
||||
this.position = this.runningLength; |
||||
} |
||||
|
||||
this.wordPosition = this.ewahIter.literalWords(); |
||||
this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
private final boolean runningHasNext() { |
||||
return this.position < this.runningLength; |
||||
} |
||||
|
||||
private final boolean literalHasNext() { |
||||
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||
this.word = this.ewahBuffer[this.wordPosition++]; |
||||
this.literalPosition = this.position; |
||||
this.position += wordinbits; |
||||
} |
||||
return this.word != 0; |
||||
} |
||||
} |
@ -0,0 +1,89 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* Implementation of an IntIterator over an IteratingRLW. |
||||
* |
||||
* |
||||
*/ |
||||
public class IntIteratorOverIteratingRLW implements IntIterator { |
||||
IteratingRLW parent; |
||||
private int position; |
||||
private int runningLength; |
||||
private long word; |
||||
private int wordPosition; |
||||
private int wordLength; |
||||
private int literalPosition; |
||||
private boolean hasnext; |
||||
|
||||
/** |
||||
* @param p iterator we wish to iterate over |
||||
*/ |
||||
public IntIteratorOverIteratingRLW(final IteratingRLW p) { |
||||
this.parent = p; |
||||
this.position = 0; |
||||
setupForCurrentRunningLengthWord(); |
||||
this.hasnext = moveToNext(); |
||||
} |
||||
|
||||
/** |
||||
* @return whether we could find another set bit; don't move if there is an unprocessed value |
||||
*/ |
||||
private final boolean moveToNext() { |
||||
while (!runningHasNext() && !literalHasNext()) { |
||||
if (this.parent.next()) |
||||
setupForCurrentRunningLengthWord(); |
||||
else return false; |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return this.hasnext; |
||||
} |
||||
|
||||
@Override |
||||
public final int next() { |
||||
final int answer; |
||||
if (runningHasNext()) { |
||||
answer = this.position++; |
||||
} else { |
||||
final int bit = Long.numberOfTrailingZeros(this.word); |
||||
this.word ^= (1l << bit); |
||||
answer = this.literalPosition + bit; |
||||
} |
||||
this.hasnext = this.moveToNext(); |
||||
return answer; |
||||
} |
||||
|
||||
private final void setupForCurrentRunningLengthWord() { |
||||
this.runningLength = wordinbits * (int) this.parent.getRunningLength() |
||||
+ this.position; |
||||
|
||||
if (!this.parent.getRunningBit()) { |
||||
this.position = this.runningLength; |
||||
} |
||||
this.wordPosition = 0; |
||||
this.wordLength = this.parent.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
private final boolean runningHasNext() { |
||||
return this.position < this.runningLength; |
||||
} |
||||
|
||||
private final boolean literalHasNext() { |
||||
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||
this.word = this.parent.getLiteralWordAt(this.wordPosition++); |
||||
this.literalPosition = this.position; |
||||
this.position += wordinbits; |
||||
} |
||||
return this.word != 0; |
||||
} |
||||
} |
@ -0,0 +1,276 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically |
||||
* advances to the next BufferedRunningLengthWord as words are discarded. |
||||
* |
||||
* @since 0.4.0 |
||||
* @author David McIntosh |
||||
*/ |
||||
public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable{ |
||||
/** |
||||
* Instantiates a new iterating buffered running length word. |
||||
* |
||||
* @param iterator iterator |
||||
*/ |
||||
public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) { |
||||
this.iterator = iterator; |
||||
this.brlw = new BufferedRunningLengthWord(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||
this.buffer = this.iterator.buffer(); |
||||
} |
||||
|
||||
|
||||
|
||||
/** |
||||
* Instantiates a new iterating buffered running length word. |
||||
* @param bitmap over which we want to iterate |
||||
* |
||||
*/ |
||||
public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) { |
||||
this.iterator = EWAHIterator.getEWAHIterator(bitmap); |
||||
this.brlw = new BufferedRunningLengthWord(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||
this.buffer = this.iterator.buffer(); |
||||
} |
||||
|
||||
|
||||
|
||||
|
||||
/** |
||||
* Discard first words, iterating to the next running length word if needed. |
||||
* |
||||
* @param x the number of words to be discarded |
||||
*/ |
||||
@Override |
||||
public void discardFirstWords(long x) { |
||||
while (x > 0) { |
||||
if (this.brlw.RunningLength > x) { |
||||
this.brlw.RunningLength -= x; |
||||
return; |
||||
} |
||||
x -= this.brlw.RunningLength; |
||||
this.brlw.RunningLength = 0; |
||||
long toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||
|
||||
this.literalWordStartPosition += toDiscard; |
||||
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||
x -= toDiscard; |
||||
if ((x > 0) || (this.brlw.size() == 0)) { |
||||
if (!this.iterator.hasNext()) { |
||||
break; |
||||
} |
||||
this.brlw.reset(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||
} |
||||
} |
||||
} |
||||
/** |
||||
* Move to the next RunningLengthWord |
||||
* @return whether the move was possible |
||||
*/ |
||||
@Override |
||||
public boolean next() { |
||||
if (!this.iterator.hasNext()) { |
||||
this.brlw.NumberOfLiteralWords = 0; |
||||
this.brlw.RunningLength = 0; |
||||
return false; |
||||
} |
||||
this.brlw.reset(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||
return true; |
||||
} |
||||
|
||||
/** |
||||
* Write out up to max words, returns how many were written |
||||
* @param container target for writes |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
public long discharge(BitmapStorage container, long max) { |
||||
long index = 0; |
||||
while ((index < max) && (size() > 0)) { |
||||
// first run
|
||||
long pl = getRunningLength(); |
||||
if (index + pl > max) { |
||||
pl = max - index; |
||||
} |
||||
container.addStreamOfEmptyWords(getRunningBit(), pl); |
||||
index += pl; |
||||
int pd = getNumberOfLiteralWords(); |
||||
if (pd + index > max) { |
||||
pd = (int) (max - index); |
||||
} |
||||
writeLiteralWords(pd, container); |
||||
discardFirstWords(pl+pd); |
||||
index += pd; |
||||
} |
||||
return index; |
||||
} |
||||
|
||||
/** |
||||
* Write out up to max words (negated), returns how many were written |
||||
* @param container target for writes |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
public long dischargeNegated(BitmapStorage container, long max) { |
||||
long index = 0; |
||||
while ((index < max) && (size() > 0)) { |
||||
// first run
|
||||
long pl = getRunningLength(); |
||||
if (index + pl > max) { |
||||
pl = max - index; |
||||
} |
||||
container.addStreamOfEmptyWords(!getRunningBit(), pl); |
||||
index += pl; |
||||
int pd = getNumberOfLiteralWords(); |
||||
if (pd + index > max) { |
||||
pd = (int) (max - index); |
||||
} |
||||
writeNegatedLiteralWords(pd, container); |
||||
discardFirstWords(pl+pd); |
||||
index += pd; |
||||
} |
||||
return index; |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Write out the remain words, transforming them to zeroes. |
||||
* @param container target for writes |
||||
*/ |
||||
public void dischargeAsEmpty(BitmapStorage container) { |
||||
while(size()>0) { |
||||
container.addStreamOfEmptyWords(false, size()); |
||||
discardFirstWords(size()); |
||||
} |
||||
} |
||||
|
||||
|
||||
|
||||
/** |
||||
* Write out the remaining words |
||||
* @param container target for writes |
||||
*/ |
||||
public void discharge(BitmapStorage container) { |
||||
this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); |
||||
discharge(this.brlw, this.iterator, container); |
||||
} |
||||
|
||||
/** |
||||
* Get the nth literal word for the current running length word |
||||
* @param index zero based index |
||||
* @return the literal word |
||||
*/ |
||||
@Override |
||||
public long getLiteralWordAt(int index) { |
||||
return this.buffer[this.literalWordStartPosition + index]; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words for the current running length word. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
@Override |
||||
public int getNumberOfLiteralWords() { |
||||
return this.brlw.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
@Override |
||||
public boolean getRunningBit() { |
||||
return this.brlw.RunningBit; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
@Override |
||||
public long getRunningLength() { |
||||
return this.brlw.RunningLength; |
||||
} |
||||
|
||||
/** |
||||
* Size in uncompressed words of the current running length word. |
||||
* |
||||
* @return the long |
||||
*/ |
||||
@Override |
||||
public long size() { |
||||
return this.brlw.size(); |
||||
} |
||||
|
||||
/** |
||||
* write the first N literal words to the target bitmap. Does not discard the words or perform iteration. |
||||
* @param numWords number of words to be written |
||||
* @param container where we write |
||||
*/ |
||||
public void writeLiteralWords(int numWords, BitmapStorage container) { |
||||
container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||
} |
||||
|
||||
/** |
||||
* write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. |
||||
* @param numWords number of words to be written |
||||
* @param container where we write |
||||
*/ |
||||
public void writeNegatedLiteralWords(int numWords, BitmapStorage container) { |
||||
container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||
} |
||||
|
||||
/** |
||||
* For internal use. (One could use the non-static discharge method instead, |
||||
* but we expect them to be slower.) |
||||
* |
||||
* @param initialWord |
||||
* the initial word |
||||
* @param iterator |
||||
* the iterator |
||||
* @param container |
||||
* the container |
||||
*/ |
||||
private static void discharge(final BufferedRunningLengthWord initialWord, |
||||
final EWAHIterator iterator, final BitmapStorage container) { |
||||
BufferedRunningLengthWord runningLengthWord = initialWord; |
||||
for (;;) { |
||||
final long runningLength = runningLengthWord.getRunningLength(); |
||||
container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), |
||||
runningLength); |
||||
container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() |
||||
+ runningLengthWord.literalwordoffset, |
||||
runningLengthWord.getNumberOfLiteralWords()); |
||||
if (!iterator.hasNext()) |
||||
break; |
||||
runningLengthWord = new BufferedRunningLengthWord(iterator.next()); |
||||
} |
||||
} |
||||
|
||||
|
||||
@Override |
||||
public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException { |
||||
IteratingBufferedRunningLengthWord answer = (IteratingBufferedRunningLengthWord) super.clone(); |
||||
answer.brlw = this.brlw.clone(); |
||||
answer.buffer = this.buffer; |
||||
answer.iterator = this.iterator.clone(); |
||||
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||
return answer; |
||||
} |
||||
|
||||
|
||||
private BufferedRunningLengthWord brlw; |
||||
private long[] buffer; |
||||
private int literalWordStartPosition; |
||||
private EWAHIterator iterator; |
||||
} |
@ -0,0 +1,49 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* High-level iterator over a compressed bitmap. |
||||
* |
||||
*/ |
||||
public interface IteratingRLW {

  /**
   * Moves to the next running length word.
   *
   * @return whether there is more
   */
  boolean next();

  /**
   * Reads one literal word of the current running length word.
   *
   * @param index where the literal word is
   * @return the literal word at the given index.
   */
  long getLiteralWordAt(int index);

  /**
   * @return the number of literal (non-fill) words
   */
  int getNumberOfLiteralWords();

  /**
   * @return the bit used for the fill bits
   */
  boolean getRunningBit();

  /**
   * @return sum of getRunningLength() and getNumberOfLiteralWords()
   */
  long size();

  /**
   * @return length of the run of fill words
   */
  long getRunningLength();

  /**
   * Drops words from the front of the iterator.
   *
   * @param x the number of words to discard
   */
  void discardFirstWords(long x);

  /**
   * @return a copy of the iterator
   * @throws CloneNotSupportedException this should not be thrown in theory
   */
  IteratingRLW clone() throws CloneNotSupportedException;
}
@ -0,0 +1,616 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
import java.util.Arrays; |
||||
import java.util.Iterator; |
||||
import java.util.LinkedList; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* Set of helper functions to aggregate bitmaps. |
||||
* |
||||
*/ |
||||
public class IteratorAggregation { |
||||
|
||||
/** |
||||
* @param x iterator to negate |
||||
* @return negated version of the iterator |
||||
*/ |
||||
public static IteratingRLW not(final IteratingRLW x) { |
||||
return new IteratingRLW() { |
||||
|
||||
@Override |
||||
public boolean next() { |
||||
return x.next(); |
||||
} |
||||
|
||||
@Override |
||||
public long getLiteralWordAt(int index) { |
||||
return ~x.getLiteralWordAt(index); |
||||
} |
||||
|
||||
@Override |
||||
public int getNumberOfLiteralWords() { |
||||
return x.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
@Override |
||||
public boolean getRunningBit() { |
||||
return ! x.getRunningBit(); |
||||
} |
||||
|
||||
@Override |
||||
public long size() { |
||||
return x.size(); |
||||
} |
||||
|
||||
@Override |
||||
public long getRunningLength() { |
||||
return x.getRunningLength(); |
||||
} |
||||
|
||||
@Override |
||||
public void discardFirstWords(long y) { |
||||
x.discardFirstWords(y); |
||||
} |
||||
|
||||
@Override |
||||
public IteratingRLW clone() throws CloneNotSupportedException { |
||||
throw new CloneNotSupportedException(); |
||||
} |
||||
|
||||
|
||||
}; |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al set of iterators to aggregate |
||||
* @return and aggregate |
||||
*/ |
||||
public static IteratingRLW bufferedand(final IteratingRLW... al) { |
||||
return bufferedand(DEFAULTMAXBUFSIZE,al); |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al set of iterators to aggregate |
||||
* @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator) |
||||
* @return and aggregate |
||||
*/ |
||||
public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... al) { |
||||
if (al.length == 0) |
||||
throw new IllegalArgumentException("Need at least one iterator"); |
||||
if (al.length == 1) |
||||
return al[0]; |
||||
final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>(); |
||||
for (IteratingRLW i : al) |
||||
basell.add(i); |
||||
return new BufferedIterator(new BufferedAndIterator(basell,bufsize)); |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al set of iterators to aggregate |
||||
* @return or aggregate |
||||
*/ |
||||
public static IteratingRLW bufferedor(final IteratingRLW... al) { |
||||
return bufferedor(DEFAULTMAXBUFSIZE,al); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @param bufsize size of the internal buffer used by the iterator in 64-bit words |
||||
* @return or aggregate |
||||
*/ |
||||
public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... al) { |
||||
if (al.length == 0) |
||||
throw new IllegalArgumentException("Need at least one iterator"); |
||||
if (al.length == 1) |
||||
return al[0]; |
||||
|
||||
final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>(); |
||||
for (IteratingRLW i : al) |
||||
basell.add(i); |
||||
return new BufferedIterator(new BufferedORIterator(basell,bufsize)); |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al set of iterators to aggregate |
||||
* @return xor aggregate |
||||
*/ |
||||
public static IteratingRLW bufferedxor(final IteratingRLW... al) { |
||||
return bufferedxor(DEFAULTMAXBUFSIZE,al); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @param bufsize size of the internal buffer used by the iterator in 64-bit words |
||||
* @return xor aggregate |
||||
*/ |
||||
public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... al) { |
||||
if (al.length == 0) |
||||
throw new IllegalArgumentException("Need at least one iterator"); |
||||
if (al.length == 1) |
||||
return al[0]; |
||||
|
||||
|
||||
final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>(); |
||||
for (IteratingRLW i : al) |
||||
basell.add(i); |
||||
|
||||
return new BufferedIterator(new BufferedXORIterator(basell, bufsize)); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Write out the content of the iterator, but as if it were all zeros. |
||||
* |
||||
* @param container |
||||
* where we write |
||||
* @param i |
||||
* the iterator |
||||
*/ |
||||
protected static void dischargeAsEmpty(final BitmapStorage container, |
||||
final IteratingRLW i) { |
||||
while (i.size() > 0) { |
||||
container.addStreamOfEmptyWords(false, i.size()); |
||||
i.next(); |
||||
|
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Write out up to max words, returns how many were written |
||||
* @param container target for writes |
||||
* @param i source of data |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
|
||||
protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) { |
||||
long counter = 0; |
||||
while (i.size() > 0 && counter < max) { |
||||
long L1 = i.getRunningLength(); |
||||
if (L1 > 0) { |
||||
if (L1 + counter > max) |
||||
L1 = max - counter; |
||||
container.addStreamOfEmptyWords(i.getRunningBit(), L1); |
||||
counter += L1; |
||||
} |
||||
long L = i.getNumberOfLiteralWords(); |
||||
if(L + counter > max) L = max - counter; |
||||
for (int k = 0; k < L; ++k) { |
||||
container.add(i.getLiteralWordAt(k)); |
||||
} |
||||
counter += L; |
||||
i.discardFirstWords(L+L1); |
||||
} |
||||
return counter; |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Write out up to max negated words, returns how many were written |
||||
* @param container target for writes |
||||
* @param i source of data |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) { |
||||
long counter = 0; |
||||
while (i.size() > 0 && counter < max) { |
||||
long L1 = i.getRunningLength(); |
||||
if (L1 > 0) { |
||||
if (L1 + counter > max) |
||||
L1 = max - counter; |
||||
container.addStreamOfEmptyWords(!i.getRunningBit(), L1); |
||||
counter += L1; |
||||
} |
||||
long L = i.getNumberOfLiteralWords(); |
||||
if(L + counter > max) L = max - counter; |
||||
for (int k = 0; k < L; ++k) { |
||||
container.add(~i.getLiteralWordAt(k)); |
||||
} |
||||
counter += L; |
||||
i.discardFirstWords(L+L1); |
||||
} |
||||
return counter; |
||||
} |
||||
|
||||
static void andToContainer(final BitmapStorage container, |
||||
int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) { |
||||
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||
.getRunningLength(); |
||||
final IteratingRLW prey = i_is_prey ? rlwi : rlwj; |
||||
final IteratingRLW predator = i_is_prey ? rlwj |
||||
: rlwi; |
||||
if (predator.getRunningBit() == false) { |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||
prey.discardFirstWords(predator.getRunningLength()); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} else { |
||||
final long index = discharge(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} |
||||
} |
||||
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||
rlwj.getNumberOfLiteralWords()); |
||||
if (nbre_literal > 0) { |
||||
desiredrlwcount -= nbre_literal; |
||||
for (int k = 0; k < nbre_literal; ++k) |
||||
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||
rlwi.discardFirstWords(nbre_literal); |
||||
rlwj.discardFirstWords(nbre_literal); |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void andToContainer(final BitmapStorage container, |
||||
final IteratingRLW rlwi, IteratingRLW rlwj) { |
||||
while ((rlwi.size()>0) && (rlwj.size()>0) ) { |
||||
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||
.getRunningLength(); |
||||
final IteratingRLW prey = i_is_prey ? rlwi : rlwj; |
||||
final IteratingRLW predator = i_is_prey ? rlwj |
||||
: rlwi; |
||||
if (predator.getRunningBit() == false) { |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||
prey.discardFirstWords(predator.getRunningLength()); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} else { |
||||
final long index = discharge(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} |
||||
} |
||||
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||
rlwj.getNumberOfLiteralWords()); |
||||
if (nbre_literal > 0) { |
||||
for (int k = 0; k < nbre_literal; ++k) |
||||
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||
rlwi.discardFirstWords(nbre_literal); |
||||
rlwj.discardFirstWords(nbre_literal); |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Compute the first few words of the XOR aggregate between two iterators. |
||||
* |
||||
* @param container where to write |
||||
* @param desiredrlwcount number of words to be written (max) |
||||
* @param rlwi first iterator to aggregate |
||||
* @param rlwj second iterator to aggregate |
||||
*/ |
||||
public static void xorToContainer(final BitmapStorage container, |
||||
int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) { |
||||
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||
.getRunningLength(); |
||||
final IteratingRLW prey = i_is_prey ? rlwi : rlwj; |
||||
final IteratingRLW predator = i_is_prey ? rlwj |
||||
: rlwi; |
||||
if (predator.getRunningBit() == false) { |
||||
long index = discharge(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} else { |
||||
long index = dischargeNegated(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(true, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} |
||||
} |
||||
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||
rlwj.getNumberOfLiteralWords()); |
||||
if (nbre_literal > 0) { |
||||
desiredrlwcount -= nbre_literal; |
||||
for (int k = 0; k < nbre_literal; ++k) |
||||
container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); |
||||
rlwi.discardFirstWords(nbre_literal); |
||||
rlwj.discardFirstWords(nbre_literal); |
||||
} |
||||
} |
||||
} |
||||
|
||||
protected static int inplaceor(long[] bitmap, |
||||
IteratingRLW i) { |
||||
|
||||
int pos = 0; |
||||
long s; |
||||
while ((s = i.size()) > 0) { |
||||
if (pos + s < bitmap.length) { |
||||
final int L = (int) i.getRunningLength(); |
||||
if (i.getRunningBit()) |
||||
Arrays.fill(bitmap, pos, pos + L, ~0l); |
||||
pos += L; |
||||
final int LR = i.getNumberOfLiteralWords(); |
||||
|
||||
for (int k = 0; k < LR; ++k) |
||||
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||
if (!i.next()) { |
||||
return pos; |
||||
} |
||||
} else { |
||||
int howmany = bitmap.length - pos; |
||||
int L = (int) i.getRunningLength(); |
||||
|
||||
if (pos + L > bitmap.length) { |
||||
if (i.getRunningBit()) { |
||||
Arrays.fill(bitmap, pos, bitmap.length, ~0l); |
||||
} |
||||
i.discardFirstWords(howmany); |
||||
return bitmap.length; |
||||
} |
||||
if (i.getRunningBit()) |
||||
Arrays.fill(bitmap, pos, pos + L, ~0l); |
||||
pos += L; |
||||
for (int k = 0; pos < bitmap.length; ++k) |
||||
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||
i.discardFirstWords(howmany); |
||||
return pos; |
||||
} |
||||
} |
||||
return pos; |
||||
} |
||||
|
||||
protected static int inplacexor(long[] bitmap, |
||||
IteratingRLW i) { |
||||
int pos = 0; |
||||
long s; |
||||
while ((s = i.size()) > 0) { |
||||
if (pos + s < bitmap.length) { |
||||
final int L = (int) i.getRunningLength(); |
||||
if (i.getRunningBit()) { |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = ~bitmap[k]; |
||||
} |
||||
pos += L; |
||||
final int LR = i.getNumberOfLiteralWords(); |
||||
for (int k = 0; k < LR; ++k) |
||||
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||
if (!i.next()) { |
||||
return pos; |
||||
} |
||||
} else { |
||||
int howmany = bitmap.length - pos; |
||||
int L = (int) i.getRunningLength(); |
||||
if (pos + L > bitmap.length) { |
||||
if (i.getRunningBit()) { |
||||
for(int k = pos ; k < bitmap.length; ++k) |
||||
bitmap[k] = ~bitmap[k]; |
||||
} |
||||
i.discardFirstWords(howmany); |
||||
return bitmap.length; |
||||
} |
||||
if (i.getRunningBit()) |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = ~bitmap[k]; |
||||
pos += L; |
||||
for (int k = 0; pos < bitmap.length; ++k) |
||||
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||
i.discardFirstWords(howmany); |
||||
return pos; |
||||
} |
||||
} |
||||
return pos; |
||||
} |
||||
protected static int inplaceand(long[] bitmap, |
||||
IteratingRLW i) { |
||||
int pos = 0; |
||||
long s; |
||||
while ((s = i.size()) > 0) { |
||||
if (pos + s < bitmap.length) { |
||||
final int L = (int) i.getRunningLength(); |
||||
if (!i.getRunningBit()) { |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = 0; |
||||
} |
||||
pos += L; |
||||
final int LR = i.getNumberOfLiteralWords(); |
||||
for (int k = 0; k < LR; ++k) |
||||
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||
if (!i.next()) { |
||||
return pos; |
||||
} |
||||
} else { |
||||
int howmany = bitmap.length - pos; |
||||
int L = (int) i.getRunningLength(); |
||||
if (pos + L > bitmap.length) { |
||||
if (!i.getRunningBit()) { |
||||
for(int k = pos ; k < bitmap.length; ++k) |
||||
bitmap[k] = 0; |
||||
} |
||||
i.discardFirstWords(howmany); |
||||
return bitmap.length; |
||||
} |
||||
if (!i.getRunningBit()) |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = 0; |
||||
pos += L; |
||||
for (int k = 0; pos < bitmap.length; ++k) |
||||
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||
i.discardFirstWords(howmany); |
||||
return pos; |
||||
} |
||||
} |
||||
return pos; |
||||
} |
||||
|
||||
/** |
||||
* An optimization option. Larger values may improve speed, but at |
||||
* the expense of memory. |
||||
*/ |
||||
public final static int DEFAULTMAXBUFSIZE = 65536; |
||||
} |
||||
class BufferedORIterator implements CloneableIterator<EWAHIterator> { |
||||
EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); |
||||
long[] hardbitmap; |
||||
LinkedList<IteratingRLW> ll; |
||||
int buffersize; |
||||
|
||||
BufferedORIterator(LinkedList<IteratingRLW> basell, int bufsize) { |
||||
this.ll = basell; |
||||
this.hardbitmap = new long[bufsize]; |
||||
} |
||||
|
||||
@Override |
||||
public BufferedXORIterator clone() throws CloneNotSupportedException { |
||||
BufferedXORIterator answer = (BufferedXORIterator) super.clone(); |
||||
answer.buffer = this.buffer.clone(); |
||||
answer.hardbitmap = this.hardbitmap.clone(); |
||||
answer.ll = (LinkedList<IteratingRLW>) this.ll.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return !this.ll.isEmpty(); |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator next() { |
||||
this.buffer.clear(); |
||||
long effective = 0; |
||||
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||
while (i.hasNext()) { |
||||
IteratingRLW rlw = i.next(); |
||||
if (rlw.size() > 0) { |
||||
int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
i.remove(); |
||||
} |
||||
for (int k = 0; k < effective; ++k) { |
||||
this.buffer.add(this.hardbitmap[k]); |
||||
} |
||||
|
||||
Arrays.fill(this.hardbitmap, 0); |
||||
return this.buffer.getEWAHIterator(); |
||||
} |
||||
} |
||||
|
||||
class BufferedXORIterator implements CloneableIterator<EWAHIterator> { |
||||
EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); |
||||
long[] hardbitmap; |
||||
LinkedList<IteratingRLW> ll; |
||||
int buffersize; |
||||
|
||||
BufferedXORIterator(LinkedList<IteratingRLW> basell, int bufsize) { |
||||
this.ll = basell; |
||||
this.hardbitmap = new long[bufsize]; |
||||
} |
||||
|
||||
@Override |
||||
public BufferedXORIterator clone() throws CloneNotSupportedException { |
||||
BufferedXORIterator answer = (BufferedXORIterator) super.clone(); |
||||
answer.buffer = this.buffer.clone(); |
||||
answer.hardbitmap = this.hardbitmap.clone(); |
||||
answer.ll = (LinkedList<IteratingRLW>) this.ll.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return !this.ll.isEmpty(); |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator next() { |
||||
this.buffer.clear(); |
||||
long effective = 0; |
||||
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||
while (i.hasNext()) { |
||||
IteratingRLW rlw = i.next(); |
||||
if (rlw.size() > 0) { |
||||
int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
i.remove(); |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
this.buffer.add(this.hardbitmap[k]); |
||||
Arrays.fill(this.hardbitmap, 0); |
||||
return this.buffer.getEWAHIterator(); |
||||
} |
||||
} |
||||
|
||||
|
||||
class BufferedAndIterator implements CloneableIterator<EWAHIterator> { |
||||
EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); |
||||
LinkedList<IteratingRLW> ll; |
||||
int buffersize; |
||||
|
||||
public BufferedAndIterator(LinkedList<IteratingRLW> basell, int bufsize) { |
||||
this.ll = basell; |
||||
this.buffersize = bufsize; |
||||
|
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return !this.ll.isEmpty(); |
||||
} |
||||
|
||||
@Override |
||||
public BufferedAndIterator clone() throws CloneNotSupportedException { |
||||
BufferedAndIterator answer = (BufferedAndIterator) super.clone(); |
||||
answer.buffer = this.buffer.clone(); |
||||
answer.ll = (LinkedList<IteratingRLW>) this.ll.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator next() { |
||||
this.buffer.clear(); |
||||
IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(), |
||||
this.ll.get(0), this.ll.get(1)); |
||||
if (this.ll.size() > 2) { |
||||
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||
i.next(); |
||||
i.next(); |
||||
EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap(); |
||||
while (i.hasNext() && this.buffer.sizeInBytes() > 0) { |
||||
IteratorAggregation.andToContainer(tmpbuffer, |
||||
this.buffer.getIteratingRLW(), i.next()); |
||||
this.buffer.swap(tmpbuffer); |
||||
tmpbuffer.clear(); |
||||
} |
||||
} |
||||
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||
while(i.hasNext()) { |
||||
if(i.next().size() == 0) { |
||||
this.ll.clear(); |
||||
break; |
||||
} |
||||
} |
||||
return this.buffer.getEWAHIterator(); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,132 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
import java.util.Iterator; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* Convenience functions for working over iterators |
||||
* |
||||
*/ |
||||
public class IteratorUtil { |
||||
|
||||
/** |
||||
* @param i iterator we wish to iterate over |
||||
* @return an iterator over the set bits corresponding to the iterator |
||||
*/ |
||||
public static IntIterator toSetBitsIntIterator(final IteratingRLW i) { |
||||
return new IntIteratorOverIteratingRLW(i); |
||||
} |
||||
|
||||
/** |
||||
* @param i iterator we wish to iterate over |
||||
* @return an iterator over the set bits corresponding to the iterator |
||||
*/ |
||||
public static Iterator<Integer> toSetBitsIterator(final IteratingRLW i) { |
||||
return new Iterator<Integer>() { |
||||
@Override |
||||
public boolean hasNext() { |
||||
return this.under.hasNext(); |
||||
} |
||||
|
||||
@Override |
||||
public Integer next() { |
||||
return new Integer(this.under.next()); |
||||
} |
||||
|
||||
@Override |
||||
public void remove() { |
||||
} |
||||
|
||||
final private IntIterator under = toSetBitsIntIterator(i); |
||||
}; |
||||
|
||||
} |
||||
|
||||
/** |
||||
* Generate a bitmap from an iterator |
||||
* |
||||
* @param i iterator we wish to materialize |
||||
* @param c where we write |
||||
*/ |
||||
public static void materialize(final IteratingRLW i, final BitmapStorage c) { |
||||
while (true) { |
||||
if (i.getRunningLength() > 0) { |
||||
c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); |
||||
} |
||||
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||
c.add(i.getLiteralWordAt(k)); |
||||
if (!i.next()) |
||||
break; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* @param i iterator we wish to iterate over |
||||
* @return the cardinality (number of set bits) corresponding to the iterator |
||||
*/ |
||||
public static int cardinality(final IteratingRLW i) { |
||||
int answer = 0; |
||||
while (true) { |
||||
if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits; |
||||
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||
answer += Long.bitCount(i.getLiteralWordAt(k)); |
||||
if(!i.next()) break; |
||||
} |
||||
return answer; |
||||
} |
||||
|
||||
/** |
||||
* @param x set of bitmaps |
||||
* @return an array of iterators corresponding to the array of bitmaps |
||||
*/ |
||||
public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... x) { |
||||
IteratingRLW[] X = new IteratingRLW[x.length]; |
||||
for (int k = 0; k < X.length; ++k) { |
||||
X[k] = new IteratingBufferedRunningLengthWord(x[k]); |
||||
} |
||||
return X; |
||||
} |
||||
/** |
||||
* Turn an iterator into a bitmap. |
||||
* |
||||
* @param i iterator we wish to materialize |
||||
* @param c where we write |
||||
* @param Max maximum number of words we wish to materialize |
||||
* @return how many words were actually materialized |
||||
*/ |
||||
public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) { |
||||
final long origMax = Max; |
||||
while (true) { |
||||
if (i.getRunningLength() > 0) { |
||||
long L = i.getRunningLength(); |
||||
if(L > Max) L = Max; |
||||
c.addStreamOfEmptyWords(i.getRunningBit(), L); |
||||
Max -= L; |
||||
} |
||||
long L = i.getNumberOfLiteralWords(); |
||||
for (int k = 0; k < L; ++k) |
||||
c.add(i.getLiteralWordAt(k)); |
||||
if(Max>0) { |
||||
if (!i.next()) |
||||
break; |
||||
} |
||||
else break; |
||||
} |
||||
return origMax - Max; |
||||
} |
||||
/** |
||||
* Turn an iterator into a bitmap |
||||
* |
||||
* @param i iterator we wish to materialize |
||||
* @return materialized version of the iterator |
||||
*/ |
||||
public static EWAHCompressedBitmap materialize(final IteratingRLW i) { |
||||
EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); |
||||
materialize(i, ewah); |
||||
return ewah; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,61 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/**
 * Minimal contract for bitmap-like objects that support the bitwise
 * logical operations. It is used by the class FastAggregation; most users
 * do not need to deal with it directly.
 *
 * @author Daniel Lemire
 * @param <T> the type of element (e.g., a bitmap class)
 */
public interface LogicalElement<T> {

    /**
     * Bitwise logical AND with another element.
     *
     * @param le the other element
     * @return the result of the operation
     */
    T and(T le);

    /**
     * Bitwise logical AND NOT with another element.
     *
     * @param le the other element
     * @return the result of the operation
     */
    T andNot(T le);

    /**
     * Bitwise logical NOT, applied in place.
     */
    void not();

    /**
     * Bitwise logical OR with another element.
     *
     * @param le the other element
     * @return the result of the operation
     */
    @SuppressWarnings({ "rawtypes", "javadoc" })
    LogicalElement or(T le);

    /**
     * Reports how many logical bits this element represents.
     *
     * @return the number of bits represented by this element
     */
    int sizeInBits();

    /**
     * Reports the storage requirement.
     *
     * @return the number of bytes
     * @since 0.6.2
     */
    int sizeInBytes();

    /**
     * Bitwise logical XOR with another element.
     *
     * @param le the other element
     * @return the result of the operation
     */
    T xor(T le);
}
@ -0,0 +1,92 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* This is a BitmapStorage that can be used to determine quickly if the result |
||||
* of an operation is non-trivial... that is, whether there will be at least on |
||||
* set bit. |
||||
* |
||||
* @since 0.4.2 |
||||
* @author Daniel Lemire and Veronika Zenz |
||||
* |
||||
*/ |
||||
public class NonEmptyVirtualStorage implements BitmapStorage { |
||||
static class NonEmptyException extends RuntimeException { |
||||
private static final long serialVersionUID = 1L; |
||||
|
||||
/** |
||||
* Do not fill in the stack trace for this exception |
||||
* for performance reasons. |
||||
* |
||||
* @return this instance |
||||
* @see Throwable#fillInStackTrace() |
||||
*/ |
||||
@Override |
||||
public synchronized Throwable fillInStackTrace() { |
||||
return this; |
||||
} |
||||
} |
||||
|
||||
private static final NonEmptyException nonEmptyException = new NonEmptyException(); |
||||
|
||||
/** |
||||
* If the word to be added is non-zero, a NonEmptyException exception is |
||||
* thrown. |
||||
* |
||||
* @see com.googlecode.javaewah.BitmapStorage#add(long) |
||||
*/ |
||||
@Override |
||||
public void add(long newdata) { |
||||
if (newdata != 0) |
||||
throw nonEmptyException; |
||||
return; |
||||
} |
||||
|
||||
/** |
||||
* throws a NonEmptyException exception when number is greater than 0 |
||||
* |
||||
*/ |
||||
@Override |
||||
public void addStreamOfLiteralWords(long[] data, int start, int number) { |
||||
if(number>0){ |
||||
throw nonEmptyException; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, |
||||
* otherwise, nothing happens. |
||||
* |
||||
* @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, long) |
||||
*/ |
||||
@Override |
||||
public void addStreamOfEmptyWords(boolean v, long number) { |
||||
if (v && (number>0)) |
||||
throw nonEmptyException; |
||||
return; |
||||
} |
||||
|
||||
/** |
||||
* throws a NonEmptyException exception when number is greater than 0 |
||||
* |
||||
*/ |
||||
@Override |
||||
public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { |
||||
if(number>0){ |
||||
throw nonEmptyException; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Does nothing. |
||||
* |
||||
* @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) |
||||
*/ |
||||
@Override |
||||
public void setSizeInBits(int bits) { |
||||
} |
||||
|
||||
} |
@ -0,0 +1,152 @@
|
||||
package com.fr.third.googlecode.javaewah; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* Mostly for internal use. |
||||
* |
||||
* @since 0.1.0 |
||||
* @author Daniel Lemire |
||||
*/ |
||||
public final class RunningLengthWord implements Cloneable { |
||||
|
||||
/** |
||||
* Instantiates a new running length word. |
||||
* |
||||
* @param a |
||||
* an array of 64-bit words |
||||
* @param p |
||||
* position in the array where the running length word is |
||||
* located. |
||||
*/ |
||||
RunningLengthWord(final EWAHCompressedBitmap a, final int p) { |
||||
this.parent = a; |
||||
this.position = p; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
public int getNumberOfLiteralWords() { |
||||
return (int) (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
public boolean getRunningBit() { |
||||
return (this.parent.buffer[this.position] & 1) != 0; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
public long getRunningLength() { |
||||
return (this.parent.buffer[this.position] >>> 1) |
||||
& largestrunninglengthcount; |
||||
} |
||||
|
||||
/** |
||||
* Sets the number of literal words. |
||||
* |
||||
* @param number |
||||
* the new number of literal words |
||||
*/ |
||||
public void setNumberOfLiteralWords(final long number) { |
||||
this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; |
||||
this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) |
||||
| runninglengthplusrunningbit; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running bit. |
||||
* |
||||
* @param b |
||||
* the new running bit |
||||
*/ |
||||
public void setRunningBit(final boolean b) { |
||||
if (b) |
||||
this.parent.buffer[this.position] |= 1l; |
||||
else |
||||
this.parent.buffer[this.position] &= ~1l; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running length. |
||||
* |
||||
* @param number |
||||
* the new running length |
||||
*/ |
||||
public void setRunningLength(final long number) { |
||||
this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; |
||||
this.parent.buffer[this.position] &= (number << 1) |
||||
| notshiftedlargestrunninglengthcount; |
||||
} |
||||
|
||||
/** |
||||
* Return the size in uncompressed words represented by this running |
||||
* length word. |
||||
* |
||||
* @return the size |
||||
*/ |
||||
public long size() { |
||||
return getRunningLength() + getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
/* |
||||
* @see java.lang.Object#toString() |
||||
*/ |
||||
@Override |
||||
public String toString() { |
||||
return "running bit = " + getRunningBit() |
||||
+ " running length = " + getRunningLength() |
||||
+ " number of lit. words " + getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
@Override |
||||
public RunningLengthWord clone() throws CloneNotSupportedException { |
||||
RunningLengthWord answer; |
||||
answer = (RunningLengthWord) super.clone(); |
||||
answer.parent = this.parent; |
||||
answer.position = this.position; |
||||
return answer; |
||||
} |
||||
|
||||
/** The array of words. */ |
||||
public EWAHCompressedBitmap parent; |
||||
|
||||
/** The position in array. */ |
||||
public int position; |
||||
|
||||
/** |
||||
* number of bits dedicated to marking of the running length of clean |
||||
* words |
||||
*/ |
||||
public static final int runninglengthbits = 32; |
||||
|
||||
private static final int literalbits = 64 - 1 - runninglengthbits; |
||||
|
||||
/** largest number of literal words in a run. */ |
||||
public static final int largestliteralcount = (1 << literalbits) - 1; |
||||
|
||||
/** largest number of clean words in a run */ |
||||
public static final long largestrunninglengthcount = (1l << runninglengthbits) - 1; |
||||
|
||||
private static final long runninglengthplusrunningbit = (1l << (runninglengthbits + 1)) - 1; |
||||
|
||||
private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; |
||||
|
||||
private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; |
||||
|
||||
private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; |
||||
|
||||
} |
@ -0,0 +1,284 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
import java.text.DecimalFormat; |
||||
import java.util.Arrays; |
||||
import java.util.List; |
||||
import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; |
||||
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||
import com.fr.third.googlecode.javaewah.IntIterator; |
||||
import com.fr.third.googlecode.javaewah.IteratingRLW; |
||||
import com.fr.third.googlecode.javaewah.IteratorAggregation; |
||||
import com.fr.third.googlecode.javaewah.IteratorUtil; |
||||
|
||||
/** |
||||
 * This class is used to benchmark the performance of EWAH.
||||
* |
||||
* @author Daniel Lemire |
||||
*/ |
||||
public class Benchmark { |
||||
|
||||
/** |
||||
* Compute the union between two sorted arrays |
||||
* @param set1 first sorted array |
||||
* @param set2 second sorted array |
||||
* @return merged array |
||||
*/ |
||||
static public int[] unite2by2(final int[] set1, final int[] set2) { |
||||
int pos = 0; |
||||
int k1 = 0, k2 = 0; |
||||
if (0 == set1.length) |
||||
return Arrays.copyOf(set2, set2.length); |
||||
if (0 == set2.length) |
||||
return Arrays.copyOf(set1, set1.length); |
||||
int[] buffer = new int[set1.length + set2.length]; |
||||
while (true) { |
||||
if (set1[k1] < set2[k2]) { |
||||
buffer[pos++] = set1[k1]; |
||||
++k1; |
||||
if (k1 >= set1.length) { |
||||
for (; k2 < set2.length; ++k2) |
||||
buffer[pos++] = set2[k2]; |
||||
break; |
||||
} |
||||
} else if (set1[k1] == set2[k2]) { |
||||
buffer[pos++] = set1[k1]; |
||||
++k1; |
||||
++k2; |
||||
if (k1 >= set1.length) { |
||||
for (; k2 < set2.length; ++k2) |
||||
buffer[pos++] = set2[k2]; |
||||
break; |
||||
} |
||||
if (k2 >= set2.length) { |
||||
for (; k1 < set1.length; ++k1) |
||||
buffer[pos++] = set1[k1]; |
||||
break; |
||||
} |
||||
} else {// if (set1[k1]>set2[k2]) {
|
||||
buffer[pos++] = set2[k2]; |
||||
++k2; |
||||
if (k2 >= set2.length) { |
||||
for (; k1 < set1.length; ++k1) |
||||
buffer[pos++] = set1[k1]; |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
return Arrays.copyOf(buffer, pos); |
||||
} |
||||
|
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
//test(2, 24, 1);
|
||||
test(100, 16, 1); |
||||
} |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void test(int N, int nbr, int repeat) { |
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { |
||||
long bogus = 0; |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
System.out.println("# generating random data..."); |
||||
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||
System.out.println("# generating random data... ok."); |
||||
// building
|
||||
bef = System.currentTimeMillis(); |
||||
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||
int size = 0; |
||||
for (int r = 0; r < repeat; ++r) { |
||||
size = 0; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
size += ewah[k].sizeInBytes(); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + size; |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
int[] array = ewah[k].toArray(); |
||||
bogus += array.length; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
int[] array = new int[ewah[k].cardinality()]; |
||||
int c = 0; |
||||
for (int x : ewah[k]) |
||||
array[c++] = x; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
List<Integer> L = ewah[k].getPositions(); |
||||
int[] array = new int[L.size()]; |
||||
int c = 0; |
||||
for (int x : L) |
||||
array[c++] = x; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IntIterator iter = ewah[k].intIterator(); |
||||
while (iter.hasNext()) { |
||||
bogus += iter.next(); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
line += "\t\t\t"; |
||||
// logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.or(ewah[j]); |
||||
} |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap |
||||
.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
// run sanity check
|
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||
} |
||||
IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); |
||||
EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k+1)); |
||||
EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor); |
||||
if(!ewahorp.equals(mewahor)) throw new RuntimeException("bug"); |
||||
} |
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||
} |
||||
IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); |
||||
bogus += IteratorUtil.materialize(ewahor).sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
line += "\t\t\t"; |
||||
// logical and
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap ewahand = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahand = ewahand.and(ewah[j]); |
||||
} |
||||
bogus += ewahand.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical and
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahand = EWAHCompressedBitmap |
||||
.and(ewahcp); |
||||
bogus += ewahand.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||
} |
||||
IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); |
||||
EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k+1)); |
||||
EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand); |
||||
if(!ewahandp.equals(mewahand)) throw new RuntimeException("bug"); |
||||
} |
||||
// fast logical and
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||
} |
||||
IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); |
||||
bogus += IteratorUtil.materialize(ewahand).sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
System.out |
||||
.println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); |
||||
System.out.println(line); |
||||
System.out.println("# bogus =" + bogus); |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,212 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
import java.text.DecimalFormat; |
||||
import java.util.List; |
||||
import com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32; |
||||
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||
import com.fr.third.googlecode.javaewah.IntIterator; |
||||
import com.fr.third.googlecode.javaewah32.IteratingRLW32; |
||||
import com.fr.third.googlecode.javaewah32.IteratorAggregation32; |
||||
import com.fr.third.googlecode.javaewah32.IteratorUtil32; |
||||
|
||||
/** |
||||
 * This class is used to benchmark the performance of EWAH.
||||
* |
||||
* @author Daniel Lemire |
||||
*/ |
||||
public class Benchmark32 { |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
test(100, 16, 1); |
||||
// test(2, 24, 1);
|
||||
} |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void test(int N, int nbr, int repeat) { |
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { |
||||
long bogus = 0; |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
System.out.println("# generating random data..."); |
||||
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||
System.out.println("# generating random data... ok."); |
||||
// building
|
||||
bef = System.currentTimeMillis(); |
||||
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||
int size = 0; |
||||
for (int r = 0; r < repeat; ++r) { |
||||
size = 0; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap32(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
size += ewah[k].sizeInBytes(); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + size; |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
int[] array = ewah[k].toArray(); |
||||
bogus += array.length; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
int[] array = new int[ewah[k].cardinality()]; |
||||
int c = 0; |
||||
for (int x : ewah[k]) |
||||
array[c++] = x; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
List<Integer> L = ewah[k].getPositions(); |
||||
int[] array = new int[L.size()]; |
||||
int c = 0; |
||||
for (int x : L) |
||||
array[c++] = x; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// uncompressing
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IntIterator iter = ewah[k].intIterator(); |
||||
while (iter.hasNext()) { |
||||
bogus += iter.next(); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
line += "\t\t\t"; |
||||
// logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.or(ewah[j]); |
||||
} |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||
.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||
} |
||||
IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp); |
||||
bogus += IteratorUtil32.materialize(ewahor).sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
line += "\t\t\t"; |
||||
// logical and
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32 ewahand = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahand = ewahand.and(ewah[j]); |
||||
} |
||||
bogus += ewahand.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical and
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32 |
||||
.and(ewahcp); |
||||
bogus += ewahand.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
// fast logical and
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||
} |
||||
IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp); |
||||
bogus += IteratorUtil32.materialize(ewahand).sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
System.out |
||||
.println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); |
||||
System.out.println(line); |
||||
System.out.println("# bogus =" + bogus); |
||||
} |
||||
} |
||||
} |
@ -0,0 +1,130 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
import java.text.DecimalFormat; |
||||
import com.fr.third.googlecode.javaewah.*; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* To benchmark the logical and (intersection) aggregate. |
||||
*/ |
||||
public class BenchmarkIntersection { |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
test(10, 18, 1); |
||||
} |
||||
|
||||
@SuppressWarnings({ "javadoc"}) |
||||
public static void test(int N, int nbr, int repeat) { |
||||
long bogus = 0; |
||||
|
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||
for (int times = 0; times < 2; ++times) { |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||
// building
|
||||
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
data[k] = null; |
||||
} |
||||
// sanity check
|
||||
if (true) { |
||||
EWAHCompressedBitmap answer = ewah[0].and(ewah[1]); |
||||
for (int k = 2; k < ewah.length; ++k) |
||||
answer = answer.and(ewah[k]); |
||||
|
||||
EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah); |
||||
if (!answer.equals(ewahand)) |
||||
throw new RuntimeException( |
||||
"bug EWAHCompressedBitmap.and"); |
||||
EWAHCompressedBitmap ewahand2 = FastAggregation |
||||
.bufferedand(65536,ewah); |
||||
if (!ewahand.equals(ewahand2)) |
||||
throw new RuntimeException( |
||||
"bug FastAggregation.bufferedand "); |
||||
|
||||
} |
||||
|
||||
// logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.and(ewah[j]); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap |
||||
.and(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = FastAggregation |
||||
.bufferedand(65536,ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = new IteratingBufferedRunningLengthWord( |
||||
ewah[j]); |
||||
} |
||||
IteratingRLW ewahor = IteratorAggregation.bufferedand(ewahcp); |
||||
int wordcounter = IteratorUtil.cardinality(ewahor); |
||||
bogus += wordcounter; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
System.out |
||||
.println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); |
||||
|
||||
System.out.println(line); |
||||
} |
||||
System.out.println("# bogus =" + bogus); |
||||
|
||||
} |
||||
} |
||||
} |
@ -0,0 +1,130 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
import java.text.DecimalFormat; |
||||
import com.fr.third.googlecode.javaewah32.*; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* To benchmark the logical and (intersection) aggregate. |
||||
*/ |
||||
public class BenchmarkIntersection32 { |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
test(10, 18, 1); |
||||
} |
||||
|
||||
@SuppressWarnings({ "javadoc" }) |
||||
public static void test(int N, int nbr, int repeat) { |
||||
long bogus = 0; |
||||
|
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||
for (int times = 0; times < 2; ++times) { |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||
// building
|
||||
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap32(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
data[k] = null; |
||||
} |
||||
// sanity check
|
||||
if (true) { |
||||
EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]); |
||||
for (int k = 2; k < ewah.length; ++k) |
||||
answer = answer.and(ewah[k]); |
||||
|
||||
EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah); |
||||
if (!answer.equals(ewahand)) |
||||
throw new RuntimeException( |
||||
"bug EWAHCompressedBitmap.and"); |
||||
EWAHCompressedBitmap32 ewahand2 = FastAggregation32 |
||||
.bufferedand(65536,ewah); |
||||
if (!ewahand.equals(ewahand2)) |
||||
throw new RuntimeException( |
||||
"bug FastAggregation.bufferedand "); |
||||
|
||||
} |
||||
|
||||
// logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.and(ewah[j]); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||
.and(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = FastAggregation32 |
||||
.bufferedand(65536,ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = new IteratingBufferedRunningLengthWord32( |
||||
ewah[j]); |
||||
} |
||||
IteratingRLW32 ewahor = IteratorAggregation32.bufferedand(ewahcp); |
||||
int wordcounter = IteratorUtil32.cardinality(ewahor); |
||||
bogus += wordcounter; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
System.out |
||||
.println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); |
||||
|
||||
System.out.println(line); |
||||
} |
||||
System.out.println("# bogus =" + bogus); |
||||
|
||||
} |
||||
} |
||||
} |
@ -0,0 +1,164 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
import java.text.DecimalFormat; |
||||
import com.fr.third.googlecode.javaewah.*; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* To benchmark the logical or (union) aggregate. |
||||
*/ |
||||
public class BenchmarkUnion { |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
test(10, 18, 1); |
||||
} |
||||
|
||||
@SuppressWarnings({ "javadoc", "deprecation" }) |
||||
public static void test(int N, int nbr, int repeat) { |
||||
long bogus = 0; |
||||
|
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||
for (int times = 0; times < 2; ++times) { |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||
// building
|
||||
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
data[k] = null; |
||||
} |
||||
// sanity check
|
||||
if (true) { |
||||
EWAHCompressedBitmap answer = ewah[0].or(ewah[1]); |
||||
for (int k = 2; k < ewah.length; ++k) |
||||
answer = answer.or(ewah[k]); |
||||
|
||||
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah); |
||||
if (!answer.equals(ewahor)) |
||||
throw new RuntimeException( |
||||
"bug EWAHCompressedBitmap.or"); |
||||
EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah); |
||||
if (!ewahor.equals(ewahor3)) |
||||
throw new RuntimeException("bug FastAggregation.or"); |
||||
EWAHCompressedBitmap ewahor2 = FastAggregation |
||||
.bufferedor(65536,ewah); |
||||
if (!ewahor.equals(ewahor2)) |
||||
throw new RuntimeException( |
||||
"bug FastAggregation.bufferedor "); |
||||
|
||||
} |
||||
|
||||
// logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.or(ewah[j]); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap |
||||
.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = FastAggregation |
||||
.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = FastAggregation |
||||
.bufferedor(65536,ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap x = new EWAHCompressedBitmap(); |
||||
FastAggregation.legacy_orWithContainer(x, ewahcp); |
||||
bogus += x.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = new IteratingBufferedRunningLengthWord( |
||||
ewah[j]); |
||||
} |
||||
IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); |
||||
int wordcounter = IteratorUtil.cardinality(ewahor); |
||||
bogus += wordcounter; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
System.out |
||||
.println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); |
||||
|
||||
System.out.println(line); |
||||
} |
||||
System.out.println("# bogus =" + bogus); |
||||
|
||||
} |
||||
} |
||||
} |
@ -0,0 +1,165 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
import java.text.DecimalFormat; |
||||
|
||||
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||
import com.fr.third.googlecode.javaewah32.*; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* To benchmark the logical or (union) aggregate. |
||||
*/ |
||||
public class BenchmarkUnion32 { |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
test(10, 18, 1); |
||||
} |
||||
|
||||
@SuppressWarnings({ "javadoc", "deprecation" }) |
||||
public static void test(int N, int nbr, int repeat) { |
||||
long bogus = 0; |
||||
|
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||
for (int times = 0; times < 2; ++times) { |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||
// building
|
||||
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap32(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
data[k] = null; |
||||
} |
||||
// sanity check
|
||||
if(true){ |
||||
EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]); |
||||
for(int k = 2; k < ewah.length; ++k) |
||||
answer = answer.or(ewah[k]); |
||||
|
||||
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||
.or(ewah); |
||||
if(!answer.equals(ewahor)) throw new RuntimeException("bug EWAHCompressedBitmap.or"); |
||||
EWAHCompressedBitmap32 ewahor3 = FastAggregation |
||||
.or(ewah); |
||||
if(!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or"); |
||||
EWAHCompressedBitmap32 ewahor2 = FastAggregation32 |
||||
.bufferedor(65536,ewah); |
||||
if(!ewahor.equals(ewahor2)) throw new RuntimeException("bug FastAggregation.bufferedor "); |
||||
|
||||
} |
||||
|
||||
// logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.or(ewah[j]); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||
.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = FastAggregation |
||||
.or(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = FastAggregation32 |
||||
.bufferedor(65536,ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); |
||||
FastAggregation32.legacy_orWithContainer(x, ewahcp); |
||||
bogus += x.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical or
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = new IteratingBufferedRunningLengthWord32(ewah[j]); |
||||
} |
||||
IteratingRLW32 ewahor = IteratorAggregation32 |
||||
.bufferedor(ewahcp); |
||||
int wordcounter = IteratorUtil32.cardinality(ewahor); |
||||
bogus += wordcounter; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
System.out |
||||
.println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); |
||||
|
||||
System.out.println(line); |
||||
} |
||||
System.out.println("# bogus =" + bogus); |
||||
|
||||
} |
||||
} |
||||
} |
@ -0,0 +1,134 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
import java.text.DecimalFormat; |
||||
import com.fr.third.googlecode.javaewah.*; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* To benchmark the logical xor aggregate. |
||||
*/ |
||||
public class BenchmarkXOR { |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
//test(10, 18, 1);
|
||||
test(2, 22, 1); |
||||
} |
||||
|
||||
@SuppressWarnings({ "javadoc" }) |
||||
public static void test(int N, int nbr, int repeat) { |
||||
long bogus = 0; |
||||
|
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||
for (int times = 0; times < 2; ++times) { |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||
// building
|
||||
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
data[k] = null; |
||||
} |
||||
// sanity check
|
||||
if (true) { |
||||
EWAHCompressedBitmap answer = ewah[0].xor(ewah[1]); |
||||
for (int k = 2; k < ewah.length; ++k) |
||||
answer = answer.xor(ewah[k]); |
||||
EWAHCompressedBitmap ewahor3 = FastAggregation.xor(ewah); |
||||
if (!answer.equals(ewahor3)) |
||||
throw new RuntimeException("bug FastAggregation.xor"); |
||||
EWAHCompressedBitmap ewahor2 = FastAggregation |
||||
.bufferedxor(65536,ewah); |
||||
if (!answer.equals(ewahor2)) |
||||
throw new RuntimeException( |
||||
"bug FastAggregation.bufferedxor "); |
||||
EWAHCompressedBitmap iwah = IteratorUtil.materialize(IteratorAggregation.bufferedxor(IteratorUtil.toIterators(ewah))); |
||||
if (!answer.equals(iwah)) |
||||
throw new RuntimeException( |
||||
"bug xor it "); |
||||
|
||||
|
||||
} |
||||
|
||||
// logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.xor(ewah[j]); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = FastAggregation |
||||
.xor(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
// fast logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap ewahor = FastAggregation |
||||
.bufferedxor(65536,ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
// fast logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = new IteratingBufferedRunningLengthWord( |
||||
ewah[j]); |
||||
} |
||||
IteratingRLW ewahor = IteratorAggregation.bufferedxor(ewahcp); |
||||
int wordcounter = IteratorUtil.cardinality(ewahor); |
||||
bogus += wordcounter; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
System.out |
||||
.println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); |
||||
|
||||
System.out.println(line); |
||||
} |
||||
System.out.println("# bogus =" + bogus); |
||||
|
||||
} |
||||
} |
||||
} |
@ -0,0 +1,137 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
import java.text.DecimalFormat; |
||||
|
||||
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||
import com.fr.third.googlecode.javaewah32.*; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* To benchmark the logical xor aggregate. |
||||
*/ |
||||
public class BenchmarkXOR32 { |
||||
|
||||
@SuppressWarnings("javadoc") |
||||
public static void main(String args[]) { |
||||
test(10, 18, 1); |
||||
//test(2, 22, 1);
|
||||
} |
||||
|
||||
@SuppressWarnings({ "javadoc" }) |
||||
public static void test(int N, int nbr, int repeat) { |
||||
long bogus = 0; |
||||
|
||||
DecimalFormat df = new DecimalFormat("0.###"); |
||||
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||
for (int times = 0; times < 2; ++times) { |
||||
String line = ""; |
||||
long bef, aft; |
||||
line += sparsity; |
||||
int[][] data = new int[N][]; |
||||
int Max = (1 << (nbr + sparsity)); |
||||
for (int k = 0; k < N; ++k) |
||||
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||
// building
|
||||
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||
for (int k = 0; k < N; ++k) { |
||||
ewah[k] = new EWAHCompressedBitmap32(); |
||||
for (int x = 0; x < data[k].length; ++x) { |
||||
ewah[k].set(data[k][x]); |
||||
} |
||||
data[k] = null; |
||||
} |
||||
// sanity check
|
||||
if (true) { |
||||
EWAHCompressedBitmap32 answer = ewah[0].xor(ewah[1]); |
||||
for (int k = 2; k < ewah.length; ++k) |
||||
answer = answer.xor(ewah[k]); |
||||
EWAHCompressedBitmap32 ewahor3 = FastAggregation.xor(ewah); |
||||
if (!answer.equals(ewahor3)) |
||||
throw new RuntimeException("bug FastAggregation.xor"); |
||||
EWAHCompressedBitmap32 ewahor2 = FastAggregation32 |
||||
.bufferedxor(65536,ewah); |
||||
if (!answer.equals(ewahor2)) |
||||
throw new RuntimeException( |
||||
"bug FastAggregation.bufferedxor "); |
||||
EWAHCompressedBitmap32 iwah = IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(ewah))); |
||||
if (!answer.equals(iwah)) |
||||
throw new RuntimeException( |
||||
"bug xor it "); |
||||
|
||||
} |
||||
|
||||
// logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||
for (int j = 1; j < k + 1; ++j) { |
||||
ewahor = ewahor.xor(ewah[j]); |
||||
} |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = FastAggregation |
||||
.xor(ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
// fast logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = ewah[j]; |
||||
} |
||||
EWAHCompressedBitmap32 ewahor = FastAggregation32 |
||||
.bufferedxor(65536,ewahcp); |
||||
bogus += ewahor.sizeInBits(); |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
// fast logical xor
|
||||
bef = System.currentTimeMillis(); |
||||
for (int r = 0; r < repeat; ++r) |
||||
for (int k = 0; k < N; ++k) { |
||||
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||
for (int j = 0; j < k + 1; ++j) { |
||||
ewahcp[j] = new IteratingBufferedRunningLengthWord32( |
||||
ewah[j]); |
||||
} |
||||
IteratingRLW32 ewahor = IteratorAggregation32.bufferedxor(ewahcp); |
||||
int wordcounter = IteratorUtil32.cardinality(ewahor); |
||||
bogus += wordcounter; |
||||
} |
||||
aft = System.currentTimeMillis(); |
||||
|
||||
line += "\t" + df.format((aft - bef) / 1000.0); |
||||
|
||||
|
||||
System.out |
||||
.println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); |
||||
|
||||
System.out.println(line); |
||||
} |
||||
System.out.println("# bogus =" + bogus); |
||||
|
||||
} |
||||
} |
||||
} |
@ -0,0 +1,78 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
|
||||
/** |
||||
* This class will generate lists of random integers with a "clustered" distribution. |
||||
* Reference: |
||||
* Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147. |
||||
* |
||||
* @author Daniel Lemire |
||||
*/ |
||||
public class ClusteredDataGenerator { |
||||
|
||||
/** |
||||
* |
||||
*/ |
||||
public ClusteredDataGenerator() { |
||||
this.unidg = new UniformDataGenerator(); |
||||
} |
||||
|
||||
/** |
||||
* @param seed random seed |
||||
*/ |
||||
public ClusteredDataGenerator(final int seed) { |
||||
this.unidg = new UniformDataGenerator(seed); |
||||
} |
||||
|
||||
/** |
||||
* generates randomly N distinct integers from 0 to Max. |
||||
* @param N number of integers |
||||
* @param Max maximum integer value |
||||
* @return a randomly generated array |
||||
*/ |
||||
public int[] generateClustered(int N, int Max) { |
||||
int[] array = new int[N]; |
||||
fillClustered(array, 0, N, 0, Max); |
||||
return array; |
||||
} |
||||
|
||||
void fillClustered(int[] array, int offset, int length, int Min, int Max) { |
||||
final int range = Max - Min; |
||||
if ((range == length) || (length <= 10)) { |
||||
fillUniform(array, offset, length, Min, Max); |
||||
return; |
||||
} |
||||
final int cut = length / 2 |
||||
+ ((range - length - 1 > 0) ? this.unidg.rand.nextInt(range - length - 1) : 0); |
||||
final double p = this.unidg.rand.nextDouble(); |
||||
if (p < 0.25) { |
||||
fillUniform(array, offset, length / 2, Min, Min + cut); |
||||
fillClustered(array, offset + length / 2, length - length / 2, Min + cut, |
||||
Max); |
||||
} else if (p < 0.5) { |
||||
fillClustered(array, offset, length / 2, Min, Min + cut); |
||||
fillUniform(array, offset + length / 2, length - length / 2, Min + cut, |
||||
Max); |
||||
} else { |
||||
fillClustered(array, offset, length / 2, Min, Min + cut); |
||||
fillClustered(array, offset + length / 2, length - length / 2, Min + cut, |
||||
Max); |
||||
} |
||||
} |
||||
|
||||
void fillUniform(int[] array, int offset, int length, int Min, int Max) { |
||||
int[] v = this.unidg.generateUniform(length, Max - Min); |
||||
for (int k = 0; k < v.length; ++k) |
||||
array[k + offset] = Min + v[k]; |
||||
} |
||||
|
||||
UniformDataGenerator unidg; |
||||
|
||||
} |
||||
|
@ -0,0 +1,114 @@
|
||||
package com.fr.third.googlecode.javaewah.benchmark; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
import java.util.Arrays; |
||||
import java.util.BitSet; |
||||
import java.util.HashSet; |
||||
import java.util.Iterator; |
||||
import java.util.Random; |
||||
|
||||
/**
 * Generates sorted arrays of distinct, uniformly drawn random integers.
 *
 * @author Daniel Lemire
 */
public class UniformDataGenerator {

    /**
     * Source of randomness. Package-private so that other generators in this
     * package can draw from the same sequence.
     */
    Random rand;

    /**
     * construct generator of random arrays.
     */
    public UniformDataGenerator() {
        this.rand = new Random();
    }

    /**
     * @param seed random seed
     */
    public UniformDataGenerator(final int seed) {
        this.rand = new Random(seed);
    }

    /**
     * Generates N distinct integers from the range [0,Max), collecting them
     * in a hash set until enough distinct values were drawn.
     *
     * @param N Number of integers to generate
     * @param Max Exclusive upper bound of the generated integers
     * @return sorted array containing the random integers
     * @throws RuntimeException if N is larger than Max
     */
    int[] generateUniformHash(int N, int Max) {
        if (N > Max) {
            throw new RuntimeException("not possible");
        }
        int[] ans = new int[N];
        HashSet<Integer> s = new HashSet<Integer>();
        while (s.size() < N) {
            s.add(Integer.valueOf(this.rand.nextInt(Max)));
        }
        Iterator<Integer> i = s.iterator();
        for (int k = 0; k < N; ++k) {
            ans[k] = i.next().intValue();
        }
        Arrays.sort(ans);
        return ans;
    }

    /**
     * output all integers from the range [0,Max) that are not
     * in the array
     *
     * @param x sorted (ascending) array of distinct integers from [0,Max)
     * @param Max Exclusive upper bound of the range
     * @return sorted array of the integers of [0,Max) missing from x
     */
    static int[] negate(int[] x, int Max) {
        int[] ans = new int[Max - x.length];
        int i = 0;
        int c = 0;
        for (int j = 0; j < x.length; ++j) {
            int v = x[j];
            // emit everything strictly below v, then skip v itself
            for (; i < v; ++i) {
                ans[c++] = i;
            }
            ++i;
        }
        // emit whatever remains after the last excluded value
        while (c < ans.length) {
            ans[c++] = i++;
        }
        return ans;
    }

    /**
     * Generates N distinct integers from the range [0,Max).
     *
     * Dense requests (more than half of the range) are produced by generating
     * the complement and negating it; moderately dense requests use a bitmap
     * and sparse requests use a hash set.
     *
     * @param N Number of integers to generate
     * @param Max Exclusive upper bound of the generated integers
     * @return sorted array containing random integers
     */
    public int[] generateUniform(int N, int Max) {
        // The thresholds are computed in long arithmetic: N * 2 and 2048 * N
        // overflow int for large N and would select the wrong strategy.
        if (2L * N > Max) {
            return negate(generateUniform(Max - N, Max), Max);
        }
        if (2048L * N > Max) {
            return generateUniformBitmap(N, Max);
        }
        return generateUniformHash(N, Max);
    }

    /**
     * Generates N distinct integers from the range [0,Max) using a bitmap.
     *
     * @param N Number of integers to generate
     * @param Max Exclusive upper bound of the generated integers
     * @return sorted array containing random integers
     * @throws RuntimeException if N is larger than Max
     */
    int[] generateUniformBitmap(int N, int Max) {
        if (N > Max) {
            throw new RuntimeException("not possible");
        }
        int[] ans = new int[N];
        BitSet bs = new BitSet(Max);
        int cardinality = 0;
        while (cardinality < N) {
            int v = this.rand.nextInt(Max);
            if (!bs.get(v)) {
                bs.set(v);
                cardinality++;
            }
        }
        int pos = 0;
        for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
            ans[pos++] = i;
        }
        return ans;
    }

}
@ -0,0 +1,102 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* BitCounter is a fake bitset data structure. Instead of storing the actual data, |
||||
* it only records the number of set bits. |
||||
* |
||||
* @since 0.5.0 |
||||
* @author Daniel Lemire and David McIntosh |
||||
*/ |
||||
|
||||
public final class BitCounter32 implements BitmapStorage32 { |
||||
|
||||
/** |
||||
* Virtually add words directly to the bitmap |
||||
* |
||||
* @param newdata the word |
||||
*/ |
||||
// @Override : causes problems with Java 1.5
|
||||
@Override |
||||
public void add(final int newdata) { |
||||
this.oneBits += Integer.bitCount(newdata); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* virtually add several literal words. |
||||
* |
||||
* @param data the literal words |
||||
* @param start the starting point in the array |
||||
* @param number the number of literal words to add |
||||
*/ |
||||
// @Override : causes problems with Java 1.5
|
||||
@Override |
||||
public void addStreamOfLiteralWords(int[] data, int start, int number) { |
||||
for(int i=start;i<start+number;i++) { |
||||
add(data[i]); |
||||
} |
||||
} |
||||
|
||||
|
||||
/** |
||||
* virtually add many |
||||
* zeroes or ones. |
||||
* |
||||
* @param v zeros or ones |
||||
* @param number how many to words add |
||||
*/ |
||||
// @Override : causes problems with Java 1.5
|
||||
@Override |
||||
public void addStreamOfEmptyWords(boolean v, int number) { |
||||
if (v) { |
||||
this.oneBits += number * EWAHCompressedBitmap32.wordinbits; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* virtually add several negated literal words. |
||||
* |
||||
* @param data the literal words |
||||
* @param start the starting point in the array |
||||
* @param number the number of literal words to add |
||||
*/ |
||||
// @Override : causes problems with Java 1.5
|
||||
@Override |
||||
public void addStreamOfNegatedLiteralWords(int[] data, int start, |
||||
int number) { |
||||
for(int i=start;i<start+number;i++) { |
||||
add(~data[i]); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* As you act on this class, it records the number of set (true) bits. |
||||
* |
||||
* @return number of set bits |
||||
*/ |
||||
public int getCount() { |
||||
return this.oneBits; |
||||
} |
||||
|
||||
/** |
||||
* should directly set the sizeinbits field, but is effectively ignored in this class. |
||||
* |
||||
* @param bits number of bits |
||||
*/ |
||||
// @Override : causes problems with Java 1.5
|
||||
@Override |
||||
public void setSizeInBits(int bits) { |
||||
// no action
|
||||
} |
||||
|
||||
private int oneBits; |
||||
|
||||
|
||||
|
||||
} |
@ -0,0 +1,60 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/**
 * Low-level sink for bitmap words, using {@code int} words.
 *
 * @since 0.5.0
 * @author Daniel Lemire and David McIntosh
 */
public interface BitmapStorage32 {

    /**
     * Appends one word directly to the bitmap (for expert use).
     *
     * Bits are added one whole word, i.e. one {@code int} (32 bits),
     * at a time.
     *
     * @param newdata the word
     */
    void add(int newdata);

    /**
     * Appends several literal words at once; this might be faster than
     * adding them one by one.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    void addStreamOfLiteralWords(int[] data, int start, int number);

    /**
     * For experts: appends a run of words that are all zeroes or all ones.
     *
     * @param v zeros or ones
     * @param number how many words to add
     */
    void addStreamOfEmptyWords(boolean v, int number);

    /**
     * Like "addStreamOfLiteralWords" but negates the words being added.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    void addStreamOfNegatedLiteralWords(int[] data, int start, int number);

    /**
     * Directly sets the sizeinbits field.
     *
     * @param bits number of bits
     */
    void setSizeInBits(int bits);
}
@ -0,0 +1,152 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
import com.fr.third.googlecode.javaewah.CloneableIterator; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* This class can be used to iterate over blocks of bitmap data. |
||||
* |
||||
* @author Daniel Lemire |
||||
* |
||||
*/ |
||||
public class BufferedIterator32 implements IteratingRLW32, Cloneable { |
||||
/** |
||||
* Instantiates a new iterating buffered running length word. |
||||
* |
||||
* @param iterator iterator |
||||
*/ |
||||
public BufferedIterator32(final CloneableIterator<EWAHIterator32> iterator) { |
||||
this.masteriterator = iterator; |
||||
if(this.masteriterator.hasNext()) { |
||||
this.iterator = this.masteriterator.next(); |
||||
this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||
this.buffer = this.iterator.buffer(); |
||||
} |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Discard first words, iterating to the next running length word if needed. |
||||
* |
||||
* @param x the number of words to be discarded |
||||
*/ |
||||
@Override |
||||
public void discardFirstWords(int x) { |
||||
while (x > 0) { |
||||
if (this.brlw.RunningLength > x) { |
||||
this.brlw.RunningLength -= x; |
||||
return; |
||||
} |
||||
x -= this.brlw.RunningLength; |
||||
this.brlw.RunningLength = 0; |
||||
int toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||
|
||||
this.literalWordStartPosition += toDiscard; |
||||
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||
x -= toDiscard; |
||||
if ((x > 0) || (this.brlw.size() == 0)) { |
||||
if (!this.next()) { |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
} |
||||
/** |
||||
* Move to the next RunningLengthWord |
||||
* @return whether the move was possible |
||||
*/ |
||||
@Override |
||||
public boolean next() { |
||||
if (!this.iterator.hasNext()) { |
||||
if(!reload()) { |
||||
this.brlw.NumberOfLiteralWords = 0; |
||||
this.brlw.RunningLength = 0; |
||||
return false; |
||||
} |
||||
} |
||||
this.brlw.reset(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||
return true; |
||||
} |
||||
private boolean reload() { |
||||
if(!this.masteriterator.hasNext()) { |
||||
return false; |
||||
} |
||||
this.iterator = this.masteriterator.next(); |
||||
this.buffer = this.iterator.buffer(); |
||||
return true; |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Get the nth literal word for the current running length word |
||||
* @param index zero based index |
||||
* @return the literal word |
||||
*/ |
||||
@Override |
||||
public int getLiteralWordAt(int index) { |
||||
return this.buffer[this.literalWordStartPosition + index]; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words for the current running length word. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
@Override |
||||
public int getNumberOfLiteralWords() { |
||||
return this.brlw.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
@Override |
||||
public boolean getRunningBit() { |
||||
return this.brlw.RunningBit; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
@Override |
||||
public int getRunningLength() { |
||||
return this.brlw.RunningLength; |
||||
} |
||||
|
||||
/** |
||||
* Size in uncompressed words of the current running length word. |
||||
* |
||||
* @return the size |
||||
*/ |
||||
@Override |
||||
public int size() { |
||||
return this.brlw.size(); |
||||
} |
||||
|
||||
@Override |
||||
public BufferedIterator32 clone() throws CloneNotSupportedException { |
||||
BufferedIterator32 answer = (BufferedIterator32) super.clone(); |
||||
answer.brlw = this.brlw.clone(); |
||||
answer.buffer = this.buffer; |
||||
answer.iterator = this.iterator.clone(); |
||||
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||
answer.masteriterator = this.masteriterator.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
private BufferedRunningLengthWord32 brlw; |
||||
private int[] buffer; |
||||
private int literalWordStartPosition; |
||||
private EWAHIterator32 iterator; |
||||
private CloneableIterator<EWAHIterator32> masteriterator; |
||||
} |
@ -0,0 +1,174 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
|
||||
|
||||
/** |
||||
* Mostly for internal use. Similar to RunningLengthWord, but can |
||||
* be modified without access to the array, and has faster access. |
||||
* |
||||
* @author Daniel Lemire |
||||
* @since 0.5.0 |
||||
* |
||||
*/ |
||||
public final class BufferedRunningLengthWord32 implements Cloneable { |
||||
|
||||
/** |
||||
* Instantiates a new buffered running length word. |
||||
* |
||||
* @param a the word |
||||
*/ |
||||
public BufferedRunningLengthWord32(final int a) { |
||||
this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); |
||||
this.RunningBit = (a & 1) != 0; |
||||
this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); |
||||
} |
||||
|
||||
/** |
||||
* Instantiates a new buffered running length word. |
||||
* |
||||
* @param rlw the rlw |
||||
*/ |
||||
public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) { |
||||
this(rlw.parent.buffer[rlw.position]); |
||||
} |
||||
|
||||
/** |
||||
* Discard first words. |
||||
* |
||||
* @param x the number of words to be discarded |
||||
*/ |
||||
public void discardFirstWords(int x) { |
||||
if (this.RunningLength >= x) { |
||||
this.RunningLength -= x; |
||||
return; |
||||
} |
||||
x -= this.RunningLength; |
||||
this.RunningLength = 0; |
||||
this.literalwordoffset += x; |
||||
this.NumberOfLiteralWords -= x; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
public int getNumberOfLiteralWords() { |
||||
return this.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
public boolean getRunningBit() { |
||||
return this.RunningBit; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
public int getRunningLength() { |
||||
return this.RunningLength; |
||||
} |
||||
|
||||
/** |
||||
* Reset the values using the provided word. |
||||
* |
||||
* @param a the word |
||||
*/ |
||||
public void reset(final int a) { |
||||
this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); |
||||
this.RunningBit = (a & 1) != 0; |
||||
this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); |
||||
this.literalwordoffset = 0; |
||||
} |
||||
|
||||
/** |
||||
* Reset the values of this running length word so that it has the same values |
||||
* as the other running length word. |
||||
* |
||||
* @param rlw the other running length word |
||||
*/ |
||||
public void reset(final RunningLengthWord32 rlw) { |
||||
reset(rlw.parent.buffer[rlw.position]); |
||||
} |
||||
|
||||
/** |
||||
* Sets the number of literal words. |
||||
* |
||||
* @param number the new number of literal words |
||||
*/ |
||||
public void setNumberOfLiteralWords(final int number) { |
||||
this.NumberOfLiteralWords = number; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running bit. |
||||
* |
||||
* @param b the new running bit |
||||
*/ |
||||
public void setRunningBit(final boolean b) { |
||||
this.RunningBit = b; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running length. |
||||
* |
||||
* @param number the new running length |
||||
*/ |
||||
public void setRunningLength(final int number) { |
||||
this.RunningLength = number; |
||||
} |
||||
|
||||
/** |
||||
* Size in uncompressed words. |
||||
* |
||||
* @return the int |
||||
*/ |
||||
public int size() { |
||||
return this.RunningLength + this.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/* |
||||
* @see java.lang.Object#toString() |
||||
*/ |
||||
@Override |
||||
public String toString() { |
||||
return "running bit = " + getRunningBit() + " running length = " |
||||
+ getRunningLength() + " number of lit. words " |
||||
+ getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
@Override |
||||
public BufferedRunningLengthWord32 clone() throws CloneNotSupportedException { |
||||
BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super.clone(); |
||||
answer.literalwordoffset = this.literalwordoffset; |
||||
answer.NumberOfLiteralWords = this.NumberOfLiteralWords; |
||||
answer.RunningBit = this.RunningBit; |
||||
answer.RunningLength = this.RunningLength; |
||||
return answer; |
||||
} |
||||
|
||||
/** how many literal words have we read so far? */ |
||||
public int literalwordoffset = 0; |
||||
|
||||
/** The Number of literal words. */ |
||||
public int NumberOfLiteralWords; |
||||
|
||||
/** The Running bit. */ |
||||
public boolean RunningBit; |
||||
|
||||
/** The Running length. */ |
||||
public int RunningLength; |
||||
|
||||
|
||||
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,98 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* The class EWAHIterator represents a special type of |
||||
* efficient iterator iterating over (uncompressed) words of bits. |
||||
* |
||||
* @author Daniel Lemire |
||||
* @since 0.5.0 |
||||
* |
||||
*/ |
||||
public final class EWAHIterator32 implements Cloneable { |
||||
|
||||
/** |
||||
* Instantiates a new eWAH iterator. |
||||
* |
||||
* @param a the array of words |
||||
* @param sizeinwords the number of words that are significant in the array of words |
||||
*/ |
||||
public EWAHIterator32(final EWAHCompressedBitmap32 a, final int sizeinwords) { |
||||
this.rlw = new RunningLengthWord32(a, 0); |
||||
this.size = sizeinwords; |
||||
this.pointer = 0; |
||||
} |
||||
|
||||
/** |
||||
* Allow expert developers to instantiate an EWAHIterator. |
||||
* |
||||
* @param bitmap we want to iterate over |
||||
* @return an iterator |
||||
*/ |
||||
public static EWAHIterator32 getEWAHIterator(EWAHCompressedBitmap32 bitmap) { |
||||
return bitmap.getEWAHIterator(); |
||||
} |
||||
|
||||
/** |
||||
* Access to the array of words |
||||
* |
||||
* @return the int[] |
||||
*/ |
||||
public int[] buffer() { |
||||
return this.rlw.parent.buffer; |
||||
} |
||||
|
||||
/** |
||||
* Position of the literal words represented by this running length word. |
||||
* |
||||
* @return the int |
||||
*/ |
||||
public int literalWords() { |
||||
return this.pointer - this.rlw.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
/** |
||||
* Checks for next. |
||||
* |
||||
* @return true, if successful |
||||
*/ |
||||
public boolean hasNext() { |
||||
return this.pointer < this.size; |
||||
} |
||||
|
||||
/** |
||||
* Next running length word. |
||||
* |
||||
* @return the running length word |
||||
*/ |
||||
public RunningLengthWord32 next() { |
||||
this.rlw.position = this.pointer; |
||||
this.pointer += this.rlw.getNumberOfLiteralWords() + 1; |
||||
return this.rlw; |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator32 clone() throws CloneNotSupportedException { |
||||
EWAHIterator32 ans = (EWAHIterator32) super.clone(); |
||||
ans.rlw = this.rlw.clone(); |
||||
ans.size = this.size; |
||||
ans.pointer = this.pointer; |
||||
return ans; |
||||
} |
||||
|
||||
/** The pointer represent the location of the current running length |
||||
* word in the array of words (embedded in the rlw attribute). */ |
||||
int pointer; |
||||
|
||||
/** The current running length word. */ |
||||
RunningLengthWord32 rlw; |
||||
|
||||
/** The size in words. */ |
||||
int size; |
||||
|
||||
} |
@ -0,0 +1,377 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
import java.util.Arrays; |
||||
import java.util.Comparator; |
||||
import java.util.PriorityQueue; |
||||
|
||||
|
||||
/** |
||||
* Fast algorithms to aggregate many bitmaps. These algorithms are just given as |
||||
* reference. They may not be faster than the corresponding methods in the |
||||
* EWAHCompressedBitmap class. |
||||
* |
||||
* @author Daniel Lemire |
||||
* |
||||
*/ |
||||
public class FastAggregation32 { |
||||
|
||||
/** |
||||
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||
* @param bitmaps the source bitmaps |
||||
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||
* @return the or aggregate. |
||||
*/ |
||||
public static EWAHCompressedBitmap32 bufferedand(final int bufsize, |
||||
final EWAHCompressedBitmap32... bitmaps) { |
||||
EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); |
||||
bufferedandWithContainer(answer,bufsize, bitmaps); |
||||
return answer; |
||||
} |
||||
/** |
||||
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||
* |
||||
* @param container where the aggregate is written |
||||
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||
* @param bitmaps the source bitmaps |
||||
*/ |
||||
public static void bufferedandWithContainer(final BitmapStorage32 container,final int bufsize, |
||||
final EWAHCompressedBitmap32... bitmaps) { |
||||
|
||||
java.util.LinkedList<IteratingBufferedRunningLengthWord32> al = new java.util.LinkedList<IteratingBufferedRunningLengthWord32>(); |
||||
for (EWAHCompressedBitmap32 bitmap : bitmaps) { |
||||
al.add(new IteratingBufferedRunningLengthWord32(bitmap)); |
||||
} |
||||
int[] hardbitmap = new int[bufsize*bitmaps.length]; |
||||
|
||||
for(IteratingRLW32 i : al) |
||||
if (i.size() == 0) { |
||||
al.clear(); |
||||
break; |
||||
} |
||||
|
||||
while (!al.isEmpty()) { |
||||
Arrays.fill(hardbitmap, ~0); |
||||
int effective = Integer.MAX_VALUE; |
||||
for(IteratingRLW32 i : al) { |
||||
int eff = IteratorAggregation32.inplaceand(hardbitmap, i); |
||||
if (eff < effective) |
||||
effective = eff; |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
container.add(hardbitmap[k]); |
||||
for(IteratingRLW32 i : al) |
||||
if (i.size() == 0) { |
||||
al.clear(); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||
* @param bitmaps the source bitmaps |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @return the or aggregate. |
||||
*/ |
||||
public static EWAHCompressedBitmap32 bufferedor(final int bufsize, |
||||
final EWAHCompressedBitmap32... bitmaps) { |
||||
EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); |
||||
bufferedorWithContainer(answer, bufsize, bitmaps); |
||||
return answer; |
||||
} |
||||
|
||||
/** |
||||
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||
* |
||||
* @param container where the aggregate is written |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @param bitmaps the source bitmaps |
||||
*/ |
||||
public static void bufferedorWithContainer(final BitmapStorage32 container,final int bufsize, |
||||
final EWAHCompressedBitmap32... bitmaps) { |
||||
int range = 0; |
||||
EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); |
||||
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||
return b.sizeinbits - a.sizeinbits; |
||||
} |
||||
}); |
||||
|
||||
java.util.ArrayList<IteratingBufferedRunningLengthWord32> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord32>(); |
||||
for (EWAHCompressedBitmap32 bitmap : sbitmaps) { |
||||
if (bitmap.sizeinbits > range) |
||||
range = bitmap.sizeinbits; |
||||
al.add(new IteratingBufferedRunningLengthWord32(bitmap)); |
||||
} |
||||
int[] hardbitmap = new int[bufsize]; |
||||
int maxr = al.size(); |
||||
while (maxr > 0) { |
||||
int effective = 0; |
||||
for (int k = 0; k < maxr; ++k) { |
||||
if (al.get(k).size() > 0) { |
||||
int eff = IteratorAggregation32.inplaceor(hardbitmap, al.get(k)); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
maxr = k; |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
container.add(hardbitmap[k]); |
||||
Arrays.fill(hardbitmap, 0); |
||||
|
||||
} |
||||
container.setSizeInBits(range); |
||||
} |
||||
|
||||
/** |
||||
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||
* @param bitmaps the source bitmaps |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @return the xor aggregate. |
||||
*/ |
||||
public static EWAHCompressedBitmap32 bufferedxor(final int bufsize, |
||||
final EWAHCompressedBitmap32... bitmaps) { |
||||
EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); |
||||
bufferedxorWithContainer(answer, bufsize, bitmaps); |
||||
return answer; |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||
* |
||||
* @param container where the aggregate is written |
||||
* @param bufsize buffer size used during the computation in 64-bit words |
||||
* @param bitmaps the source bitmaps |
||||
*/ |
||||
public static void bufferedxorWithContainer(final BitmapStorage32 container,final int bufsize, |
||||
final EWAHCompressedBitmap32... bitmaps) { |
||||
int range = 0; |
||||
EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); |
||||
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||
return b.sizeinbits - a.sizeinbits; |
||||
} |
||||
}); |
||||
|
||||
java.util.ArrayList<IteratingBufferedRunningLengthWord32> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord32>(); |
||||
for (EWAHCompressedBitmap32 bitmap : sbitmaps) { |
||||
if (bitmap.sizeinbits > range) |
||||
range = bitmap.sizeinbits; |
||||
al.add(new IteratingBufferedRunningLengthWord32(bitmap)); |
||||
} |
||||
int[] hardbitmap = new int[bufsize]; |
||||
int maxr = al.size(); |
||||
while (maxr > 0) { |
||||
int effective = 0; |
||||
for (int k = 0; k < maxr; ++k) { |
||||
if (al.get(k).size() > 0) { |
||||
int eff = IteratorAggregation32.inplacexor(hardbitmap, al.get(k)); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
maxr = k; |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
container.add(hardbitmap[k]); |
||||
Arrays.fill(hardbitmap, 0); |
||||
} |
||||
container.setSizeInBits(range); |
||||
} |
||||
|
||||
/** |
||||
* Uses a priority queue to compute the or aggregate. |
||||
* @param container where we write the result |
||||
* @param bitmaps to be aggregated |
||||
*/ |
||||
public static void orToContainer(final BitmapStorage32 container, |
||||
final EWAHCompressedBitmap32 ... bitmaps) { |
||||
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||
PriorityQueue<EWAHCompressedBitmap32> pq = new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, |
||||
new Comparator<EWAHCompressedBitmap32>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||
return a.sizeInBytes() - b.sizeInBytes(); |
||||
} |
||||
}); |
||||
for (EWAHCompressedBitmap32 x : bitmaps) { |
||||
pq.add(x); |
||||
} |
||||
while (pq.size() > 2) { |
||||
EWAHCompressedBitmap32 x1 = pq.poll(); |
||||
EWAHCompressedBitmap32 x2 = pq.poll(); |
||||
pq.add(x1.or(x2)); |
||||
} |
||||
pq.poll().orToContainer(pq.poll(), container); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Uses a priority queue to compute the xor aggregate. |
||||
* @param container where we write the result |
||||
* @param bitmaps to be aggregated |
||||
*/ |
||||
public static void xorToContainer(final BitmapStorage32 container, |
||||
final EWAHCompressedBitmap32 ... bitmaps) { |
||||
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||
PriorityQueue<EWAHCompressedBitmap32> pq = new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, |
||||
new Comparator<EWAHCompressedBitmap32>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||
return a.sizeInBytes() - b.sizeInBytes(); |
||||
} |
||||
}); |
||||
for (EWAHCompressedBitmap32 x : bitmaps) { |
||||
pq.add(x); |
||||
} |
||||
while (pq.size() > 2) { |
||||
EWAHCompressedBitmap32 x1 = pq.poll(); |
||||
EWAHCompressedBitmap32 x2 = pq.poll(); |
||||
pq.add(x1.xor(x2)); |
||||
} |
||||
pq.poll().xorToContainer(pq.poll(), container); |
||||
} |
||||
|
||||
/** |
||||
* For internal use. Computes the bitwise or of the provided bitmaps and |
||||
* stores the result in the container. (This used to be the default.) |
||||
* |
||||
* @deprecated use EWAHCompressedBitmap32.or instead |
||||
* @since 0.4.0 |
||||
* @param container where store the result |
||||
* @param bitmaps to be aggregated |
||||
*/ |
||||
@Deprecated |
||||
public static void legacy_orWithContainer(final BitmapStorage32 container, |
||||
final EWAHCompressedBitmap32... bitmaps) { |
||||
if (bitmaps.length == 2) { |
||||
// should be more efficient
|
||||
bitmaps[0].orToContainer(bitmaps[1], container); |
||||
return; |
||||
} |
||||
|
||||
// Sort the bitmaps in descending order by sizeinbits. We will exhaust the
|
||||
// sorted bitmaps from right to left.
|
||||
final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); |
||||
Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap32>() { |
||||
@Override |
||||
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||
return a.sizeinbits < b.sizeinbits ? 1 |
||||
: a.sizeinbits == b.sizeinbits ? 0 : -1; |
||||
} |
||||
}); |
||||
|
||||
final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; |
||||
int maxAvailablePos = 0; |
||||
for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { |
||||
EWAHIterator32 iterator = bitmap.getEWAHIterator(); |
||||
if (iterator.hasNext()) { |
||||
rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( |
||||
iterator); |
||||
} |
||||
} |
||||
|
||||
if (maxAvailablePos == 0) { // this never happens...
|
||||
container.setSizeInBits(0); |
||||
return; |
||||
} |
||||
|
||||
int maxSize = sortedBitmaps[0].sizeinbits; |
||||
|
||||
while (true) { |
||||
int maxOneRl = 0; |
||||
int minZeroRl = Integer.MAX_VALUE; |
||||
int minSize = Integer.MAX_VALUE; |
||||
int numEmptyRl = 0; |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||
int size = rlw.size(); |
||||
if (size == 0) { |
||||
maxAvailablePos = i; |
||||
break; |
||||
} |
||||
minSize = Math.min(minSize, size); |
||||
|
||||
if (rlw.getRunningBit()) { |
||||
int rl = rlw.getRunningLength(); |
||||
maxOneRl = Math.max(maxOneRl, rl); |
||||
minZeroRl = 0; |
||||
if (rl == 0 && size > 0) { |
||||
numEmptyRl++; |
||||
} |
||||
} else { |
||||
int rl = rlw.getRunningLength(); |
||||
minZeroRl = Math.min(minZeroRl, rl); |
||||
if (rl == 0 && size > 0) { |
||||
numEmptyRl++; |
||||
} |
||||
} |
||||
} |
||||
|
||||
if (maxAvailablePos == 0) { |
||||
break; |
||||
} else if (maxAvailablePos == 1) { |
||||
// only one bitmap is left so just write the rest of it out
|
||||
rlws[0].discharge(container); |
||||
break; |
||||
} |
||||
|
||||
if (maxOneRl > 0) { |
||||
container.addStreamOfEmptyWords(true, maxOneRl); |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||
rlw.discardFirstWords(maxOneRl); |
||||
} |
||||
} else if (minZeroRl > 0) { |
||||
container.addStreamOfEmptyWords(false, minZeroRl); |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||
rlw.discardFirstWords(minZeroRl); |
||||
} |
||||
} else { |
||||
int index = 0; |
||||
|
||||
if (numEmptyRl == 1) { |
||||
// if one rlw has literal words to process and the rest have a run of
|
||||
// 0's we can write them out here
|
||||
IteratingBufferedRunningLengthWord32 emptyRl = null; |
||||
int minNonEmptyRl = Integer.MAX_VALUE; |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||
int rl = rlw.getRunningLength(); |
||||
if (rl == 0) { |
||||
assert emptyRl == null; |
||||
emptyRl = rlw; |
||||
} else { |
||||
minNonEmptyRl = Math.min(minNonEmptyRl, rl); |
||||
} |
||||
} |
||||
int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; |
||||
if (emptyRl != null) |
||||
emptyRl.writeLiteralWords(wordsToWrite, container); |
||||
index += wordsToWrite; |
||||
} |
||||
|
||||
while (index < minSize) { |
||||
int word = 0; |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||
if (rlw.getRunningLength() <= index) { |
||||
word |= rlw.getLiteralWordAt(index - rlw.getRunningLength()); |
||||
} |
||||
} |
||||
container.add(word); |
||||
index++; |
||||
} |
||||
for (int i = 0; i < maxAvailablePos; i++) { |
||||
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||
rlw.discardFirstWords(minSize); |
||||
} |
||||
} |
||||
} |
||||
container.setSizeInBits(maxSize); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,90 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
/* |
||||
* Copyright 2012, Google Inc. |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
import static com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits; |
||||
|
||||
import com.fr.third.googlecode.javaewah.IntIterator; |
||||
|
||||
/** |
||||
* The IntIteratorImpl32 is the 32 bit implementation of the IntIterator |
||||
* interface, which efficiently returns the stream of integers represented by an |
||||
* EWAHIterator32. |
||||
* |
||||
* @author Colby Ranger |
||||
* @since 0.5.6 |
||||
*/ |
||||
final class IntIteratorImpl32 implements IntIterator { |
||||
|
||||
private final EWAHIterator32 ewahIter; |
||||
private final int[] ewahBuffer; |
||||
private int position; |
||||
private int runningLength; |
||||
private int word; |
||||
private int wordPosition; |
||||
private int wordLength; |
||||
private int literalPosition; |
||||
private boolean hasnext; |
||||
|
||||
IntIteratorImpl32(EWAHIterator32 ewahIter) { |
||||
this.ewahIter = ewahIter; |
||||
this.ewahBuffer = ewahIter.buffer(); |
||||
this.hasnext = this.moveToNext(); |
||||
} |
||||
|
||||
public final boolean moveToNext() { |
||||
while (!runningHasNext() && !literalHasNext()) { |
||||
if (!this.ewahIter.hasNext()) { |
||||
return false; |
||||
} |
||||
setRunningLengthWord(this.ewahIter.next()); |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
@Override |
||||
public final boolean hasNext() { |
||||
return this.hasnext; |
||||
} |
||||
|
||||
@Override |
||||
public final int next() { |
||||
final int answer; |
||||
if (runningHasNext()) { |
||||
answer = this.position++; |
||||
} else { |
||||
final int bit = Long.numberOfTrailingZeros(this.word); |
||||
this.word ^= (1l << bit); |
||||
answer = this.literalPosition + bit; |
||||
} |
||||
this.hasnext = this.moveToNext(); |
||||
return answer; |
||||
} |
||||
|
||||
private final void setRunningLengthWord(RunningLengthWord32 rlw) { |
||||
this.runningLength = wordinbits * rlw.getRunningLength() |
||||
+ this.position; |
||||
if (!rlw.getRunningBit()) { |
||||
this.position = this.runningLength; |
||||
} |
||||
|
||||
this.wordPosition = this.ewahIter.literalWords(); |
||||
this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
private final boolean runningHasNext() { |
||||
return this.position < this.runningLength; |
||||
} |
||||
|
||||
private final boolean literalHasNext() { |
||||
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||
this.word = this.ewahBuffer[this.wordPosition++]; |
||||
this.literalPosition = this.position; |
||||
this.position += wordinbits; |
||||
} |
||||
return this.word != 0; |
||||
} |
||||
} |
@ -0,0 +1,91 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; |
||||
|
||||
import com.fr.third.googlecode.javaewah.IntIterator; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* Implementation of an IntIterator over an IteratingRLW. |
||||
* |
||||
* |
||||
*/ |
||||
public class IntIteratorOverIteratingRLW32 implements IntIterator { |
||||
IteratingRLW32 parent; |
||||
private int position; |
||||
private int runningLength; |
||||
private int word; |
||||
private int wordPosition; |
||||
private int wordLength; |
||||
private int literalPosition; |
||||
private boolean hasnext; |
||||
|
||||
/** |
||||
* @param p iterator we wish to iterate over |
||||
*/ |
||||
public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) { |
||||
this.parent = p; |
||||
this.position = 0; |
||||
setupForCurrentRunningLengthWord(); |
||||
this.hasnext = moveToNext(); |
||||
} |
||||
|
||||
/** |
||||
* @return whether we could find another set bit; don't move if there is an unprocessed value |
||||
*/ |
||||
private final boolean moveToNext() { |
||||
while (!runningHasNext() && !literalHasNext()) { |
||||
if (this.parent.next()) |
||||
setupForCurrentRunningLengthWord(); |
||||
else return false; |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return this.hasnext; |
||||
} |
||||
|
||||
@Override |
||||
public final int next() { |
||||
final int answer; |
||||
if (runningHasNext()) { |
||||
answer = this.position++; |
||||
} else { |
||||
final int bit = Long.numberOfTrailingZeros(this.word); |
||||
this.word ^= (1l << bit); |
||||
answer = this.literalPosition + bit; |
||||
} |
||||
this.hasnext = this.moveToNext(); |
||||
return answer; |
||||
} |
||||
|
||||
private final void setupForCurrentRunningLengthWord() { |
||||
this.runningLength = wordinbits * this.parent.getRunningLength() |
||||
+ this.position; |
||||
|
||||
if (!this.parent.getRunningBit()) { |
||||
this.position = this.runningLength; |
||||
} |
||||
this.wordPosition = 0; |
||||
this.wordLength = this.parent.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
private final boolean runningHasNext() { |
||||
return this.position < this.runningLength; |
||||
} |
||||
|
||||
private final boolean literalHasNext() { |
||||
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||
this.word = this.parent.getLiteralWordAt(this.wordPosition++); |
||||
this.literalPosition = this.position; |
||||
this.position += wordinbits; |
||||
} |
||||
return this.word != 0; |
||||
} |
||||
} |
||||
|
@ -0,0 +1,274 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* Mostly for internal use. Similar to BufferedRunningLengthWord32, but automatically |
||||
* advances to the next BufferedRunningLengthWord32 as words are discarded. |
||||
* |
||||
* @since 0.5.0 |
||||
* @author Daniel Lemire and David McIntosh |
||||
*/ |
||||
public final class IteratingBufferedRunningLengthWord32 implements IteratingRLW32, Cloneable { |
||||
/** |
||||
* Instantiates a new iterating buffered running length word. |
||||
* |
||||
* @param iterator iterator |
||||
*/ |
||||
public IteratingBufferedRunningLengthWord32(final EWAHIterator32 iterator) { |
||||
this.iterator = iterator; |
||||
this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||
this.buffer = this.iterator.buffer(); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Instantiates a new iterating buffered running length word. |
||||
* @param bitmap over which we want to iterate |
||||
* |
||||
*/ |
||||
public IteratingBufferedRunningLengthWord32(final EWAHCompressedBitmap32 bitmap) { |
||||
this(EWAHIterator32.getEWAHIterator(bitmap)); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Discard first words, iterating to the next running length word if needed. |
||||
* |
||||
* @param x the x |
||||
*/ |
||||
@Override |
||||
public void discardFirstWords(int x) { |
||||
|
||||
while (x > 0) { |
||||
if (this.brlw.RunningLength > x) { |
||||
this.brlw.RunningLength -= x; |
||||
return; |
||||
} |
||||
x -= this.brlw.RunningLength; |
||||
this.brlw.RunningLength = 0; |
||||
int toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||
|
||||
this.literalWordStartPosition += toDiscard; |
||||
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||
x -= toDiscard; |
||||
if ((x > 0) || (this.brlw.size() == 0)) { |
||||
if (!this.iterator.hasNext()) { |
||||
break; |
||||
} |
||||
this.brlw.reset(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset == 0;
|
||||
} |
||||
} |
||||
} |
||||
/** |
||||
* Write out up to max words, returns how many were written |
||||
* @param container target for writes |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
public int discharge(BitmapStorage32 container, int max) { |
||||
int index = 0; |
||||
while ((index < max) && (size() > 0)) { |
||||
// first run
|
||||
int pl = getRunningLength(); |
||||
if (index + pl > max) { |
||||
pl = max - index; |
||||
} |
||||
container.addStreamOfEmptyWords(getRunningBit(), pl); |
||||
index += pl; |
||||
int pd = getNumberOfLiteralWords(); |
||||
if (pd + index > max) { |
||||
pd = max - index; |
||||
} |
||||
writeLiteralWords(pd, container); |
||||
discardFirstWords(pl+pd); |
||||
index += pd; |
||||
} |
||||
return index; |
||||
} |
||||
|
||||
/** |
||||
* Write out up to max words (negated), returns how many were written |
||||
* @param container target for writes |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
public int dischargeNegated(BitmapStorage32 container, int max) { |
||||
int index = 0; |
||||
while ((index < max) && (size() > 0)) { |
||||
// first run
|
||||
int pl = getRunningLength(); |
||||
if (index + pl > max) { |
||||
pl = max - index; |
||||
} |
||||
container.addStreamOfEmptyWords(!getRunningBit(), pl); |
||||
index += pl; |
||||
int pd = getNumberOfLiteralWords(); |
||||
if (pd + index > max) { |
||||
pd = max - index; |
||||
} |
||||
writeNegatedLiteralWords(pd, container); |
||||
discardFirstWords(pl+pd); |
||||
index += pd; |
||||
} |
||||
return index; |
||||
} |
||||
|
||||
/** |
||||
* Move to the next RunningLengthWord |
||||
* @return whether the move was possible |
||||
*/ |
||||
@Override |
||||
public boolean next() { |
||||
if (!this.iterator.hasNext()) { |
||||
this.brlw.NumberOfLiteralWords = 0; |
||||
this.brlw.RunningLength = 0; |
||||
return false; |
||||
} |
||||
this.brlw.reset(this.iterator.next()); |
||||
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||
return true; |
||||
} |
||||
|
||||
/** |
||||
* Write out the remain words, transforming them to zeroes. |
||||
* @param container target for writes |
||||
*/ |
||||
public void dischargeAsEmpty(BitmapStorage32 container) { |
||||
while(size()>0) { |
||||
container.addStreamOfEmptyWords(false, size()); |
||||
discardFirstWords(size()); |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Write out the remaining words |
||||
* @param container target for writes |
||||
*/ |
||||
public void discharge(BitmapStorage32 container) { |
||||
// fix the offset
|
||||
this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); |
||||
discharge(this.brlw, this.iterator, container); |
||||
} |
||||
|
||||
/** |
||||
* Get the nth literal word for the current running length word |
||||
* @param index zero based index |
||||
* @return the literal word |
||||
*/ |
||||
@Override |
||||
public int getLiteralWordAt(int index) { |
||||
return this.buffer[this.literalWordStartPosition + index]; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words for the current running length word. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
@Override |
||||
public int getNumberOfLiteralWords() { |
||||
return this.brlw.NumberOfLiteralWords; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
@Override |
||||
public boolean getRunningBit() { |
||||
return this.brlw.RunningBit; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
@Override |
||||
public int getRunningLength() { |
||||
return this.brlw.RunningLength; |
||||
} |
||||
|
||||
/** |
||||
* Size in uncompressed words of the current running length word. |
||||
* |
||||
* @return the int |
||||
*/ |
||||
@Override |
||||
public int size() { |
||||
return this.brlw.size(); |
||||
} |
||||
|
||||
/** |
||||
* write the first N literal words to the target bitmap. Does not discard the words or perform iteration. |
||||
* @param numWords number of words to be written |
||||
* @param container where we write the data |
||||
*/ |
||||
public void writeLiteralWords(int numWords, BitmapStorage32 container) { |
||||
container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. |
||||
* @param numWords number of words to be written |
||||
* @param container where we write the data |
||||
*/ |
||||
public void writeNegatedLiteralWords(int numWords, BitmapStorage32 container) { |
||||
container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* For internal use. (One could use the non-static discharge method instead, |
||||
* but we expect them to be slower.) |
||||
* |
||||
* @param initialWord |
||||
* the initial word |
||||
* @param iterator |
||||
* the iterator |
||||
* @param container |
||||
* the container |
||||
*/ |
||||
protected static void discharge( |
||||
final BufferedRunningLengthWord32 initialWord, |
||||
final EWAHIterator32 iterator, final BitmapStorage32 container) { |
||||
BufferedRunningLengthWord32 runningLengthWord = initialWord; |
||||
for (;;) { |
||||
final int runningLength = runningLengthWord.getRunningLength(); |
||||
container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), |
||||
runningLength); |
||||
container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() |
||||
+ runningLengthWord.literalwordoffset, |
||||
runningLengthWord.getNumberOfLiteralWords()); |
||||
if (!iterator.hasNext()) |
||||
break; |
||||
runningLengthWord = new BufferedRunningLengthWord32(iterator.next()); |
||||
} |
||||
} |
||||
|
||||
|
||||
|
||||
@Override |
||||
public IteratingBufferedRunningLengthWord32 clone() throws CloneNotSupportedException { |
||||
IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super.clone(); |
||||
answer.brlw = this.brlw.clone(); |
||||
answer.buffer = this.buffer; |
||||
answer.iterator = this.iterator.clone(); |
||||
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||
return answer; |
||||
} |
||||
|
||||
private BufferedRunningLengthWord32 brlw; |
||||
private int[] buffer; |
||||
private int literalWordStartPosition; |
||||
private EWAHIterator32 iterator; |
||||
} |
@ -0,0 +1,42 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/**
 * High-level iterator over a compressed bitmap, exposing one running length
 * word (a run of fill words followed by literal words) at a time.
 */
public interface IteratingRLW32 {
    /**
     * Moves to the next running length word.
     *
     * @return whether there is more
     */
    boolean next();

    /**
     * @param index where the literal word is
     * @return the literal word at the given index.
     */
    int getLiteralWordAt(int index);

    /**
     * @return the number of literal (non-fill) words
     */
    int getNumberOfLiteralWords();

    /**
     * @return the bit used for the fill bits
     */
    boolean getRunningBit();

    /**
     * @return sum of getRunningLength() and getNumberOfLiteralWords()
     */
    int size();

    /**
     * @return length of the run of fill words
     */
    int getRunningLength();

    /**
     * @param x the number of words to discard
     */
    void discardFirstWords(int x);
}
@ -0,0 +1,601 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
import java.util.Arrays; |
||||
import java.util.Iterator; |
||||
import java.util.LinkedList; |
||||
|
||||
import com.fr.third.googlecode.javaewah.CloneableIterator; |
||||
|
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* Set of helper functions to aggregate bitmaps. |
||||
* |
||||
*/ |
||||
public class IteratorAggregation32 { |
||||
/** |
||||
* @param x iterator to negate |
||||
* @return negated version of the iterator |
||||
*/ |
||||
public static IteratingRLW32 not(final IteratingRLW32 x) { |
||||
return new IteratingRLW32() { |
||||
|
||||
@Override |
||||
public boolean next() { |
||||
return x.next(); |
||||
} |
||||
|
||||
@Override |
||||
public int getLiteralWordAt(int index) { |
||||
return ~x.getLiteralWordAt(index); |
||||
} |
||||
|
||||
@Override |
||||
public int getNumberOfLiteralWords() { |
||||
return x.getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
@Override |
||||
public boolean getRunningBit() { |
||||
return ! x.getRunningBit(); |
||||
} |
||||
|
||||
@Override |
||||
public int size() { |
||||
return x.size(); |
||||
} |
||||
|
||||
@Override |
||||
public int getRunningLength() { |
||||
return x.getRunningLength(); |
||||
} |
||||
|
||||
@Override |
||||
public void discardFirstWords(int y) { |
||||
x.discardFirstWords(y); |
||||
} |
||||
|
||||
}; |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @return and aggregate |
||||
*/ |
||||
public static IteratingRLW32 bufferedand(final IteratingRLW32... al) { |
||||
return bufferedand (DEFAULTMAXBUFSIZE,al); |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @param bufsize size of the internal buffer used by the iterator in 64-bit words |
||||
* @return and aggregate |
||||
*/ |
||||
public static IteratingRLW32 bufferedand(final int bufsize, final IteratingRLW32... al) { |
||||
if (al.length == 0) |
||||
throw new IllegalArgumentException("Need at least one iterator"); |
||||
if (al.length == 1) |
||||
return al[0]; |
||||
final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>(); |
||||
for (IteratingRLW32 i : al) |
||||
basell.add(i); |
||||
return new BufferedIterator32(new AndIt(basell,bufsize)); |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @return or aggregate |
||||
*/ |
||||
public static IteratingRLW32 bufferedor(final IteratingRLW32... al) { |
||||
return bufferedor(DEFAULTMAXBUFSIZE,al); |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @param bufsize size of the internal buffer used by the iterator in 64-bit words |
||||
* @return or aggregate |
||||
*/ |
||||
public static IteratingRLW32 bufferedor(final int bufsize, final IteratingRLW32... al) { |
||||
if (al.length == 0) |
||||
throw new IllegalArgumentException("Need at least one iterator"); |
||||
if (al.length == 1) |
||||
return al[0]; |
||||
|
||||
final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>(); |
||||
for (IteratingRLW32 i : al) |
||||
basell.add(i); |
||||
return new BufferedIterator32(new ORIt(basell,bufsize)); |
||||
} |
||||
|
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @return xor aggregate |
||||
*/ |
||||
public static IteratingRLW32 bufferedxor(final IteratingRLW32... al) { |
||||
return bufferedxor (DEFAULTMAXBUFSIZE,al); |
||||
} |
||||
/** |
||||
* Aggregate the iterators using a bitmap buffer. |
||||
* |
||||
* @param al iterators to aggregate |
||||
* @param bufsize size of the internal buffer used by the iterator in 64-bit words |
||||
* @return xor aggregate |
||||
*/ |
||||
public static IteratingRLW32 bufferedxor(final int bufsize, final IteratingRLW32... al) { |
||||
if (al.length == 0) |
||||
throw new IllegalArgumentException("Need at least one iterator"); |
||||
if (al.length == 1) |
||||
return al[0]; |
||||
|
||||
final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>(); |
||||
for (IteratingRLW32 i : al) |
||||
basell.add(i); |
||||
return new BufferedIterator32(new XORIt(basell,bufsize)); |
||||
} |
||||
/** |
||||
* Write out the content of the iterator, but as if it were all zeros. |
||||
* |
||||
* @param container |
||||
* where we write |
||||
* @param i |
||||
* the iterator |
||||
*/ |
||||
protected static void dischargeAsEmpty(final BitmapStorage32 container, |
||||
final IteratingRLW32 i) { |
||||
while (i.size() > 0) { |
||||
container.addStreamOfEmptyWords(false, i.size()); |
||||
i.next(); |
||||
|
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Write out up to max words, returns how many were written |
||||
* @param container target for writes |
||||
* @param i source of data |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
protected static int discharge(final BitmapStorage32 container, IteratingRLW32 i, int max) { |
||||
int counter = 0; |
||||
while (i.size() > 0 && counter < max) { |
||||
int L1 = i.getRunningLength(); |
||||
if (L1 > 0) { |
||||
if (L1 + counter > max) |
||||
L1 = max - counter; |
||||
container.addStreamOfEmptyWords(i.getRunningBit(), L1); |
||||
counter += L1; |
||||
} |
||||
int L = i.getNumberOfLiteralWords(); |
||||
if(L + counter > max) L = max - counter; |
||||
for (int k = 0; k < L; ++k) { |
||||
container.add(i.getLiteralWordAt(k)); |
||||
} |
||||
counter += L; |
||||
i.discardFirstWords(L+L1); |
||||
} |
||||
return counter; |
||||
} |
||||
|
||||
/** |
||||
* Write out up to max negated words, returns how many were written |
||||
* @param container target for writes |
||||
* @param i source of data |
||||
* @param max maximal number of writes |
||||
* @return how many written |
||||
*/ |
||||
protected static int dischargeNegated(final BitmapStorage32 container, IteratingRLW32 i, int max) { |
||||
int counter = 0; |
||||
while (i.size() > 0 && counter < max) { |
||||
int L1 = i.getRunningLength(); |
||||
if (L1 > 0) { |
||||
if (L1 + counter > max) |
||||
L1 = max - counter; |
||||
container.addStreamOfEmptyWords(i.getRunningBit(), L1); |
||||
counter += L1; |
||||
} |
||||
int L = i.getNumberOfLiteralWords(); |
||||
if(L + counter > max) L = max - counter; |
||||
for (int k = 0; k < L; ++k) { |
||||
container.add(i.getLiteralWordAt(k)); |
||||
} |
||||
counter += L; |
||||
i.discardFirstWords(L+L1); |
||||
} |
||||
return counter; |
||||
} |
||||
|
||||
static void andToContainer(final BitmapStorage32 container, |
||||
int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { |
||||
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||
.getRunningLength(); |
||||
final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; |
||||
final IteratingRLW32 predator = i_is_prey ? rlwj |
||||
: rlwi; |
||||
if (predator.getRunningBit() == false) { |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||
prey.discardFirstWords(predator.getRunningLength()); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} else { |
||||
final int index = discharge(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} |
||||
} |
||||
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||
rlwj.getNumberOfLiteralWords()); |
||||
if (nbre_literal > 0) { |
||||
desiredrlwcount -= nbre_literal; |
||||
for (int k = 0; k < nbre_literal; ++k) |
||||
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||
rlwi.discardFirstWords(nbre_literal); |
||||
rlwj.discardFirstWords(nbre_literal); |
||||
} |
||||
} |
||||
} |
||||
|
||||
static void andToContainer(final BitmapStorage32 container, |
||||
final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { |
||||
while ((rlwi.size()>0) && (rlwj.size()>0) ) { |
||||
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||
.getRunningLength(); |
||||
final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; |
||||
final IteratingRLW32 predator = i_is_prey ? rlwj |
||||
: rlwi; |
||||
if (predator.getRunningBit() == false) { |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||
prey.discardFirstWords(predator.getRunningLength()); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} else { |
||||
final int index = discharge(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} |
||||
} |
||||
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||
rlwj.getNumberOfLiteralWords()); |
||||
if (nbre_literal > 0) { |
||||
for (int k = 0; k < nbre_literal; ++k) |
||||
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||
rlwi.discardFirstWords(nbre_literal); |
||||
rlwj.discardFirstWords(nbre_literal); |
||||
} |
||||
} |
||||
} |
||||
|
||||
|
||||
/** |
||||
* Compute the first few words of the XOR aggregate between two iterators. |
||||
* |
||||
* @param container where to write |
||||
* @param desiredrlwcount number of words to be written (max) |
||||
* @param rlwi first iterator to aggregate |
||||
* @param rlwj second iterator to aggregate |
||||
*/ |
||||
public static void xorToContainer(final BitmapStorage32 container, |
||||
int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { |
||||
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||
.getRunningLength(); |
||||
final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; |
||||
final IteratingRLW32 predator = i_is_prey ? rlwj |
||||
: rlwi; |
||||
if (predator.getRunningBit() == false) { |
||||
int index = discharge(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} else { |
||||
int index = dischargeNegated(container, prey, predator.getRunningLength()); |
||||
container.addStreamOfEmptyWords(true, predator.getRunningLength() |
||||
- index); |
||||
predator.discardFirstWords(predator.getRunningLength()); |
||||
} |
||||
} |
||||
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||
rlwj.getNumberOfLiteralWords()); |
||||
if (nbre_literal > 0) { |
||||
desiredrlwcount -= nbre_literal; |
||||
for (int k = 0; k < nbre_literal; ++k) |
||||
container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); |
||||
rlwi.discardFirstWords(nbre_literal); |
||||
rlwj.discardFirstWords(nbre_literal); |
||||
} |
||||
} |
||||
} |
||||
|
||||
protected static int inplaceor(int[] bitmap, |
||||
IteratingRLW32 i) { |
||||
int pos = 0; |
||||
int s; |
||||
while ((s = i.size()) > 0) { |
||||
if (pos + s < bitmap.length) { |
||||
final int L = i.getRunningLength(); |
||||
if (i.getRunningBit()) |
||||
Arrays.fill(bitmap, pos, pos + L, ~0); |
||||
pos += L; |
||||
final int LR = i.getNumberOfLiteralWords(); |
||||
for (int k = 0; k < LR; ++k) |
||||
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||
if (!i.next()) { |
||||
return pos; |
||||
} |
||||
} else { |
||||
int howmany = bitmap.length - pos; |
||||
int L = i.getRunningLength(); |
||||
if (pos + L > bitmap.length) { |
||||
if (i.getRunningBit()) { |
||||
Arrays.fill(bitmap, pos, bitmap.length, ~0); |
||||
} |
||||
i.discardFirstWords(howmany); |
||||
return bitmap.length; |
||||
} |
||||
if (i.getRunningBit()) |
||||
Arrays.fill(bitmap, pos, pos + L, ~0); |
||||
pos += L; |
||||
for (int k = 0; pos < bitmap.length; ++k) |
||||
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||
i.discardFirstWords(howmany); |
||||
return pos; |
||||
} |
||||
} |
||||
return pos; |
||||
} |
||||
|
||||
|
||||
protected static int inplacexor(int[] bitmap, |
||||
IteratingRLW32 i) { |
||||
int pos = 0; |
||||
int s; |
||||
while ((s = i.size()) > 0) { |
||||
if (pos + s < bitmap.length) { |
||||
final int L = i.getRunningLength(); |
||||
if (i.getRunningBit()) { |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = ~bitmap[k]; |
||||
} |
||||
pos += L; |
||||
final int LR = i.getNumberOfLiteralWords(); |
||||
for (int k = 0; k < LR; ++k) |
||||
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||
if (!i.next()) { |
||||
return pos; |
||||
} |
||||
} else { |
||||
int howmany = bitmap.length - pos; |
||||
int L = i.getRunningLength(); |
||||
if (pos + L > bitmap.length) { |
||||
if (i.getRunningBit()) { |
||||
for(int k = pos ; k < bitmap.length; ++k) |
||||
bitmap[k] = ~bitmap[k]; |
||||
} |
||||
i.discardFirstWords(howmany); |
||||
return bitmap.length; |
||||
} |
||||
if (i.getRunningBit()) |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = ~bitmap[k]; |
||||
pos += L; |
||||
for (int k = 0; pos < bitmap.length; ++k) |
||||
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||
i.discardFirstWords(howmany); |
||||
return pos; |
||||
} |
||||
} |
||||
return pos; |
||||
} |
||||
protected static int inplaceand(int[] bitmap, |
||||
IteratingRLW32 i) { |
||||
int pos = 0; |
||||
int s; |
||||
while ((s = i.size()) > 0) { |
||||
if (pos + s < bitmap.length) { |
||||
final int L = i.getRunningLength(); |
||||
if (!i.getRunningBit()) { |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = 0; |
||||
} |
||||
pos += L; |
||||
final int LR = i.getNumberOfLiteralWords(); |
||||
for (int k = 0; k < LR; ++k) |
||||
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||
if (!i.next()) { |
||||
return pos; |
||||
} |
||||
} else { |
||||
int howmany = bitmap.length - pos; |
||||
int L = i.getRunningLength(); |
||||
if (pos + L > bitmap.length) { |
||||
if (!i.getRunningBit()) { |
||||
for(int k = pos ; k < bitmap.length; ++k) |
||||
bitmap[k] = 0; |
||||
} |
||||
i.discardFirstWords(howmany); |
||||
return bitmap.length; |
||||
} |
||||
if (!i.getRunningBit()) |
||||
for(int k = pos ; k < pos + L; ++k) |
||||
bitmap[k] = 0; |
||||
pos += L; |
||||
for (int k = 0; pos < bitmap.length; ++k) |
||||
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||
i.discardFirstWords(howmany); |
||||
return pos; |
||||
} |
||||
} |
||||
return pos; |
||||
} |
||||
|
||||
/** |
||||
* An optimization option. Larger values may improve speed, but at |
||||
* the expense of memory. |
||||
*/ |
||||
public final static int DEFAULTMAXBUFSIZE = 65536; |
||||
|
||||
|
||||
} |
||||
|
||||
|
||||
class ORIt implements CloneableIterator<EWAHIterator32> { |
||||
EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); |
||||
int[] hardbitmap; |
||||
LinkedList<IteratingRLW32> ll; |
||||
|
||||
ORIt(LinkedList<IteratingRLW32> basell, final int bufsize) { |
||||
this.ll = basell; |
||||
this.hardbitmap = new int[bufsize]; |
||||
} |
||||
|
||||
@Override |
||||
public XORIt clone() throws CloneNotSupportedException { |
||||
XORIt answer = (XORIt) super.clone(); |
||||
answer.buffer = this.buffer.clone(); |
||||
answer.hardbitmap = this.hardbitmap.clone(); |
||||
answer.ll = (LinkedList<IteratingRLW32>) this.ll.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return !this.ll.isEmpty(); |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator32 next() { |
||||
this.buffer.clear(); |
||||
int effective = 0; |
||||
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||
while (i.hasNext()) { |
||||
IteratingRLW32 rlw = i.next(); |
||||
if (rlw.size() > 0) { |
||||
int eff = IteratorAggregation32.inplaceor(this.hardbitmap, rlw); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
i.remove(); |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
this.buffer.add(this.hardbitmap[k]); |
||||
Arrays.fill(this.hardbitmap, 0); |
||||
return this.buffer.getEWAHIterator(); |
||||
} |
||||
} |
||||
|
||||
class XORIt implements CloneableIterator<EWAHIterator32> { |
||||
EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); |
||||
int[] hardbitmap; |
||||
LinkedList<IteratingRLW32> ll; |
||||
|
||||
XORIt(LinkedList<IteratingRLW32> basell, final int bufsize) { |
||||
this.ll = basell; |
||||
this.hardbitmap = new int[bufsize]; |
||||
|
||||
} |
||||
|
||||
@Override |
||||
public XORIt clone() throws CloneNotSupportedException { |
||||
XORIt answer = (XORIt) super.clone(); |
||||
answer.buffer = this.buffer.clone(); |
||||
answer.hardbitmap = this.hardbitmap.clone(); |
||||
answer.ll = (LinkedList<IteratingRLW32>) this.ll.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return !this.ll.isEmpty(); |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator32 next() { |
||||
this.buffer.clear(); |
||||
int effective = 0; |
||||
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||
while (i.hasNext()) { |
||||
IteratingRLW32 rlw = i.next(); |
||||
if (rlw.size() > 0) { |
||||
int eff = IteratorAggregation32.inplacexor(this.hardbitmap, rlw); |
||||
if (eff > effective) |
||||
effective = eff; |
||||
} else |
||||
i.remove(); |
||||
} |
||||
for (int k = 0; k < effective; ++k) |
||||
this.buffer.add(this.hardbitmap[k]); |
||||
Arrays.fill(this.hardbitmap, 0); |
||||
return this.buffer.getEWAHIterator(); |
||||
} |
||||
} |
||||
|
||||
class AndIt implements CloneableIterator<EWAHIterator32> { |
||||
EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); |
||||
LinkedList<IteratingRLW32> ll; |
||||
int buffersize; |
||||
|
||||
public AndIt(LinkedList<IteratingRLW32> basell, final int bufsize) { |
||||
this.ll = basell; |
||||
this.buffersize = bufsize; |
||||
} |
||||
|
||||
@Override |
||||
public boolean hasNext() { |
||||
return !this.ll.isEmpty(); |
||||
} |
||||
|
||||
@Override |
||||
public AndIt clone() throws CloneNotSupportedException { |
||||
AndIt answer = (AndIt) super.clone(); |
||||
answer.buffer = this.buffer.clone(); |
||||
answer.ll = (LinkedList<IteratingRLW32>) this.ll.clone(); |
||||
return answer; |
||||
} |
||||
|
||||
@Override |
||||
public EWAHIterator32 next() { |
||||
this.buffer.clear(); |
||||
IteratorAggregation32.andToContainer(this.buffer, this.buffersize * this.ll.size(), |
||||
this.ll.get(0), this.ll.get(1)); |
||||
if (this.ll.size() > 2) { |
||||
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||
i.next(); |
||||
i.next(); |
||||
EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32(); |
||||
while (i.hasNext() && this.buffer.sizeInBytes() > 0) { |
||||
IteratorAggregation32.andToContainer(tmpbuffer, |
||||
this.buffer.getIteratingRLW(), i.next()); |
||||
this.buffer.swap(tmpbuffer); |
||||
tmpbuffer.clear(); |
||||
} |
||||
} |
||||
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||
while(i.hasNext()) { |
||||
if(i.next().size() == 0) { |
||||
this.ll.clear(); |
||||
break; |
||||
} |
||||
} |
||||
return this.buffer.getEWAHIterator(); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,135 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
import java.util.Iterator; |
||||
|
||||
import com.fr.third.googlecode.javaewah.IntIterator; |
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* Convenience functions for working over iterators |
||||
* |
||||
*/ |
||||
public class IteratorUtil32 { |
||||
|
||||
/** |
||||
* @param i iterator we wish to iterate over |
||||
* @return an iterator over the set bits corresponding to the iterator |
||||
*/ |
||||
public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) { |
||||
return new IntIteratorOverIteratingRLW32(i); |
||||
} |
||||
|
||||
/** |
||||
* @param i iterator we wish to iterate over |
||||
* @return an iterator over the set bits corresponding to the iterator |
||||
*/ |
||||
public static Iterator<Integer> toSetBitsIterator(final IteratingRLW32 i) { |
||||
return new Iterator<Integer>() { |
||||
@Override |
||||
public boolean hasNext() { |
||||
return this.under.hasNext(); |
||||
} |
||||
|
||||
@Override |
||||
public Integer next() { |
||||
return new Integer(this.under.next()); |
||||
} |
||||
|
||||
@Override |
||||
public void remove() { |
||||
} |
||||
|
||||
final private IntIterator under = toSetBitsIntIterator(i); |
||||
}; |
||||
|
||||
} |
||||
|
||||
/** |
||||
* Turn an iterator into a bitmap |
||||
* @param i iterator we wish to materialize |
||||
* @param c where we write |
||||
*/ |
||||
public static void materialize(final IteratingRLW32 i, final BitmapStorage32 c) { |
||||
while (true) { |
||||
if (i.getRunningLength() > 0) { |
||||
c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); |
||||
} |
||||
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||
c.add(i.getLiteralWordAt(k)); |
||||
if (!i.next()) |
||||
break; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* @param i iterator we wish to iterate over |
||||
* @return the cardinality (number of set bits) corresponding to the iterator |
||||
*/ |
||||
public static int cardinality(final IteratingRLW32 i) { |
||||
int answer = 0; |
||||
while (true) { |
||||
if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap32.wordinbits; |
||||
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||
answer += Long.bitCount(i.getLiteralWordAt(k)); |
||||
if(!i.next()) break; |
||||
} |
||||
return answer; |
||||
} |
||||
|
||||
/** |
||||
* |
||||
* @param x set of bitmaps we wish to iterate over |
||||
* @return an array of iterators corresponding to the array of bitmaps |
||||
*/ |
||||
public static IteratingRLW32[] toIterators(final EWAHCompressedBitmap32... x) { |
||||
IteratingRLW32[] X = new IteratingRLW32[x.length]; |
||||
for (int k = 0; k < X.length; ++k) { |
||||
X[k] = new IteratingBufferedRunningLengthWord32(x[k]); |
||||
} |
||||
return X; |
||||
} |
||||
/** |
||||
* Turn an iterator into a bitmap |
||||
* |
||||
* @param i iterator we wish to materialize |
||||
* @param c where we write |
||||
* @param Max maximum number of words to materialize |
||||
* @return how many words were actually materialized |
||||
*/ |
||||
public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int Max) { |
||||
final int origMax = Max; |
||||
while (true) { |
||||
if (i.getRunningLength() > 0) { |
||||
int L = i.getRunningLength(); |
||||
if(L > Max) L = Max; |
||||
c.addStreamOfEmptyWords(i.getRunningBit(), L); |
||||
Max -= L; |
||||
} |
||||
long L = i.getNumberOfLiteralWords(); |
||||
for (int k = 0; k < L; ++k) |
||||
c.add(i.getLiteralWordAt(k)); |
||||
if(Max>0) { |
||||
if (!i.next()) |
||||
break; |
||||
} |
||||
else break; |
||||
} |
||||
return origMax - Max; |
||||
} |
||||
/** |
||||
* Turn an iterator into a bitmap |
||||
* |
||||
* @param i iterator we wish to materialize |
||||
* @return materialized version of the iterator |
||||
*/ |
||||
public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) { |
||||
EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); |
||||
materialize(i, ewah); |
||||
return ewah; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,87 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
|
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
/** |
||||
* This is a BitmapStorage that can be used to determine quickly |
||||
* if the result of an operation is non-trivial... that is, whether |
||||
* there will be at least on set bit. |
||||
* |
||||
* @since 0.5.0 |
||||
* @author Daniel Lemire and Veronika Zenz |
||||
* |
||||
*/ |
||||
public class NonEmptyVirtualStorage32 implements BitmapStorage32 { |
||||
static class NonEmptyException extends RuntimeException { |
||||
private static final long serialVersionUID = 1L; |
||||
|
||||
/** |
||||
* Do not fill in the stack trace for this exception |
||||
* for performance reasons. |
||||
* |
||||
* @return this instance |
||||
* @see Throwable#fillInStackTrace() |
||||
*/ |
||||
@Override |
||||
public synchronized Throwable fillInStackTrace() { |
||||
return this; |
||||
} |
||||
} |
||||
|
||||
private static final NonEmptyException nonEmptyException = new NonEmptyException(); |
||||
|
||||
|
||||
/** |
||||
* If the word to be added is non-zero, a NonEmptyException exception is thrown. |
||||
*/ |
||||
@Override |
||||
public void add(int newdata) { |
||||
if(newdata!=0) throw nonEmptyException; |
||||
} |
||||
|
||||
/** |
||||
* throws a NonEmptyException exception when number is greater than 0 |
||||
* |
||||
*/ |
||||
@Override |
||||
public void addStreamOfLiteralWords(int[] data, int start, int number) { |
||||
if (number > 0){ |
||||
throw nonEmptyException; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, |
||||
* otherwise, nothing happens. |
||||
* |
||||
*/ |
||||
@Override |
||||
public void addStreamOfEmptyWords(boolean v, int number) { |
||||
if(v && (number>0)) throw nonEmptyException; |
||||
} |
||||
|
||||
/** |
||||
* throws a NonEmptyException exception when number is greater than 0 |
||||
* |
||||
*/ |
||||
@Override |
||||
public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) { |
||||
if (number > 0){ |
||||
throw nonEmptyException; |
||||
} |
||||
} |
||||
|
||||
/** |
||||
* Does nothing. |
||||
* |
||||
* @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) |
||||
*/ |
||||
@Override |
||||
public void setSizeInBits(int bits) { |
||||
} |
||||
|
||||
} |
@ -0,0 +1,152 @@
|
||||
package com.fr.third.googlecode.javaewah32; |
||||
|
||||
/* |
||||
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||
* Licensed under the Apache License, Version 2.0. |
||||
*/ |
||||
|
||||
/** |
||||
* Mostly for internal use. |
||||
* |
||||
* @since 0.5.0 |
||||
* @author Daniel Lemire |
||||
*/ |
||||
public final class RunningLengthWord32 implements Cloneable { |
||||
|
||||
/** |
||||
* Instantiates a new running length word. |
||||
* |
||||
* @param a |
||||
* an array of 32-bit words |
||||
* @param p |
||||
* position in the array where the running length word is |
||||
* located. |
||||
*/ |
||||
RunningLengthWord32(final EWAHCompressedBitmap32 a, final int p) { |
||||
this.parent = a; |
||||
this.position = p; |
||||
} |
||||
|
||||
/** |
||||
* Gets the number of literal words. |
||||
* |
||||
* @return the number of literal words |
||||
*/ |
||||
public int getNumberOfLiteralWords() { |
||||
return (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); |
||||
} |
||||
|
||||
/** |
||||
* Gets the running bit. |
||||
* |
||||
* @return the running bit |
||||
*/ |
||||
public boolean getRunningBit() { |
||||
return (this.parent.buffer[this.position] & 1) != 0; |
||||
} |
||||
|
||||
/** |
||||
* Gets the running length. |
||||
* |
||||
* @return the running length |
||||
*/ |
||||
public int getRunningLength() { |
||||
return (this.parent.buffer[this.position] >>> 1) |
||||
& largestrunninglengthcount; |
||||
} |
||||
|
||||
/** |
||||
* Sets the number of literal words. |
||||
* |
||||
* @param number |
||||
* the new number of literal words |
||||
*/ |
||||
public void setNumberOfLiteralWords(final int number) { |
||||
this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; |
||||
this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) |
||||
| runninglengthplusrunningbit; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running bit. |
||||
* |
||||
* @param b |
||||
* the new running bit |
||||
*/ |
||||
public void setRunningBit(final boolean b) { |
||||
if (b) |
||||
this.parent.buffer[this.position] |= 1; |
||||
else |
||||
this.parent.buffer[this.position] &= ~1; |
||||
} |
||||
|
||||
/** |
||||
* Sets the running length. |
||||
* |
||||
* @param number |
||||
* the new running length |
||||
*/ |
||||
public void setRunningLength(final int number) { |
||||
this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; |
||||
this.parent.buffer[this.position] &= (number << 1) |
||||
| notshiftedlargestrunninglengthcount; |
||||
} |
||||
|
||||
/** |
||||
* Return the size in uncompressed words represented by this running |
||||
* length word. |
||||
* |
||||
* @return the int |
||||
*/ |
||||
public int size() { |
||||
return getRunningLength() + getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
/* |
||||
* @see java.lang.Object#toString() |
||||
*/ |
||||
@Override |
||||
public String toString() { |
||||
return "running bit = " + getRunningBit() |
||||
+ " running length = " + getRunningLength() |
||||
+ " number of lit. words " + getNumberOfLiteralWords(); |
||||
} |
||||
|
||||
@Override |
||||
public RunningLengthWord32 clone() throws CloneNotSupportedException { |
||||
RunningLengthWord32 answer; |
||||
answer = (RunningLengthWord32) super.clone(); |
||||
answer.parent = this.parent; |
||||
answer.position = this.position; |
||||
return answer; |
||||
} |
||||
|
||||
/** The array of words. */ |
||||
public EWAHCompressedBitmap32 parent; |
||||
|
||||
/** The position in array. */ |
||||
public int position; |
||||
|
||||
/** |
||||
* number of bits dedicated to marking of the running length of clean |
||||
* words |
||||
*/ |
||||
public static final int runninglengthbits = 16; |
||||
|
||||
private static final int literalbits = 32 - 1 - runninglengthbits; |
||||
|
||||
/** largest number of literal words in a run. */ |
||||
public static final int largestliteralcount = (1 << literalbits) - 1; |
||||
|
||||
/** largest number of clean words in a run */ |
||||
public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1; |
||||
|
||||
private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1; |
||||
|
||||
private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; |
||||
|
||||
private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; |
||||
|
||||
private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; |
||||
|
||||
} |
Loading…
Reference in new issue