Browse Source
* commit '33ec2b51c4e783618d8e3b5cef1e9a7db24eaf45': REPORT-20981 删除自己写的多余类 无JIRA任务、jgit gc 需要googlecode、之前没有打包它research/11.0
neil
5 years ago
55 changed files with 9884 additions and 15 deletions
Binary file not shown.
@ -0,0 +1,106 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* BitCounter is a fake bitset data structure. Instead of storing the actual |
||||||
|
* data, it only records the number of set bits. |
||||||
|
* |
||||||
|
* @since 0.4.0 |
||||||
|
* @author David McIntosh |
||||||
|
*/ |
||||||
|
|
||||||
|
public final class BitCounter implements BitmapStorage { |
||||||
|
|
||||||
|
/** |
||||||
|
* Virtually add words directly to the bitmap |
||||||
|
* |
||||||
|
* @param newdata |
||||||
|
* the word |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void add(final long newdata) { |
||||||
|
this.oneBits += Long.bitCount(newdata); |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* virtually add several literal words. |
||||||
|
* |
||||||
|
* @param data |
||||||
|
* the literal words |
||||||
|
* @param start |
||||||
|
* the starting point in the array |
||||||
|
* @param number |
||||||
|
* the number of literal words to add |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfLiteralWords(long[] data, int start, int number) { |
||||||
|
for (int i = start; i < start + number; i++) { |
||||||
|
add(data[i]); |
||||||
|
} |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* virtually add many zeroes or ones. |
||||||
|
* |
||||||
|
* @param v |
||||||
|
* zeros or ones |
||||||
|
* @param number |
||||||
|
* how many to words add |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfEmptyWords(boolean v, long number) { |
||||||
|
if (v) { |
||||||
|
this.oneBits += number * EWAHCompressedBitmap.wordinbits; |
||||||
|
} |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* virtually add several negated literal words. |
||||||
|
* |
||||||
|
* @param data |
||||||
|
* the literal words |
||||||
|
* @param start |
||||||
|
* the starting point in the array |
||||||
|
* @param number |
||||||
|
* the number of literal words to add |
||||||
|
*/ |
||||||
|
// @Override : causes problems with Java 1.5
|
||||||
|
@Override |
||||||
|
public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { |
||||||
|
for (int i = start; i < start + number; i++) { |
||||||
|
add(~data[i]); |
||||||
|
} |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* As you act on this class, it records the number of set (true) bits. |
||||||
|
* |
||||||
|
* @return number of set bits |
||||||
|
*/ |
||||||
|
public int getCount() { |
||||||
|
return this.oneBits; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* should directly set the sizeinbits field, but is effectively ignored in |
||||||
|
* this class. |
||||||
|
* |
||||||
|
* @param bits |
||||||
|
* number of bits |
||||||
|
*/ |
||||||
|
// @Override : causes problems with Java 1.5
|
||||||
|
@Override |
||||||
|
public void setSizeInBits(int bits) { |
||||||
|
// no action
|
||||||
|
} |
||||||
|
|
||||||
|
private int oneBits; |
||||||
|
|
||||||
|
} |
@ -0,0 +1,71 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/**
 * Low level, write-only view of a bitmap: the set of operations an algorithm
 * needs in order to emit its result word by word.
 *
 * @since 0.4.0
 * @author David McIntosh
 */
public interface BitmapStorage {

    /**
     * Appends one word directly to the bitmap (for expert use). This is the
     * normal way of adding data: bits arrive in chunks of 8*8 bits.
     *
     * @param newdata
     *            the word
     */
    void add(long newdata);

    /**
     * Appends several literal words at once; may be faster than repeated
     * calls to {@link #add(long)}.
     *
     * @param data
     *            the literal words
     * @param start
     *            the starting point in the array
     * @param number
     *            the number of literal words to add
     */
    void addStreamOfLiteralWords(long[] data, int start, int number);

    /**
     * For experts: appends many words that are all zeros or all ones.
     *
     * @param v
     *            zeros or ones
     * @param number
     *            how many words to add
     */
    void addStreamOfEmptyWords(boolean v, long number);

    /**
     * Same as {@link #addStreamOfLiteralWords(long[], int, int)} except that
     * each word is negated before being added.
     *
     * @param data
     *            the literal words
     * @param start
     *            the starting point in the array
     * @param number
     *            the number of literal words to add
     */
    void addStreamOfNegatedLiteralWords(long[] data, int start, int number);

    /**
     * Directly sets the sizeinbits field.
     *
     * @param bits
     *            number of bits
     */
    void setSizeInBits(int bits);

}
@ -0,0 +1,151 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* This class can be used to iterate over blocks of bitmap data. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class BufferedIterator implements IteratingRLW { |
||||||
|
/** |
||||||
|
* Instantiates a new iterating buffered running length word. |
||||||
|
* |
||||||
|
* @param iterator iterator |
||||||
|
*/ |
||||||
|
public BufferedIterator(final CloneableIterator<EWAHIterator> iterator) { |
||||||
|
this.masteriterator = iterator; |
||||||
|
if(this.masteriterator.hasNext()) { |
||||||
|
this.iterator = this.masteriterator.next(); |
||||||
|
this.brlw = new BufferedRunningLengthWord(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||||
|
this.buffer = this.iterator.buffer(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Discard first words, iterating to the next running length word if needed. |
||||||
|
* |
||||||
|
* @param x the number of words to be discarded |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void discardFirstWords(long x) { |
||||||
|
while (x > 0) { |
||||||
|
if (this.brlw.RunningLength > x) { |
||||||
|
this.brlw.RunningLength -= x; |
||||||
|
return; |
||||||
|
} |
||||||
|
x -= this.brlw.RunningLength; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
long toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||||
|
|
||||||
|
this.literalWordStartPosition += toDiscard; |
||||||
|
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||||
|
x -= toDiscard; |
||||||
|
if ((x > 0) || (this.brlw.size() == 0)) { |
||||||
|
if (!this.next()) { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
/** |
||||||
|
* Move to the next RunningLengthWord |
||||||
|
* @return whether the move was possible |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean next() { |
||||||
|
if (!this.iterator.hasNext()) { |
||||||
|
if(!reload()) { |
||||||
|
this.brlw.NumberOfLiteralWords = 0; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
return false; |
||||||
|
} |
||||||
|
} |
||||||
|
this.brlw.reset(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||||
|
return true; |
||||||
|
} |
||||||
|
private boolean reload() { |
||||||
|
if(!this.masteriterator.hasNext()) { |
||||||
|
return false; |
||||||
|
} |
||||||
|
this.iterator = this.masteriterator.next(); |
||||||
|
this.buffer = this.iterator.buffer(); |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Get the nth literal word for the current running length word |
||||||
|
* @param index zero based index |
||||||
|
* @return the literal word |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public long getLiteralWordAt(int index) { |
||||||
|
return this.buffer[this.literalWordStartPosition + index]; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words for the current running length word. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return this.brlw.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean getRunningBit() { |
||||||
|
return this.brlw.RunningBit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public long getRunningLength() { |
||||||
|
return this.brlw.RunningLength; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Size in uncompressed words of the current running length word. |
||||||
|
* |
||||||
|
* @return the size |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public long size() { |
||||||
|
return this.brlw.size(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
@Override |
||||||
|
public BufferedIterator clone() throws CloneNotSupportedException { |
||||||
|
BufferedIterator answer = (BufferedIterator) super.clone(); |
||||||
|
answer.brlw = this.brlw.clone(); |
||||||
|
answer.buffer = this.buffer; |
||||||
|
answer.iterator = this.iterator.clone(); |
||||||
|
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||||
|
answer.masteriterator = this.masteriterator.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
private BufferedRunningLengthWord brlw; |
||||||
|
private long[] buffer; |
||||||
|
private int literalWordStartPosition; |
||||||
|
private EWAHIterator iterator; |
||||||
|
private CloneableIterator<EWAHIterator> masteriterator; |
||||||
|
} |
@ -0,0 +1,175 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Mostly for internal use. Similar to RunningLengthWord, but can |
||||||
|
* be modified without access to the array, and has faster access. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* @since 0.1.0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
public final class BufferedRunningLengthWord implements Cloneable { |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new buffered running length word. |
||||||
|
* |
||||||
|
* @param a the word |
||||||
|
*/ |
||||||
|
public BufferedRunningLengthWord(final long a) { |
||||||
|
this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); |
||||||
|
this.RunningBit = (a & 1) != 0; |
||||||
|
this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new buffered running length word. |
||||||
|
* |
||||||
|
* @param rlw the rlw |
||||||
|
*/ |
||||||
|
public BufferedRunningLengthWord(final RunningLengthWord rlw) { |
||||||
|
this(rlw.parent.buffer[rlw.position]); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Discard first words. |
||||||
|
* |
||||||
|
* @param x the x |
||||||
|
*/ |
||||||
|
public void discardFirstWords(long x) { |
||||||
|
if (this.RunningLength >= x) { |
||||||
|
this.RunningLength -= x; |
||||||
|
return; |
||||||
|
} |
||||||
|
x -= this.RunningLength; |
||||||
|
this.RunningLength = 0; |
||||||
|
this.literalwordoffset += x; |
||||||
|
this.NumberOfLiteralWords -= x; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return this.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
public boolean getRunningBit() { |
||||||
|
return this.RunningBit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
public long getRunningLength() { |
||||||
|
return this.RunningLength; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Reset the values using the provided word. |
||||||
|
* |
||||||
|
* @param a the word |
||||||
|
*/ |
||||||
|
public void reset(final long a) { |
||||||
|
this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits)); |
||||||
|
this.RunningBit = (a & 1) != 0; |
||||||
|
this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount); |
||||||
|
this.literalwordoffset = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Reset the values of this running length word so that it has the same values |
||||||
|
* as the other running length word. |
||||||
|
* |
||||||
|
* @param rlw the other running length word |
||||||
|
*/ |
||||||
|
public void reset(final RunningLengthWord rlw) { |
||||||
|
reset(rlw.parent.buffer[rlw.position]); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the number of literal words. |
||||||
|
* |
||||||
|
* @param number the new number of literal words |
||||||
|
*/ |
||||||
|
public void setNumberOfLiteralWords(final int number) { |
||||||
|
this.NumberOfLiteralWords = number; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running bit. |
||||||
|
* |
||||||
|
* @param b the new running bit |
||||||
|
*/ |
||||||
|
public void setRunningBit(final boolean b) { |
||||||
|
this.RunningBit = b; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running length. |
||||||
|
* |
||||||
|
* @param number the new running length |
||||||
|
*/ |
||||||
|
public void setRunningLength(final long number) { |
||||||
|
this.RunningLength = number; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Size in uncompressed words. |
||||||
|
* |
||||||
|
* @return the long |
||||||
|
*/ |
||||||
|
public long size() { |
||||||
|
return this.RunningLength + this.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
* @see java.lang.Object#toString() |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public String toString() { |
||||||
|
return "running bit = " + getRunningBit() + " running length = " |
||||||
|
+ getRunningLength() + " number of lit. words " |
||||||
|
+ getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public BufferedRunningLengthWord clone() throws CloneNotSupportedException { |
||||||
|
BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone(); |
||||||
|
answer.literalwordoffset = this.literalwordoffset; |
||||||
|
answer.NumberOfLiteralWords = this.NumberOfLiteralWords; |
||||||
|
answer.RunningBit = this.RunningBit; |
||||||
|
answer.RunningLength = this.RunningLength; |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** how many literal words have we read so far? */ |
||||||
|
public int literalwordoffset = 0; |
||||||
|
|
||||||
|
/** The Number of literal words. */ |
||||||
|
public int NumberOfLiteralWords; |
||||||
|
|
||||||
|
/** The Running bit. */ |
||||||
|
public boolean RunningBit; |
||||||
|
|
||||||
|
/** The Running length. */ |
||||||
|
public long RunningLength; |
||||||
|
|
||||||
|
|
||||||
|
} |
@ -0,0 +1,24 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/**
 * An iterator in the style of {@code java.util.Iterator} that can additionally
 * be cloned.
 *
 * @param <E> the data type of the iterator
 */
public interface CloneableIterator<E> extends Cloneable {

    /**
     * @return whether there is more
     */
    boolean hasNext();

    /**
     * @return the next element
     */
    E next();

    /**
     * @return a copy
     * @throws CloneNotSupportedException this should never happen in practice
     */
    CloneableIterator<E> clone() throws CloneNotSupportedException;

}
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,98 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* The class EWAHIterator represents a special type of |
||||||
|
* efficient iterator iterating over (uncompressed) words of bits. |
||||||
|
* It is not meant for end users. |
||||||
|
* @author Daniel Lemire |
||||||
|
* @since 0.1.0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
public final class EWAHIterator implements Cloneable { |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new EWAH iterator. |
||||||
|
* |
||||||
|
* @param a the array of words |
||||||
|
* @param sizeinwords the number of words that are significant in the array of words |
||||||
|
*/ |
||||||
|
public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) { |
||||||
|
this.rlw = new RunningLengthWord(a, 0); |
||||||
|
this.size = sizeinwords; |
||||||
|
this.pointer = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Allow expert developers to instantiate an EWAHIterator. |
||||||
|
* |
||||||
|
* @param bitmap we want to iterate over |
||||||
|
* @return an iterator |
||||||
|
*/ |
||||||
|
public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) { |
||||||
|
return bitmap.getEWAHIterator(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Access to the array of words |
||||||
|
* |
||||||
|
* @return the long[] |
||||||
|
*/ |
||||||
|
public long[] buffer() { |
||||||
|
return this.rlw.parent.buffer; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Position of the literal words represented by this running length word. |
||||||
|
* |
||||||
|
* @return the int |
||||||
|
*/ |
||||||
|
public int literalWords() { |
||||||
|
return this.pointer - this.rlw.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Checks for next. |
||||||
|
* |
||||||
|
* @return true, if successful |
||||||
|
*/ |
||||||
|
public boolean hasNext() { |
||||||
|
return this.pointer < this.size; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Next running length word. |
||||||
|
* |
||||||
|
* @return the running length word |
||||||
|
*/ |
||||||
|
public RunningLengthWord next() { |
||||||
|
this.rlw.position = this.pointer; |
||||||
|
this.pointer += this.rlw.getNumberOfLiteralWords() + 1; |
||||||
|
return this.rlw; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator clone() throws CloneNotSupportedException { |
||||||
|
EWAHIterator ans = (EWAHIterator) super.clone(); |
||||||
|
ans.rlw = this.rlw.clone(); |
||||||
|
ans.size = this.size; |
||||||
|
ans.pointer = this.pointer; |
||||||
|
return ans; |
||||||
|
} |
||||||
|
/** The pointer represent the location of the current running length |
||||||
|
* word in the array of words (embedded in the rlw attribute). */ |
||||||
|
int pointer; |
||||||
|
|
||||||
|
/** The current running length word. */ |
||||||
|
RunningLengthWord rlw; |
||||||
|
|
||||||
|
/** The size in words. */ |
||||||
|
int size; |
||||||
|
|
||||||
|
} |
@ -0,0 +1,436 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
import java.util.Arrays; |
||||||
|
import java.util.Comparator; |
||||||
|
import java.util.PriorityQueue; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* Fast algorithms to aggregate many bitmaps. These algorithms are just given as |
||||||
|
* reference. They may not be faster than the corresponding methods in the |
||||||
|
* EWAHCompressedBitmap class. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class FastAggregation { |
||||||
|
/** |
||||||
|
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||||
|
* @return the or aggregate. |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap bufferedand(final int bufsize, |
||||||
|
final EWAHCompressedBitmap... bitmaps) { |
||||||
|
EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); |
||||||
|
bufferedandWithContainer(answer,bufsize, bitmaps); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
/** |
||||||
|
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||||
|
* |
||||||
|
* @param container where the aggregate is written |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
*/ |
||||||
|
public static void bufferedandWithContainer(final BitmapStorage container,final int bufsize, |
||||||
|
final EWAHCompressedBitmap... bitmaps) { |
||||||
|
|
||||||
|
java.util.LinkedList<IteratingBufferedRunningLengthWord> al = new java.util.LinkedList<IteratingBufferedRunningLengthWord>(); |
||||||
|
for (EWAHCompressedBitmap bitmap : bitmaps) { |
||||||
|
al.add(new IteratingBufferedRunningLengthWord(bitmap)); |
||||||
|
} |
||||||
|
|
||||||
|
long[] hardbitmap = new long[bufsize*bitmaps.length]; |
||||||
|
|
||||||
|
for(IteratingRLW i : al) |
||||||
|
if (i.size() == 0) { |
||||||
|
al.clear(); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
while (!al.isEmpty()) { |
||||||
|
Arrays.fill(hardbitmap, ~0l); |
||||||
|
long effective = Integer.MAX_VALUE; |
||||||
|
for(IteratingRLW i : al) { |
||||||
|
int eff = IteratorAggregation.inplaceand(hardbitmap, i); |
||||||
|
if (eff < effective) |
||||||
|
effective = eff; |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
container.add(hardbitmap[k]); |
||||||
|
for(IteratingRLW i : al) |
||||||
|
if (i.size() == 0) { |
||||||
|
al.clear(); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @return the or aggregate. |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap bufferedor(final int bufsize, |
||||||
|
final EWAHCompressedBitmap... bitmaps) { |
||||||
|
EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); |
||||||
|
bufferedorWithContainer(answer, bufsize, bitmaps); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||||
|
* |
||||||
|
* @param container where the aggregate is written |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
*/ |
||||||
|
public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize, |
||||||
|
final EWAHCompressedBitmap... bitmaps) { |
||||||
|
int range = 0; |
||||||
|
EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); |
||||||
|
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||||
|
return b.sizeinbits - a.sizeinbits; |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
java.util.ArrayList<IteratingBufferedRunningLengthWord> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord>(); |
||||||
|
for (EWAHCompressedBitmap bitmap : sbitmaps) { |
||||||
|
if (bitmap.sizeinbits > range) |
||||||
|
range = bitmap.sizeinbits; |
||||||
|
al.add(new IteratingBufferedRunningLengthWord(bitmap)); |
||||||
|
} |
||||||
|
long[] hardbitmap = new long[bufsize]; |
||||||
|
int maxr = al.size(); |
||||||
|
while (maxr > 0) { |
||||||
|
long effective = 0; |
||||||
|
for (int k = 0; k < maxr; ++k) { |
||||||
|
if (al.get(k).size() > 0) { |
||||||
|
int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k)); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
maxr = k; |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
container.add(hardbitmap[k]); |
||||||
|
Arrays.fill(hardbitmap, 0); |
||||||
|
|
||||||
|
} |
||||||
|
container.setSizeInBits(range); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @return the xor aggregate. |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap bufferedxor(final int bufsize, |
||||||
|
final EWAHCompressedBitmap... bitmaps) { |
||||||
|
EWAHCompressedBitmap answer = new EWAHCompressedBitmap(); |
||||||
|
bufferedxorWithContainer(answer, bufsize,bitmaps); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||||
|
* |
||||||
|
* @param container where the aggregate is written |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
*/ |
||||||
|
public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize, |
||||||
|
final EWAHCompressedBitmap... bitmaps) { |
||||||
|
int range = 0; |
||||||
|
EWAHCompressedBitmap[] sbitmaps = bitmaps.clone(); |
||||||
|
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||||
|
return b.sizeinbits - a.sizeinbits; |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
java.util.ArrayList<IteratingBufferedRunningLengthWord> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord>(); |
||||||
|
for (EWAHCompressedBitmap bitmap : sbitmaps) { |
||||||
|
if (bitmap.sizeinbits > range) |
||||||
|
range = bitmap.sizeinbits; |
||||||
|
al.add(new IteratingBufferedRunningLengthWord(bitmap)); |
||||||
|
} |
||||||
|
long[] hardbitmap = new long[bufsize]; |
||||||
|
int maxr = al.size(); |
||||||
|
while (maxr > 0) { |
||||||
|
long effective = 0; |
||||||
|
for (int k = 0; k < maxr; ++k) { |
||||||
|
if (al.get(k).size() > 0) { |
||||||
|
int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k)); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
maxr = k; |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
container.add(hardbitmap[k]); |
||||||
|
Arrays.fill(hardbitmap, 0); |
||||||
|
} |
||||||
|
container.setSizeInBits(range); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Uses a priority queue to compute the or aggregate. |
||||||
|
* @param <T> a class extending LogicalElement (like a compressed bitmap) |
||||||
|
* @param bitmaps |
||||||
|
* bitmaps to be aggregated |
||||||
|
* @return the or aggregate |
||||||
|
*/ |
||||||
|
@SuppressWarnings({ "rawtypes", "unchecked" }) |
||||||
|
public static <T extends LogicalElement> T or(T... bitmaps) { |
||||||
|
PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length, |
||||||
|
new Comparator<T>() { |
||||||
|
@Override |
||||||
|
public int compare(T a, T b) { |
||||||
|
return a.sizeInBytes() - b.sizeInBytes(); |
||||||
|
} |
||||||
|
}); |
||||||
|
for (T x : bitmaps) { |
||||||
|
pq.add(x); |
||||||
|
} |
||||||
|
while (pq.size() > 1) { |
||||||
|
T x1 = pq.poll(); |
||||||
|
T x2 = pq.poll(); |
||||||
|
pq.add((T) x1.or(x2)); |
||||||
|
} |
||||||
|
return pq.poll(); |
||||||
|
} |
||||||
|
/** |
||||||
|
* Uses a priority queue to compute the or aggregate. |
||||||
|
* @param container where we write the result |
||||||
|
* @param bitmaps to be aggregated |
||||||
|
*/ |
||||||
|
public static void orToContainer(final BitmapStorage container, |
||||||
|
final EWAHCompressedBitmap ... bitmaps) { |
||||||
|
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||||
|
PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length, |
||||||
|
new Comparator<EWAHCompressedBitmap>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||||
|
return a.sizeInBytes() - b.sizeInBytes(); |
||||||
|
} |
||||||
|
}); |
||||||
|
for (EWAHCompressedBitmap x : bitmaps) { |
||||||
|
pq.add(x); |
||||||
|
} |
||||||
|
while (pq.size() > 2) { |
||||||
|
EWAHCompressedBitmap x1 = pq.poll(); |
||||||
|
EWAHCompressedBitmap x2 = pq.poll(); |
||||||
|
pq.add(x1.or(x2)); |
||||||
|
} |
||||||
|
pq.poll().orToContainer(pq.poll(), container); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Uses a priority queue to compute the xor aggregate. |
||||||
|
* |
||||||
|
* @param <T> a class extending LogicalElement (like a compressed bitmap) |
||||||
|
* @param bitmaps |
||||||
|
* bitmaps to be aggregated |
||||||
|
* @return the xor aggregate |
||||||
|
*/ |
||||||
|
@SuppressWarnings({ "rawtypes", "unchecked" }) |
||||||
|
public static <T extends LogicalElement> T xor(T... bitmaps) { |
||||||
|
PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length, |
||||||
|
new Comparator<T>() { |
||||||
|
|
||||||
|
@Override |
||||||
|
public int compare(T a, T b) { |
||||||
|
return a.sizeInBytes() - b.sizeInBytes(); |
||||||
|
} |
||||||
|
}); |
||||||
|
for (T x : bitmaps) |
||||||
|
pq.add(x); |
||||||
|
while (pq.size() > 1) { |
||||||
|
T x1 = pq.poll(); |
||||||
|
T x2 = pq.poll(); |
||||||
|
pq.add((T) x1.xor(x2)); |
||||||
|
} |
||||||
|
return pq.poll(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Uses a priority queue to compute the xor aggregate. |
||||||
|
* @param container where we write the result |
||||||
|
* @param bitmaps to be aggregated |
||||||
|
*/ |
||||||
|
public static void xorToContainer(final BitmapStorage container, |
||||||
|
final EWAHCompressedBitmap ... bitmaps) { |
||||||
|
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||||
|
PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length, |
||||||
|
new Comparator<EWAHCompressedBitmap>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) { |
||||||
|
return a.sizeInBytes() - b.sizeInBytes(); |
||||||
|
} |
||||||
|
}); |
||||||
|
for (EWAHCompressedBitmap x : bitmaps) { |
||||||
|
pq.add(x); |
||||||
|
} |
||||||
|
while (pq.size() > 2) { |
||||||
|
EWAHCompressedBitmap x1 = pq.poll(); |
||||||
|
EWAHCompressedBitmap x2 = pq.poll(); |
||||||
|
pq.add(x1.xor(x2)); |
||||||
|
} |
||||||
|
pq.poll().xorToContainer(pq.poll(), container); |
||||||
|
} |
||||||
|
|
||||||
|
/**
 * For internal use. Computes the bitwise or of the provided bitmaps and
 * stores the result in the container. (This used to be the default.)
 *
 * With exactly two bitmaps the work is delegated to orToContainer. Otherwise
 * the bitmaps are sorted by decreasing sizeinbits and walked in lock step,
 * one running length word at a time; the container's size in bits is finally
 * set to the sizeinbits of the first bitmap in that order.
 *
 * @deprecated use EWAHCompressedBitmap.or instead
 * @since 0.4.0
 * @param container where to store the result
 * @param bitmaps to be aggregated
 */
@Deprecated
public static void legacy_orWithContainer(final BitmapStorage container,
    final EWAHCompressedBitmap... bitmaps) {
  if (bitmaps.length == 2) {
    // should be more efficient
    bitmaps[0].orToContainer(bitmaps[1], container);
    return;
  }

  // Sort the bitmaps in descending order by sizeinbits. We will exhaust the
  // sorted bitmaps from right to left.
  final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone();
  Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap>() {
    @Override
    public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
      return a.sizeinbits < b.sizeinbits ? 1
          : a.sizeinbits == b.sizeinbits ? 0 : -1;
    }
  });

  // One cursor per bitmap whose iterator has at least one word; the cursors
  // in use are rlws[0 .. maxAvailablePos - 1].
  final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length];
  int maxAvailablePos = 0;
  for (EWAHCompressedBitmap bitmap : sortedBitmaps) {
    EWAHIterator iterator = bitmap.getEWAHIterator();
    if (iterator.hasNext()) {
      rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord(
          iterator);
    }
  }

  if (maxAvailablePos == 0) { // this never happens...
    container.setSizeInBits(0);
    return;
  }

  // sortedBitmaps[0] has the largest sizeinbits
  int maxSize = sortedBitmaps[0].sizeinbits;

  while (true) {
    // Statistics over the current running length word of every cursor.
    long maxOneRl = 0; // longest run of ones
    long minZeroRl = Long.MAX_VALUE; // shortest run of zeros (0 once any cursor has running bit set)
    long minSize = Long.MAX_VALUE; // smallest size() among the cursors
    int numEmptyRl = 0; // cursors whose running length is 0 but still have words
    for (int i = 0; i < maxAvailablePos; i++) {
      IteratingBufferedRunningLengthWord rlw = rlws[i];
      long size = rlw.size();
      if (size == 0) {
        // cursor i has nothing left: stop considering it and all cursors
        // after it
        maxAvailablePos = i;
        break;
      }
      minSize = Math.min(minSize, size);

      if (rlw.getRunningBit()) {
        long rl = rlw.getRunningLength();
        maxOneRl = Math.max(maxOneRl, rl);
        minZeroRl = 0;
        if (rl == 0 && size > 0) {
          numEmptyRl++;
        }
      } else {
        long rl = rlw.getRunningLength();
        minZeroRl = Math.min(minZeroRl, rl);
        if (rl == 0 && size > 0) {
          numEmptyRl++;
        }
      }
    }

    if (maxAvailablePos == 0) {
      break;
    } else if (maxAvailablePos == 1) {
      // only one bitmap is left so just write the rest of it out
      rlws[0].discharge(container);
      break;
    }

    if (maxOneRl > 0) {
      // some cursor has a run of ones: emit the longest such run and skip
      // that many words in every cursor
      container.addStreamOfEmptyWords(true, maxOneRl);
      for (int i = 0; i < maxAvailablePos; i++) {
        IteratingBufferedRunningLengthWord rlw = rlws[i];
        rlw.discardFirstWords(maxOneRl);
      }
    } else if (minZeroRl > 0) {
      // every cursor starts with a run of zeros: emit the shortest one
      container.addStreamOfEmptyWords(false, minZeroRl);
      for (int i = 0; i < maxAvailablePos; i++) {
        IteratingBufferedRunningLengthWord rlw = rlws[i];
        rlw.discardFirstWords(minZeroRl);
      }
    } else {
      // at least one cursor is positioned on literal words: produce minSize
      // output words; index counts how many have been produced so far
      int index = 0;

      if (numEmptyRl == 1) {
        // if one rlw has literal words to process and the rest have a run of
        // 0's we can write them out here
        IteratingBufferedRunningLengthWord emptyRl = null;
        long minNonEmptyRl = Long.MAX_VALUE;
        for (int i = 0; i < maxAvailablePos; i++) {
          IteratingBufferedRunningLengthWord rlw = rlws[i];
          long rl = rlw.getRunningLength();
          if (rl == 0) {
            assert emptyRl == null;
            emptyRl = rlw;
          } else {
            minNonEmptyRl = Math.min(minNonEmptyRl, rl);
          }
        }
        // copy literal words only as far as the shortest run of the other
        // cursors, and never beyond minSize
        long wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl;
        if (emptyRl != null)
          emptyRl.writeLiteralWords((int) wordsToWrite, container);
        index += wordsToWrite;
      }

      while (index < minSize) {
        // or together the literal word at this index of every cursor whose
        // run has already ended at this index
        long word = 0;
        for (int i = 0; i < maxAvailablePos; i++) {
          IteratingBufferedRunningLengthWord rlw = rlws[i];
          if (rlw.getRunningLength() <= index) {
            word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength());
          }
        }
        container.add(word);
        index++;
      }
      for (int i = 0; i < maxAvailablePos; i++) {
        IteratingBufferedRunningLengthWord rlw = rlws[i];
        rlw.discardFirstWords(minSize);
      }
    }
  }
  container.setSizeInBits(maxSize);
}
||||||
|
|
||||||
|
} |
@ -0,0 +1,31 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/**
 * A forward-only cursor over a stream of integers.
 *
 * @author Daniel Lemire
 * @since 0.2.0
 */
public interface IntIterator {

  /**
   * Tells whether the stream still has an integer to deliver.
   *
   * @return true, if there is more, false otherwise
   */
  boolean hasNext();

  /**
   * Delivers the next integer of the stream.
   *
   * @return the integer
   */
  int next();
}
@ -0,0 +1,87 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2012, Google Inc. |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; |
||||||
|
|
||||||
|
/** |
||||||
|
* The IntIteratorImpl is the 64 bit implementation of the |
||||||
|
* IntIterator interface, which efficiently returns the stream of integers |
||||||
|
* represented by an EWAHIterator. |
||||||
|
* |
||||||
|
* @author Colby Ranger |
||||||
|
* @since 0.5.6 |
||||||
|
*/ |
||||||
|
final class IntIteratorImpl implements IntIterator { |
||||||
|
|
||||||
|
private final EWAHIterator ewahIter; |
||||||
|
private final long[] ewahBuffer; |
||||||
|
private int position; |
||||||
|
private int runningLength; |
||||||
|
private long word; |
||||||
|
private int wordPosition; |
||||||
|
private int wordLength; |
||||||
|
private int literalPosition; |
||||||
|
private boolean hasnext; |
||||||
|
|
||||||
|
IntIteratorImpl(EWAHIterator ewahIter) { |
||||||
|
this.ewahIter = ewahIter; |
||||||
|
this.ewahBuffer = ewahIter.buffer(); |
||||||
|
this.hasnext = this.moveToNext(); |
||||||
|
} |
||||||
|
|
||||||
|
public final boolean moveToNext() { |
||||||
|
while (!runningHasNext() && !literalHasNext()) { |
||||||
|
if (!this.ewahIter.hasNext()) { |
||||||
|
return false; |
||||||
|
} |
||||||
|
setRunningLengthWord(this.ewahIter.next()); |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return this.hasnext; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public final int next() { |
||||||
|
final int answer; |
||||||
|
if (runningHasNext()) { |
||||||
|
answer = this.position++; |
||||||
|
} else { |
||||||
|
final int bit = Long.numberOfTrailingZeros(this.word); |
||||||
|
this.word ^= (1l << bit); |
||||||
|
answer = this.literalPosition + bit; |
||||||
|
} |
||||||
|
this.hasnext = this.moveToNext(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
private final void setRunningLengthWord(RunningLengthWord rlw) { |
||||||
|
this.runningLength = wordinbits * (int) rlw.getRunningLength() + this.position; |
||||||
|
if (!rlw.getRunningBit()) { |
||||||
|
this.position = this.runningLength; |
||||||
|
} |
||||||
|
|
||||||
|
this.wordPosition = this.ewahIter.literalWords(); |
||||||
|
this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean runningHasNext() { |
||||||
|
return this.position < this.runningLength; |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean literalHasNext() { |
||||||
|
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||||
|
this.word = this.ewahBuffer[this.wordPosition++]; |
||||||
|
this.literalPosition = this.position; |
||||||
|
this.position += wordinbits; |
||||||
|
} |
||||||
|
return this.word != 0; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,89 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* Implementation of an IntIterator over an IteratingRLW. |
||||||
|
* |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class IntIteratorOverIteratingRLW implements IntIterator { |
||||||
|
IteratingRLW parent; |
||||||
|
private int position; |
||||||
|
private int runningLength; |
||||||
|
private long word; |
||||||
|
private int wordPosition; |
||||||
|
private int wordLength; |
||||||
|
private int literalPosition; |
||||||
|
private boolean hasnext; |
||||||
|
|
||||||
|
/** |
||||||
|
* @param p iterator we wish to iterate over |
||||||
|
*/ |
||||||
|
public IntIteratorOverIteratingRLW(final IteratingRLW p) { |
||||||
|
this.parent = p; |
||||||
|
this.position = 0; |
||||||
|
setupForCurrentRunningLengthWord(); |
||||||
|
this.hasnext = moveToNext(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return whether we could find another set bit; don't move if there is an unprocessed value |
||||||
|
*/ |
||||||
|
private final boolean moveToNext() { |
||||||
|
while (!runningHasNext() && !literalHasNext()) { |
||||||
|
if (this.parent.next()) |
||||||
|
setupForCurrentRunningLengthWord(); |
||||||
|
else return false; |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return this.hasnext; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public final int next() { |
||||||
|
final int answer; |
||||||
|
if (runningHasNext()) { |
||||||
|
answer = this.position++; |
||||||
|
} else { |
||||||
|
final int bit = Long.numberOfTrailingZeros(this.word); |
||||||
|
this.word ^= (1l << bit); |
||||||
|
answer = this.literalPosition + bit; |
||||||
|
} |
||||||
|
this.hasnext = this.moveToNext(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
private final void setupForCurrentRunningLengthWord() { |
||||||
|
this.runningLength = wordinbits * (int) this.parent.getRunningLength() |
||||||
|
+ this.position; |
||||||
|
|
||||||
|
if (!this.parent.getRunningBit()) { |
||||||
|
this.position = this.runningLength; |
||||||
|
} |
||||||
|
this.wordPosition = 0; |
||||||
|
this.wordLength = this.parent.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean runningHasNext() { |
||||||
|
return this.position < this.runningLength; |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean literalHasNext() { |
||||||
|
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||||
|
this.word = this.parent.getLiteralWordAt(this.wordPosition++); |
||||||
|
this.literalPosition = this.position; |
||||||
|
this.position += wordinbits; |
||||||
|
} |
||||||
|
return this.word != 0; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,276 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically |
||||||
|
* advances to the next BufferedRunningLengthWord as words are discarded. |
||||||
|
* |
||||||
|
* @since 0.4.0 |
||||||
|
* @author David McIntosh |
||||||
|
*/ |
||||||
|
public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable{ |
||||||
|
/** |
||||||
|
* Instantiates a new iterating buffered running length word. |
||||||
|
* |
||||||
|
* @param iterator iterator |
||||||
|
*/ |
||||||
|
public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) { |
||||||
|
this.iterator = iterator; |
||||||
|
this.brlw = new BufferedRunningLengthWord(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||||
|
this.buffer = this.iterator.buffer(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new iterating buffered running length word. |
||||||
|
* @param bitmap over which we want to iterate |
||||||
|
* |
||||||
|
*/ |
||||||
|
public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) { |
||||||
|
this.iterator = EWAHIterator.getEWAHIterator(bitmap); |
||||||
|
this.brlw = new BufferedRunningLengthWord(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||||
|
this.buffer = this.iterator.buffer(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Discard first words, iterating to the next running length word if needed. |
||||||
|
* |
||||||
|
* @param x the number of words to be discarded |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void discardFirstWords(long x) { |
||||||
|
while (x > 0) { |
||||||
|
if (this.brlw.RunningLength > x) { |
||||||
|
this.brlw.RunningLength -= x; |
||||||
|
return; |
||||||
|
} |
||||||
|
x -= this.brlw.RunningLength; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
long toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||||
|
|
||||||
|
this.literalWordStartPosition += toDiscard; |
||||||
|
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||||
|
x -= toDiscard; |
||||||
|
if ((x > 0) || (this.brlw.size() == 0)) { |
||||||
|
if (!this.iterator.hasNext()) { |
||||||
|
break; |
||||||
|
} |
||||||
|
this.brlw.reset(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
/** |
||||||
|
* Move to the next RunningLengthWord |
||||||
|
* @return whether the move was possible |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean next() { |
||||||
|
if (!this.iterator.hasNext()) { |
||||||
|
this.brlw.NumberOfLiteralWords = 0; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
return false; |
||||||
|
} |
||||||
|
this.brlw.reset(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out up to max words, returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
public long discharge(BitmapStorage container, long max) { |
||||||
|
long index = 0; |
||||||
|
while ((index < max) && (size() > 0)) { |
||||||
|
// first run
|
||||||
|
long pl = getRunningLength(); |
||||||
|
if (index + pl > max) { |
||||||
|
pl = max - index; |
||||||
|
} |
||||||
|
container.addStreamOfEmptyWords(getRunningBit(), pl); |
||||||
|
index += pl; |
||||||
|
int pd = getNumberOfLiteralWords(); |
||||||
|
if (pd + index > max) { |
||||||
|
pd = (int) (max - index); |
||||||
|
} |
||||||
|
writeLiteralWords(pd, container); |
||||||
|
discardFirstWords(pl+pd); |
||||||
|
index += pd; |
||||||
|
} |
||||||
|
return index; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out up to max words (negated), returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
public long dischargeNegated(BitmapStorage container, long max) { |
||||||
|
long index = 0; |
||||||
|
while ((index < max) && (size() > 0)) { |
||||||
|
// first run
|
||||||
|
long pl = getRunningLength(); |
||||||
|
if (index + pl > max) { |
||||||
|
pl = max - index; |
||||||
|
} |
||||||
|
container.addStreamOfEmptyWords(!getRunningBit(), pl); |
||||||
|
index += pl; |
||||||
|
int pd = getNumberOfLiteralWords(); |
||||||
|
if (pd + index > max) { |
||||||
|
pd = (int) (max - index); |
||||||
|
} |
||||||
|
writeNegatedLiteralWords(pd, container); |
||||||
|
discardFirstWords(pl+pd); |
||||||
|
index += pd; |
||||||
|
} |
||||||
|
return index; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Write out the remain words, transforming them to zeroes. |
||||||
|
* @param container target for writes |
||||||
|
*/ |
||||||
|
public void dischargeAsEmpty(BitmapStorage container) { |
||||||
|
while(size()>0) { |
||||||
|
container.addStreamOfEmptyWords(false, size()); |
||||||
|
discardFirstWords(size()); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Write out the remaining words |
||||||
|
* @param container target for writes |
||||||
|
*/ |
||||||
|
public void discharge(BitmapStorage container) { |
||||||
|
this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); |
||||||
|
discharge(this.brlw, this.iterator, container); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Get the nth literal word for the current running length word |
||||||
|
* @param index zero based index |
||||||
|
* @return the literal word |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public long getLiteralWordAt(int index) { |
||||||
|
return this.buffer[this.literalWordStartPosition + index]; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words for the current running length word. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return this.brlw.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean getRunningBit() { |
||||||
|
return this.brlw.RunningBit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public long getRunningLength() { |
||||||
|
return this.brlw.RunningLength; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Size in uncompressed words of the current running length word. |
||||||
|
* |
||||||
|
* @return the long |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public long size() { |
||||||
|
return this.brlw.size(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* write the first N literal words to the target bitmap. Does not discard the words or perform iteration. |
||||||
|
* @param numWords number of words to be written |
||||||
|
* @param container where we write |
||||||
|
*/ |
||||||
|
public void writeLiteralWords(int numWords, BitmapStorage container) { |
||||||
|
container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. |
||||||
|
* @param numWords number of words to be written |
||||||
|
* @param container where we write |
||||||
|
*/ |
||||||
|
public void writeNegatedLiteralWords(int numWords, BitmapStorage container) { |
||||||
|
container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* For internal use. (One could use the non-static discharge method instead, |
||||||
|
* but we expect them to be slower.) |
||||||
|
* |
||||||
|
* @param initialWord |
||||||
|
* the initial word |
||||||
|
* @param iterator |
||||||
|
* the iterator |
||||||
|
* @param container |
||||||
|
* the container |
||||||
|
*/ |
||||||
|
private static void discharge(final BufferedRunningLengthWord initialWord, |
||||||
|
final EWAHIterator iterator, final BitmapStorage container) { |
||||||
|
BufferedRunningLengthWord runningLengthWord = initialWord; |
||||||
|
for (;;) { |
||||||
|
final long runningLength = runningLengthWord.getRunningLength(); |
||||||
|
container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), |
||||||
|
runningLength); |
||||||
|
container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() |
||||||
|
+ runningLengthWord.literalwordoffset, |
||||||
|
runningLengthWord.getNumberOfLiteralWords()); |
||||||
|
if (!iterator.hasNext()) |
||||||
|
break; |
||||||
|
runningLengthWord = new BufferedRunningLengthWord(iterator.next()); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
@Override |
||||||
|
public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException { |
||||||
|
IteratingBufferedRunningLengthWord answer = (IteratingBufferedRunningLengthWord) super.clone(); |
||||||
|
answer.brlw = this.brlw.clone(); |
||||||
|
answer.buffer = this.buffer; |
||||||
|
answer.iterator = this.iterator.clone(); |
||||||
|
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
private BufferedRunningLengthWord brlw; |
||||||
|
private long[] buffer; |
||||||
|
private int literalWordStartPosition; |
||||||
|
private EWAHIterator iterator; |
||||||
|
} |
@ -0,0 +1,49 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/**
 * High-level iterator over a compressed bitmap, exposing one running length
 * word (a run of fill words followed by literal words) at a time.
 */
public interface IteratingRLW {

  /**
   * @return whether there is more
   */
  boolean next();

  /**
   * @param index where the literal word is
   * @return the literal word at the given index.
   */
  long getLiteralWordAt(int index);

  /**
   * @return the number of literal (non-fill) words
   */
  int getNumberOfLiteralWords();

  /**
   * @return the bit used for the fill bits
   */
  boolean getRunningBit();

  /**
   * @return sum of getRunningLength() and getNumberOfLiteralWords()
   */
  long size();

  /**
   * @return length of the run of fill words
   */
  long getRunningLength();

  /**
   * @param x the number of words to discard
   */
  void discardFirstWords(long x);

  /**
   * @return a copy of the iterator
   * @throws CloneNotSupportedException this should not be thrown in theory
   */
  IteratingRLW clone() throws CloneNotSupportedException;
}
@ -0,0 +1,616 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
import java.util.Arrays; |
||||||
|
import java.util.Iterator; |
||||||
|
import java.util.LinkedList; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* Set of helper functions to aggregate bitmaps. |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class IteratorAggregation { |
||||||
|
|
||||||
|
/** |
||||||
|
* @param x iterator to negate |
||||||
|
* @return negated version of the iterator |
||||||
|
*/ |
||||||
|
public static IteratingRLW not(final IteratingRLW x) { |
||||||
|
return new IteratingRLW() { |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean next() { |
||||||
|
return x.next(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public long getLiteralWordAt(int index) { |
||||||
|
return ~x.getLiteralWordAt(index); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return x.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean getRunningBit() { |
||||||
|
return ! x.getRunningBit(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public long size() { |
||||||
|
return x.size(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public long getRunningLength() { |
||||||
|
return x.getRunningLength(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void discardFirstWords(long y) { |
||||||
|
x.discardFirstWords(y); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public IteratingRLW clone() throws CloneNotSupportedException { |
||||||
|
throw new CloneNotSupportedException(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
}; |
||||||
|
} |
||||||
|
|
||||||
|
/**
 * Aggregate the iterators using a bitmap buffer. The buffer size passed on
 * to the two-argument overload is DEFAULTMAXBUFSIZE.
 *
 * @param al set of iterators to aggregate
 * @return and aggregate
 */
public static IteratingRLW bufferedand(final IteratingRLW... al) {
  return bufferedand(DEFAULTMAXBUFSIZE,al);
}
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al set of iterators to aggregate |
||||||
|
* @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator) |
||||||
|
* @return and aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... al) { |
||||||
|
if (al.length == 0) |
||||||
|
throw new IllegalArgumentException("Need at least one iterator"); |
||||||
|
if (al.length == 1) |
||||||
|
return al[0]; |
||||||
|
final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>(); |
||||||
|
for (IteratingRLW i : al) |
||||||
|
basell.add(i); |
||||||
|
return new BufferedIterator(new BufferedAndIterator(basell,bufsize)); |
||||||
|
} |
||||||
|
|
||||||
|
/**
 * Aggregate the iterators using a bitmap buffer. The buffer size passed on
 * to the two-argument overload is DEFAULTMAXBUFSIZE.
 *
 * @param al set of iterators to aggregate
 * @return or aggregate
 */
public static IteratingRLW bufferedor(final IteratingRLW... al) {
  return bufferedor(DEFAULTMAXBUFSIZE,al);
}
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @param bufsize size of the internal buffer used by the iterator in 64-bit words |
||||||
|
* @return or aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... al) { |
||||||
|
if (al.length == 0) |
||||||
|
throw new IllegalArgumentException("Need at least one iterator"); |
||||||
|
if (al.length == 1) |
||||||
|
return al[0]; |
||||||
|
|
||||||
|
final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>(); |
||||||
|
for (IteratingRLW i : al) |
||||||
|
basell.add(i); |
||||||
|
return new BufferedIterator(new BufferedORIterator(basell,bufsize)); |
||||||
|
} |
||||||
|
|
||||||
|
/**
 * Aggregate the iterators using a bitmap buffer. The buffer size passed on
 * to the two-argument overload is DEFAULTMAXBUFSIZE.
 *
 * @param al set of iterators to aggregate
 * @return xor aggregate
 */
public static IteratingRLW bufferedxor(final IteratingRLW... al) {
  return bufferedxor(DEFAULTMAXBUFSIZE,al);
}
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @param bufsize size of the internal buffer used by the iterator in 64-bit words |
||||||
|
* @return xor aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... al) { |
||||||
|
if (al.length == 0) |
||||||
|
throw new IllegalArgumentException("Need at least one iterator"); |
||||||
|
if (al.length == 1) |
||||||
|
return al[0]; |
||||||
|
|
||||||
|
|
||||||
|
final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>(); |
||||||
|
for (IteratingRLW i : al) |
||||||
|
basell.add(i); |
||||||
|
|
||||||
|
return new BufferedIterator(new BufferedXORIterator(basell, bufsize)); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Write out the content of the iterator, but as if it were all zeros. |
||||||
|
* |
||||||
|
* @param container |
||||||
|
* where we write |
||||||
|
* @param i |
||||||
|
* the iterator |
||||||
|
*/ |
||||||
|
protected static void dischargeAsEmpty(final BitmapStorage container, |
||||||
|
final IteratingRLW i) { |
||||||
|
while (i.size() > 0) { |
||||||
|
container.addStreamOfEmptyWords(false, i.size()); |
||||||
|
i.next(); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out up to max words, returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param i source of data |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
|
||||||
|
protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) { |
||||||
|
long counter = 0; |
||||||
|
while (i.size() > 0 && counter < max) { |
||||||
|
long L1 = i.getRunningLength(); |
||||||
|
if (L1 > 0) { |
||||||
|
if (L1 + counter > max) |
||||||
|
L1 = max - counter; |
||||||
|
container.addStreamOfEmptyWords(i.getRunningBit(), L1); |
||||||
|
counter += L1; |
||||||
|
} |
||||||
|
long L = i.getNumberOfLiteralWords(); |
||||||
|
if(L + counter > max) L = max - counter; |
||||||
|
for (int k = 0; k < L; ++k) { |
||||||
|
container.add(i.getLiteralWordAt(k)); |
||||||
|
} |
||||||
|
counter += L; |
||||||
|
i.discardFirstWords(L+L1); |
||||||
|
} |
||||||
|
return counter; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Write out up to max negated words, returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param i source of data |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) { |
||||||
|
long counter = 0; |
||||||
|
while (i.size() > 0 && counter < max) { |
||||||
|
long L1 = i.getRunningLength(); |
||||||
|
if (L1 > 0) { |
||||||
|
if (L1 + counter > max) |
||||||
|
L1 = max - counter; |
||||||
|
container.addStreamOfEmptyWords(!i.getRunningBit(), L1); |
||||||
|
counter += L1; |
||||||
|
} |
||||||
|
long L = i.getNumberOfLiteralWords(); |
||||||
|
if(L + counter > max) L = max - counter; |
||||||
|
for (int k = 0; k < L; ++k) { |
||||||
|
container.add(~i.getLiteralWordAt(k)); |
||||||
|
} |
||||||
|
counter += L; |
||||||
|
i.discardFirstWords(L+L1); |
||||||
|
} |
||||||
|
return counter; |
||||||
|
} |
||||||
|
|
||||||
|
static void andToContainer(final BitmapStorage container, |
||||||
|
int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) { |
||||||
|
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||||
|
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||||
|
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||||
|
.getRunningLength(); |
||||||
|
final IteratingRLW prey = i_is_prey ? rlwi : rlwj; |
||||||
|
final IteratingRLW predator = i_is_prey ? rlwj |
||||||
|
: rlwi; |
||||||
|
if (predator.getRunningBit() == false) { |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||||
|
prey.discardFirstWords(predator.getRunningLength()); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} else { |
||||||
|
final long index = discharge(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} |
||||||
|
} |
||||||
|
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||||
|
rlwj.getNumberOfLiteralWords()); |
||||||
|
if (nbre_literal > 0) { |
||||||
|
desiredrlwcount -= nbre_literal; |
||||||
|
for (int k = 0; k < nbre_literal; ++k) |
||||||
|
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||||
|
rlwi.discardFirstWords(nbre_literal); |
||||||
|
rlwj.discardFirstWords(nbre_literal); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
static void andToContainer(final BitmapStorage container, |
||||||
|
final IteratingRLW rlwi, IteratingRLW rlwj) { |
||||||
|
while ((rlwi.size()>0) && (rlwj.size()>0) ) { |
||||||
|
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||||
|
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||||
|
.getRunningLength(); |
||||||
|
final IteratingRLW prey = i_is_prey ? rlwi : rlwj; |
||||||
|
final IteratingRLW predator = i_is_prey ? rlwj |
||||||
|
: rlwi; |
||||||
|
if (predator.getRunningBit() == false) { |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||||
|
prey.discardFirstWords(predator.getRunningLength()); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} else { |
||||||
|
final long index = discharge(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} |
||||||
|
} |
||||||
|
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||||
|
rlwj.getNumberOfLiteralWords()); |
||||||
|
if (nbre_literal > 0) { |
||||||
|
for (int k = 0; k < nbre_literal; ++k) |
||||||
|
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||||
|
rlwi.discardFirstWords(nbre_literal); |
||||||
|
rlwj.discardFirstWords(nbre_literal); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the first few words of the XOR aggregate between two iterators. |
||||||
|
* |
||||||
|
* @param container where to write |
||||||
|
* @param desiredrlwcount number of words to be written (max) |
||||||
|
* @param rlwi first iterator to aggregate |
||||||
|
* @param rlwj second iterator to aggregate |
||||||
|
*/ |
||||||
|
public static void xorToContainer(final BitmapStorage container, |
||||||
|
int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) { |
||||||
|
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||||
|
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||||
|
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||||
|
.getRunningLength(); |
||||||
|
final IteratingRLW prey = i_is_prey ? rlwi : rlwj; |
||||||
|
final IteratingRLW predator = i_is_prey ? rlwj |
||||||
|
: rlwi; |
||||||
|
if (predator.getRunningBit() == false) { |
||||||
|
long index = discharge(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} else { |
||||||
|
long index = dischargeNegated(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(true, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} |
||||||
|
} |
||||||
|
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||||
|
rlwj.getNumberOfLiteralWords()); |
||||||
|
if (nbre_literal > 0) { |
||||||
|
desiredrlwcount -= nbre_literal; |
||||||
|
for (int k = 0; k < nbre_literal; ++k) |
||||||
|
container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); |
||||||
|
rlwi.discardFirstWords(nbre_literal); |
||||||
|
rlwj.discardFirstWords(nbre_literal); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
protected static int inplaceor(long[] bitmap, |
||||||
|
IteratingRLW i) { |
||||||
|
|
||||||
|
int pos = 0; |
||||||
|
long s; |
||||||
|
while ((s = i.size()) > 0) { |
||||||
|
if (pos + s < bitmap.length) { |
||||||
|
final int L = (int) i.getRunningLength(); |
||||||
|
if (i.getRunningBit()) |
||||||
|
Arrays.fill(bitmap, pos, pos + L, ~0l); |
||||||
|
pos += L; |
||||||
|
final int LR = i.getNumberOfLiteralWords(); |
||||||
|
|
||||||
|
for (int k = 0; k < LR; ++k) |
||||||
|
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||||
|
if (!i.next()) { |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} else { |
||||||
|
int howmany = bitmap.length - pos; |
||||||
|
int L = (int) i.getRunningLength(); |
||||||
|
|
||||||
|
if (pos + L > bitmap.length) { |
||||||
|
if (i.getRunningBit()) { |
||||||
|
Arrays.fill(bitmap, pos, bitmap.length, ~0l); |
||||||
|
} |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return bitmap.length; |
||||||
|
} |
||||||
|
if (i.getRunningBit()) |
||||||
|
Arrays.fill(bitmap, pos, pos + L, ~0l); |
||||||
|
pos += L; |
||||||
|
for (int k = 0; pos < bitmap.length; ++k) |
||||||
|
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} |
||||||
|
return pos; |
||||||
|
} |
||||||
|
|
||||||
|
protected static int inplacexor(long[] bitmap, |
||||||
|
IteratingRLW i) { |
||||||
|
int pos = 0; |
||||||
|
long s; |
||||||
|
while ((s = i.size()) > 0) { |
||||||
|
if (pos + s < bitmap.length) { |
||||||
|
final int L = (int) i.getRunningLength(); |
||||||
|
if (i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = ~bitmap[k]; |
||||||
|
} |
||||||
|
pos += L; |
||||||
|
final int LR = i.getNumberOfLiteralWords(); |
||||||
|
for (int k = 0; k < LR; ++k) |
||||||
|
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||||
|
if (!i.next()) { |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} else { |
||||||
|
int howmany = bitmap.length - pos; |
||||||
|
int L = (int) i.getRunningLength(); |
||||||
|
if (pos + L > bitmap.length) { |
||||||
|
if (i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < bitmap.length; ++k) |
||||||
|
bitmap[k] = ~bitmap[k]; |
||||||
|
} |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return bitmap.length; |
||||||
|
} |
||||||
|
if (i.getRunningBit()) |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = ~bitmap[k]; |
||||||
|
pos += L; |
||||||
|
for (int k = 0; pos < bitmap.length; ++k) |
||||||
|
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} |
||||||
|
return pos; |
||||||
|
} |
||||||
|
protected static int inplaceand(long[] bitmap, |
||||||
|
IteratingRLW i) { |
||||||
|
int pos = 0; |
||||||
|
long s; |
||||||
|
while ((s = i.size()) > 0) { |
||||||
|
if (pos + s < bitmap.length) { |
||||||
|
final int L = (int) i.getRunningLength(); |
||||||
|
if (!i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = 0; |
||||||
|
} |
||||||
|
pos += L; |
||||||
|
final int LR = i.getNumberOfLiteralWords(); |
||||||
|
for (int k = 0; k < LR; ++k) |
||||||
|
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||||
|
if (!i.next()) { |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} else { |
||||||
|
int howmany = bitmap.length - pos; |
||||||
|
int L = (int) i.getRunningLength(); |
||||||
|
if (pos + L > bitmap.length) { |
||||||
|
if (!i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < bitmap.length; ++k) |
||||||
|
bitmap[k] = 0; |
||||||
|
} |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return bitmap.length; |
||||||
|
} |
||||||
|
if (!i.getRunningBit()) |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = 0; |
||||||
|
pos += L; |
||||||
|
for (int k = 0; pos < bitmap.length; ++k) |
||||||
|
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} |
||||||
|
return pos; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* An optimization option. Larger values may improve speed, but at |
||||||
|
* the expense of memory. |
||||||
|
*/ |
||||||
|
public final static int DEFAULTMAXBUFSIZE = 65536; |
||||||
|
} |
||||||
|
class BufferedORIterator implements CloneableIterator<EWAHIterator> { |
||||||
|
EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); |
||||||
|
long[] hardbitmap; |
||||||
|
LinkedList<IteratingRLW> ll; |
||||||
|
int buffersize; |
||||||
|
|
||||||
|
BufferedORIterator(LinkedList<IteratingRLW> basell, int bufsize) { |
||||||
|
this.ll = basell; |
||||||
|
this.hardbitmap = new long[bufsize]; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public BufferedXORIterator clone() throws CloneNotSupportedException { |
||||||
|
BufferedXORIterator answer = (BufferedXORIterator) super.clone(); |
||||||
|
answer.buffer = this.buffer.clone(); |
||||||
|
answer.hardbitmap = this.hardbitmap.clone(); |
||||||
|
answer.ll = (LinkedList<IteratingRLW>) this.ll.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return !this.ll.isEmpty(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator next() { |
||||||
|
this.buffer.clear(); |
||||||
|
long effective = 0; |
||||||
|
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||||
|
while (i.hasNext()) { |
||||||
|
IteratingRLW rlw = i.next(); |
||||||
|
if (rlw.size() > 0) { |
||||||
|
int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
i.remove(); |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) { |
||||||
|
this.buffer.add(this.hardbitmap[k]); |
||||||
|
} |
||||||
|
|
||||||
|
Arrays.fill(this.hardbitmap, 0); |
||||||
|
return this.buffer.getEWAHIterator(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
class BufferedXORIterator implements CloneableIterator<EWAHIterator> { |
||||||
|
EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); |
||||||
|
long[] hardbitmap; |
||||||
|
LinkedList<IteratingRLW> ll; |
||||||
|
int buffersize; |
||||||
|
|
||||||
|
BufferedXORIterator(LinkedList<IteratingRLW> basell, int bufsize) { |
||||||
|
this.ll = basell; |
||||||
|
this.hardbitmap = new long[bufsize]; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public BufferedXORIterator clone() throws CloneNotSupportedException { |
||||||
|
BufferedXORIterator answer = (BufferedXORIterator) super.clone(); |
||||||
|
answer.buffer = this.buffer.clone(); |
||||||
|
answer.hardbitmap = this.hardbitmap.clone(); |
||||||
|
answer.ll = (LinkedList<IteratingRLW>) this.ll.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return !this.ll.isEmpty(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator next() { |
||||||
|
this.buffer.clear(); |
||||||
|
long effective = 0; |
||||||
|
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||||
|
while (i.hasNext()) { |
||||||
|
IteratingRLW rlw = i.next(); |
||||||
|
if (rlw.size() > 0) { |
||||||
|
int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
i.remove(); |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
this.buffer.add(this.hardbitmap[k]); |
||||||
|
Arrays.fill(this.hardbitmap, 0); |
||||||
|
return this.buffer.getEWAHIterator(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
class BufferedAndIterator implements CloneableIterator<EWAHIterator> { |
||||||
|
EWAHCompressedBitmap buffer = new EWAHCompressedBitmap(); |
||||||
|
LinkedList<IteratingRLW> ll; |
||||||
|
int buffersize; |
||||||
|
|
||||||
|
public BufferedAndIterator(LinkedList<IteratingRLW> basell, int bufsize) { |
||||||
|
this.ll = basell; |
||||||
|
this.buffersize = bufsize; |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return !this.ll.isEmpty(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public BufferedAndIterator clone() throws CloneNotSupportedException { |
||||||
|
BufferedAndIterator answer = (BufferedAndIterator) super.clone(); |
||||||
|
answer.buffer = this.buffer.clone(); |
||||||
|
answer.ll = (LinkedList<IteratingRLW>) this.ll.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator next() { |
||||||
|
this.buffer.clear(); |
||||||
|
IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(), |
||||||
|
this.ll.get(0), this.ll.get(1)); |
||||||
|
if (this.ll.size() > 2) { |
||||||
|
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||||
|
i.next(); |
||||||
|
i.next(); |
||||||
|
EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap(); |
||||||
|
while (i.hasNext() && this.buffer.sizeInBytes() > 0) { |
||||||
|
IteratorAggregation.andToContainer(tmpbuffer, |
||||||
|
this.buffer.getIteratingRLW(), i.next()); |
||||||
|
this.buffer.swap(tmpbuffer); |
||||||
|
tmpbuffer.clear(); |
||||||
|
} |
||||||
|
} |
||||||
|
Iterator<IteratingRLW> i = this.ll.iterator(); |
||||||
|
while(i.hasNext()) { |
||||||
|
if(i.next().size() == 0) { |
||||||
|
this.ll.clear(); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
return this.buffer.getEWAHIterator(); |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,132 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
import java.util.Iterator; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* Convenience functions for working over iterators |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class IteratorUtil { |
||||||
|
|
||||||
|
/** |
||||||
|
* @param i iterator we wish to iterate over |
||||||
|
* @return an iterator over the set bits corresponding to the iterator |
||||||
|
*/ |
||||||
|
public static IntIterator toSetBitsIntIterator(final IteratingRLW i) { |
||||||
|
return new IntIteratorOverIteratingRLW(i); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @param i iterator we wish to iterate over |
||||||
|
* @return an iterator over the set bits corresponding to the iterator |
||||||
|
*/ |
||||||
|
public static Iterator<Integer> toSetBitsIterator(final IteratingRLW i) { |
||||||
|
return new Iterator<Integer>() { |
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return this.under.hasNext(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public Integer next() { |
||||||
|
return new Integer(this.under.next()); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void remove() { |
||||||
|
} |
||||||
|
|
||||||
|
final private IntIterator under = toSetBitsIntIterator(i); |
||||||
|
}; |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Generate a bitmap from an iterator |
||||||
|
* |
||||||
|
* @param i iterator we wish to materialize |
||||||
|
* @param c where we write |
||||||
|
*/ |
||||||
|
public static void materialize(final IteratingRLW i, final BitmapStorage c) { |
||||||
|
while (true) { |
||||||
|
if (i.getRunningLength() > 0) { |
||||||
|
c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); |
||||||
|
} |
||||||
|
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||||
|
c.add(i.getLiteralWordAt(k)); |
||||||
|
if (!i.next()) |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @param i iterator we wish to iterate over |
||||||
|
* @return the cardinality (number of set bits) corresponding to the iterator |
||||||
|
*/ |
||||||
|
public static int cardinality(final IteratingRLW i) { |
||||||
|
int answer = 0; |
||||||
|
while (true) { |
||||||
|
if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits; |
||||||
|
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||||
|
answer += Long.bitCount(i.getLiteralWordAt(k)); |
||||||
|
if(!i.next()) break; |
||||||
|
} |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @param x set of bitmaps |
||||||
|
* @return an array of iterators corresponding to the array of bitmaps |
||||||
|
*/ |
||||||
|
public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... x) { |
||||||
|
IteratingRLW[] X = new IteratingRLW[x.length]; |
||||||
|
for (int k = 0; k < X.length; ++k) { |
||||||
|
X[k] = new IteratingBufferedRunningLengthWord(x[k]); |
||||||
|
} |
||||||
|
return X; |
||||||
|
} |
||||||
|
/** |
||||||
|
* Turn an iterator into a bitmap. |
||||||
|
* |
||||||
|
* @param i iterator we wish to materialize |
||||||
|
* @param c where we write |
||||||
|
* @param Max maximum number of words we wish to materialize |
||||||
|
* @return how many words were actually materialized |
||||||
|
*/ |
||||||
|
public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) { |
||||||
|
final long origMax = Max; |
||||||
|
while (true) { |
||||||
|
if (i.getRunningLength() > 0) { |
||||||
|
long L = i.getRunningLength(); |
||||||
|
if(L > Max) L = Max; |
||||||
|
c.addStreamOfEmptyWords(i.getRunningBit(), L); |
||||||
|
Max -= L; |
||||||
|
} |
||||||
|
long L = i.getNumberOfLiteralWords(); |
||||||
|
for (int k = 0; k < L; ++k) |
||||||
|
c.add(i.getLiteralWordAt(k)); |
||||||
|
if(Max>0) { |
||||||
|
if (!i.next()) |
||||||
|
break; |
||||||
|
} |
||||||
|
else break; |
||||||
|
} |
||||||
|
return origMax - Max; |
||||||
|
} |
||||||
|
/** |
||||||
|
* Turn an iterator into a bitmap |
||||||
|
* |
||||||
|
* @param i iterator we wish to materialize |
||||||
|
* @return materialized version of the iterator |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap materialize(final IteratingRLW i) { |
||||||
|
EWAHCompressedBitmap ewah = new EWAHCompressedBitmap(); |
||||||
|
materialize(i, ewah); |
||||||
|
return ewah; |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,61 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/** |
||||||
|
* A prototypical model for bitmaps. Used by the |
||||||
|
* class FastAggregation. Users should probably not |
||||||
|
* be concerned by this class. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* @param <T> the type of element (e.g., a bitmap class) |
||||||
|
* |
||||||
|
*/ |
||||||
|
public interface LogicalElement<T> { |
||||||
|
/** |
||||||
|
* Compute the bitwise logical and |
||||||
|
* @param le element |
||||||
|
* @return the result of the operation |
||||||
|
*/ |
||||||
|
public T and(T le); |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the bitwise logical and not |
||||||
|
* @param le element |
||||||
|
* @return the result of the operation |
||||||
|
*/ |
||||||
|
public T andNot(T le); |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the bitwise logical not (in place) |
||||||
|
*/ |
||||||
|
public void not(); |
||||||
|
|
||||||
|
|
||||||
|
@SuppressWarnings({ "rawtypes", "javadoc" }) |
||||||
|
/** |
||||||
|
* Compute the bitwise logical or |
||||||
|
* @param le another element |
||||||
|
* @return the result of the operation |
||||||
|
*/ |
||||||
|
public LogicalElement or(T le); |
||||||
|
|
||||||
|
/** |
||||||
|
* How many logical bits does this element represent? |
||||||
|
* |
||||||
|
* @return the number of bits represented by this element |
||||||
|
*/ |
||||||
|
public int sizeInBits(); |
||||||
|
|
||||||
|
/** |
||||||
|
* Should report the storage requirement |
||||||
|
* @return How many bytes |
||||||
|
* @since 0.6.2 |
||||||
|
*/ |
||||||
|
public int sizeInBytes(); |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the bitwise logical Xor |
||||||
|
* @param le element |
||||||
|
* @return the results of the operation |
||||||
|
*/ |
||||||
|
public T xor(T le); |
||||||
|
} |
@ -0,0 +1,92 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* This is a BitmapStorage that can be used to determine quickly if the result |
||||||
|
* of an operation is non-trivial... that is, whether there will be at least one |
||||||
|
* set bit. |
||||||
|
* |
||||||
|
* @since 0.4.2 |
||||||
|
* @author Daniel Lemire and Veronika Zenz |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class NonEmptyVirtualStorage implements BitmapStorage { |
||||||
|
static class NonEmptyException extends RuntimeException { |
||||||
|
private static final long serialVersionUID = 1L; |
||||||
|
|
||||||
|
/** |
||||||
|
* Do not fill in the stack trace for this exception |
||||||
|
* for performance reasons. |
||||||
|
* |
||||||
|
* @return this instance |
||||||
|
* @see Throwable#fillInStackTrace() |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public synchronized Throwable fillInStackTrace() { |
||||||
|
return this; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
private static final NonEmptyException nonEmptyException = new NonEmptyException(); |
||||||
|
|
||||||
|
/** |
||||||
|
* If the word to be added is non-zero, a NonEmptyException exception is |
||||||
|
* thrown. |
||||||
|
* |
||||||
|
* @see com.googlecode.javaewah.BitmapStorage#add(long) |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void add(long newdata) { |
||||||
|
if (newdata != 0) |
||||||
|
throw nonEmptyException; |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* throws a NonEmptyException exception when number is greater than 0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfLiteralWords(long[] data, int start, int number) { |
||||||
|
if(number>0){ |
||||||
|
throw nonEmptyException; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, |
||||||
|
* otherwise, nothing happens. |
||||||
|
* |
||||||
|
* @see com.googlecode.javaewah.BitmapStorage#addStreamOfEmptyWords(boolean, long) |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfEmptyWords(boolean v, long number) { |
||||||
|
if (v && (number>0)) |
||||||
|
throw nonEmptyException; |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* throws a NonEmptyException exception when number is greater than 0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) { |
||||||
|
if(number>0){ |
||||||
|
throw nonEmptyException; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Does nothing. |
||||||
|
* |
||||||
|
* @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void setSizeInBits(int bits) { |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,152 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* Mostly for internal use. |
||||||
|
* |
||||||
|
* @since 0.1.0 |
||||||
|
* @author Daniel Lemire |
||||||
|
*/ |
||||||
|
public final class RunningLengthWord implements Cloneable { |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new running length word. |
||||||
|
* |
||||||
|
* @param a |
||||||
|
* an array of 64-bit words |
||||||
|
* @param p |
||||||
|
* position in the array where the running length word is |
||||||
|
* located. |
||||||
|
*/ |
||||||
|
RunningLengthWord(final EWAHCompressedBitmap a, final int p) { |
||||||
|
this.parent = a; |
||||||
|
this.position = p; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return (int) (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
public boolean getRunningBit() { |
||||||
|
return (this.parent.buffer[this.position] & 1) != 0; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
public long getRunningLength() { |
||||||
|
return (this.parent.buffer[this.position] >>> 1) |
||||||
|
& largestrunninglengthcount; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the number of literal words. |
||||||
|
* |
||||||
|
* @param number |
||||||
|
* the new number of literal words |
||||||
|
*/ |
||||||
|
public void setNumberOfLiteralWords(final long number) { |
||||||
|
this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; |
||||||
|
this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) |
||||||
|
| runninglengthplusrunningbit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running bit. |
||||||
|
* |
||||||
|
* @param b |
||||||
|
* the new running bit |
||||||
|
*/ |
||||||
|
public void setRunningBit(final boolean b) { |
||||||
|
if (b) |
||||||
|
this.parent.buffer[this.position] |= 1l; |
||||||
|
else |
||||||
|
this.parent.buffer[this.position] &= ~1l; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running length. |
||||||
|
* |
||||||
|
* @param number |
||||||
|
* the new running length |
||||||
|
*/ |
||||||
|
public void setRunningLength(final long number) { |
||||||
|
this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; |
||||||
|
this.parent.buffer[this.position] &= (number << 1) |
||||||
|
| notshiftedlargestrunninglengthcount; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Return the size in uncompressed words represented by this running |
||||||
|
* length word. |
||||||
|
* |
||||||
|
* @return the size |
||||||
|
*/ |
||||||
|
public long size() { |
||||||
|
return getRunningLength() + getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
* @see java.lang.Object#toString() |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public String toString() { |
||||||
|
return "running bit = " + getRunningBit() |
||||||
|
+ " running length = " + getRunningLength() |
||||||
|
+ " number of lit. words " + getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public RunningLengthWord clone() throws CloneNotSupportedException { |
||||||
|
RunningLengthWord answer; |
||||||
|
answer = (RunningLengthWord) super.clone(); |
||||||
|
answer.parent = this.parent; |
||||||
|
answer.position = this.position; |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
/** The array of words. */ |
||||||
|
public EWAHCompressedBitmap parent; |
||||||
|
|
||||||
|
/** The position in array. */ |
||||||
|
public int position; |
||||||
|
|
||||||
|
/** |
||||||
|
* number of bits dedicated to marking of the running length of clean |
||||||
|
* words |
||||||
|
*/ |
||||||
|
public static final int runninglengthbits = 32; |
||||||
|
|
||||||
|
private static final int literalbits = 64 - 1 - runninglengthbits; |
||||||
|
|
||||||
|
/** largest number of literal words in a run. */ |
||||||
|
public static final int largestliteralcount = (1 << literalbits) - 1; |
||||||
|
|
||||||
|
/** largest number of clean words in a run */ |
||||||
|
public static final long largestrunninglengthcount = (1l << runninglengthbits) - 1; |
||||||
|
|
||||||
|
private static final long runninglengthplusrunningbit = (1l << (runninglengthbits + 1)) - 1; |
||||||
|
|
||||||
|
private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; |
||||||
|
|
||||||
|
private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; |
||||||
|
|
||||||
|
private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; |
||||||
|
|
||||||
|
} |
@ -0,0 +1,284 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
import java.util.Arrays; |
||||||
|
import java.util.List; |
||||||
|
import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap; |
||||||
|
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||||
|
import com.fr.third.googlecode.javaewah.IntIterator; |
||||||
|
import com.fr.third.googlecode.javaewah.IteratingRLW; |
||||||
|
import com.fr.third.googlecode.javaewah.IteratorAggregation; |
||||||
|
import com.fr.third.googlecode.javaewah.IteratorUtil; |
||||||
|
|
||||||
|
/** |
||||||
|
* This class is used to benchmark the performance of EWAH. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
*/ |
||||||
|
public class Benchmark { |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the union between two sorted arrays |
||||||
|
* @param set1 first sorted array |
||||||
|
* @param set2 second sorted array |
||||||
|
* @return merged array |
||||||
|
*/ |
||||||
|
static public int[] unite2by2(final int[] set1, final int[] set2) { |
||||||
|
int pos = 0; |
||||||
|
int k1 = 0, k2 = 0; |
||||||
|
if (0 == set1.length) |
||||||
|
return Arrays.copyOf(set2, set2.length); |
||||||
|
if (0 == set2.length) |
||||||
|
return Arrays.copyOf(set1, set1.length); |
||||||
|
int[] buffer = new int[set1.length + set2.length]; |
||||||
|
while (true) { |
||||||
|
if (set1[k1] < set2[k2]) { |
||||||
|
buffer[pos++] = set1[k1]; |
||||||
|
++k1; |
||||||
|
if (k1 >= set1.length) { |
||||||
|
for (; k2 < set2.length; ++k2) |
||||||
|
buffer[pos++] = set2[k2]; |
||||||
|
break; |
||||||
|
} |
||||||
|
} else if (set1[k1] == set2[k2]) { |
||||||
|
buffer[pos++] = set1[k1]; |
||||||
|
++k1; |
||||||
|
++k2; |
||||||
|
if (k1 >= set1.length) { |
||||||
|
for (; k2 < set2.length; ++k2) |
||||||
|
buffer[pos++] = set2[k2]; |
||||||
|
break; |
||||||
|
} |
||||||
|
if (k2 >= set2.length) { |
||||||
|
for (; k1 < set1.length; ++k1) |
||||||
|
buffer[pos++] = set1[k1]; |
||||||
|
break; |
||||||
|
} |
||||||
|
} else {// if (set1[k1]>set2[k2]) {
|
||||||
|
buffer[pos++] = set2[k2]; |
||||||
|
++k2; |
||||||
|
if (k2 >= set2.length) { |
||||||
|
for (; k1 < set1.length; ++k1) |
||||||
|
buffer[pos++] = set1[k1]; |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
return Arrays.copyOf(buffer, pos); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
//test(2, 24, 1);
|
||||||
|
test(100, 16, 1); |
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { |
||||||
|
long bogus = 0; |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
System.out.println("# generating random data..."); |
||||||
|
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||||
|
System.out.println("# generating random data... ok."); |
||||||
|
// building
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||||
|
int size = 0; |
||||||
|
for (int r = 0; r < repeat; ++r) { |
||||||
|
size = 0; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
size += ewah[k].sizeInBytes(); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + size; |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
int[] array = ewah[k].toArray(); |
||||||
|
bogus += array.length; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
int[] array = new int[ewah[k].cardinality()]; |
||||||
|
int c = 0; |
||||||
|
for (int x : ewah[k]) |
||||||
|
array[c++] = x; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
List<Integer> L = ewah[k].getPositions(); |
||||||
|
int[] array = new int[L.size()]; |
||||||
|
int c = 0; |
||||||
|
for (int x : L) |
||||||
|
array[c++] = x; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IntIterator iter = ewah[k].intIterator(); |
||||||
|
while (iter.hasNext()) { |
||||||
|
bogus += iter.next(); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
line += "\t\t\t"; |
||||||
|
// logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.or(ewah[j]); |
||||||
|
} |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap |
||||||
|
.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
// run sanity check
|
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||||
|
} |
||||||
|
IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); |
||||||
|
EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k+1)); |
||||||
|
EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor); |
||||||
|
if(!ewahorp.equals(mewahor)) throw new RuntimeException("bug"); |
||||||
|
} |
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||||
|
} |
||||||
|
IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); |
||||||
|
bogus += IteratorUtil.materialize(ewahor).sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
line += "\t\t\t"; |
||||||
|
// logical and
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap ewahand = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahand = ewahand.and(ewah[j]); |
||||||
|
} |
||||||
|
bogus += ewahand.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical and
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahand = EWAHCompressedBitmap |
||||||
|
.and(ewahcp); |
||||||
|
bogus += ewahand.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||||
|
} |
||||||
|
IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); |
||||||
|
EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k+1)); |
||||||
|
EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand); |
||||||
|
if(!ewahandp.equals(mewahand)) throw new RuntimeException("bug"); |
||||||
|
} |
||||||
|
// fast logical and
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||||
|
} |
||||||
|
IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp); |
||||||
|
bogus += IteratorUtil.materialize(ewahand).sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
System.out |
||||||
|
.println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); |
||||||
|
System.out.println(line); |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,212 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
import java.util.List; |
||||||
|
import com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32; |
||||||
|
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||||
|
import com.fr.third.googlecode.javaewah.IntIterator; |
||||||
|
import com.fr.third.googlecode.javaewah32.IteratingRLW32; |
||||||
|
import com.fr.third.googlecode.javaewah32.IteratorAggregation32; |
||||||
|
import com.fr.third.googlecode.javaewah32.IteratorUtil32; |
||||||
|
|
||||||
|
/** |
||||||
|
 * This class is used to benchmark the performance of EWAH. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
*/ |
||||||
|
public class Benchmark32 { |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
test(100, 16, 1); |
||||||
|
// test(2, 24, 1);
|
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) { |
||||||
|
long bogus = 0; |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
System.out.println("# generating random data..."); |
||||||
|
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||||
|
System.out.println("# generating random data... ok."); |
||||||
|
// building
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||||
|
int size = 0; |
||||||
|
for (int r = 0; r < repeat; ++r) { |
||||||
|
size = 0; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap32(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
size += ewah[k].sizeInBytes(); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + size; |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
int[] array = ewah[k].toArray(); |
||||||
|
bogus += array.length; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
int[] array = new int[ewah[k].cardinality()]; |
||||||
|
int c = 0; |
||||||
|
for (int x : ewah[k]) |
||||||
|
array[c++] = x; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
List<Integer> L = ewah[k].getPositions(); |
||||||
|
int[] array = new int[L.size()]; |
||||||
|
int c = 0; |
||||||
|
for (int x : L) |
||||||
|
array[c++] = x; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// uncompressing
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IntIterator iter = ewah[k].intIterator(); |
||||||
|
while (iter.hasNext()) { |
||||||
|
bogus += iter.next(); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
line += "\t\t\t"; |
||||||
|
// logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.or(ewah[j]); |
||||||
|
} |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||||
|
.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||||
|
} |
||||||
|
IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp); |
||||||
|
bogus += IteratorUtil32.materialize(ewahor).sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
line += "\t\t\t"; |
||||||
|
// logical and
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32 ewahand = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahand = ewahand.and(ewah[j]); |
||||||
|
} |
||||||
|
bogus += ewahand.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical and
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32 |
||||||
|
.and(ewahcp); |
||||||
|
bogus += ewahand.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
// fast logical and
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j].getIteratingRLW(); |
||||||
|
} |
||||||
|
IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp); |
||||||
|
bogus += IteratorUtil32.materialize(ewahand).sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
System.out |
||||||
|
.println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and"); |
||||||
|
System.out.println(line); |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,130 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
import com.fr.third.googlecode.javaewah.*; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* To benchmark the logical and (intersection) aggregate. |
||||||
|
*/ |
||||||
|
public class BenchmarkIntersection { |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
test(10, 18, 1); |
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings({ "javadoc"}) |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
long bogus = 0; |
||||||
|
|
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||||
|
for (int times = 0; times < 2; ++times) { |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||||
|
// building
|
||||||
|
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
data[k] = null; |
||||||
|
} |
||||||
|
// sanity check
|
||||||
|
if (true) { |
||||||
|
EWAHCompressedBitmap answer = ewah[0].and(ewah[1]); |
||||||
|
for (int k = 2; k < ewah.length; ++k) |
||||||
|
answer = answer.and(ewah[k]); |
||||||
|
|
||||||
|
EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah); |
||||||
|
if (!answer.equals(ewahand)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug EWAHCompressedBitmap.and"); |
||||||
|
EWAHCompressedBitmap ewahand2 = FastAggregation |
||||||
|
.bufferedand(65536,ewah); |
||||||
|
if (!ewahand.equals(ewahand2)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug FastAggregation.bufferedand "); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.and(ewah[j]); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap |
||||||
|
.and(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = FastAggregation |
||||||
|
.bufferedand(65536,ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = new IteratingBufferedRunningLengthWord( |
||||||
|
ewah[j]); |
||||||
|
} |
||||||
|
IteratingRLW ewahor = IteratorAggregation.bufferedand(ewahcp); |
||||||
|
int wordcounter = IteratorUtil.cardinality(ewahor); |
||||||
|
bogus += wordcounter; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
System.out |
||||||
|
.println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); |
||||||
|
|
||||||
|
System.out.println(line); |
||||||
|
} |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,130 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
import com.fr.third.googlecode.javaewah32.*; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* To benchmark the logical and (intersection) aggregate. |
||||||
|
*/ |
||||||
|
public class BenchmarkIntersection32 { |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
test(10, 18, 1); |
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings({ "javadoc" }) |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
long bogus = 0; |
||||||
|
|
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||||
|
for (int times = 0; times < 2; ++times) { |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
int[] inter = cdg.generateClustered(1 << (nbr/2), Max); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter); |
||||||
|
// building
|
||||||
|
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap32(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
data[k] = null; |
||||||
|
} |
||||||
|
// sanity check
|
||||||
|
if (true) { |
||||||
|
EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]); |
||||||
|
for (int k = 2; k < ewah.length; ++k) |
||||||
|
answer = answer.and(ewah[k]); |
||||||
|
|
||||||
|
EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah); |
||||||
|
if (!answer.equals(ewahand)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug EWAHCompressedBitmap.and"); |
||||||
|
EWAHCompressedBitmap32 ewahand2 = FastAggregation32 |
||||||
|
.bufferedand(65536,ewah); |
||||||
|
if (!ewahand.equals(ewahand2)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug FastAggregation.bufferedand "); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.and(ewah[j]); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||||
|
.and(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = FastAggregation32 |
||||||
|
.bufferedand(65536,ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = new IteratingBufferedRunningLengthWord32( |
||||||
|
ewah[j]); |
||||||
|
} |
||||||
|
IteratingRLW32 ewahor = IteratorAggregation32.bufferedand(ewahcp); |
||||||
|
int wordcounter = IteratorUtil32.cardinality(ewahor); |
||||||
|
bogus += wordcounter; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
System.out |
||||||
|
.println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand"); |
||||||
|
|
||||||
|
System.out.println(line); |
||||||
|
} |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,164 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
import com.fr.third.googlecode.javaewah.*; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* To benchmark the logical or (union) aggregate. |
||||||
|
*/ |
||||||
|
public class BenchmarkUnion { |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
test(10, 18, 1); |
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings({ "javadoc", "deprecation" }) |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
long bogus = 0; |
||||||
|
|
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||||
|
for (int times = 0; times < 2; ++times) { |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||||
|
// building
|
||||||
|
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
data[k] = null; |
||||||
|
} |
||||||
|
// sanity check
|
||||||
|
if (true) { |
||||||
|
EWAHCompressedBitmap answer = ewah[0].or(ewah[1]); |
||||||
|
for (int k = 2; k < ewah.length; ++k) |
||||||
|
answer = answer.or(ewah[k]); |
||||||
|
|
||||||
|
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah); |
||||||
|
if (!answer.equals(ewahor)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug EWAHCompressedBitmap.or"); |
||||||
|
EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah); |
||||||
|
if (!ewahor.equals(ewahor3)) |
||||||
|
throw new RuntimeException("bug FastAggregation.or"); |
||||||
|
EWAHCompressedBitmap ewahor2 = FastAggregation |
||||||
|
.bufferedor(65536,ewah); |
||||||
|
if (!ewahor.equals(ewahor2)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug FastAggregation.bufferedor "); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.or(ewah[j]); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap |
||||||
|
.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = FastAggregation |
||||||
|
.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = FastAggregation |
||||||
|
.bufferedor(65536,ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap x = new EWAHCompressedBitmap(); |
||||||
|
FastAggregation.legacy_orWithContainer(x, ewahcp); |
||||||
|
bogus += x.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = new IteratingBufferedRunningLengthWord( |
||||||
|
ewah[j]); |
||||||
|
} |
||||||
|
IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp); |
||||||
|
int wordcounter = IteratorUtil.cardinality(ewahor); |
||||||
|
bogus += wordcounter; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
System.out |
||||||
|
.println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); |
||||||
|
|
||||||
|
System.out.println(line); |
||||||
|
} |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,165 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
|
||||||
|
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||||
|
import com.fr.third.googlecode.javaewah32.*; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* To benchmark the logical or (union) aggregate. |
||||||
|
*/ |
||||||
|
public class BenchmarkUnion32 { |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
test(10, 18, 1); |
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings({ "javadoc", "deprecation" }) |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
long bogus = 0; |
||||||
|
|
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||||
|
for (int times = 0; times < 2; ++times) { |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||||
|
// building
|
||||||
|
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap32(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
data[k] = null; |
||||||
|
} |
||||||
|
// sanity check
|
||||||
|
if(true){ |
||||||
|
EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]); |
||||||
|
for(int k = 2; k < ewah.length; ++k) |
||||||
|
answer = answer.or(ewah[k]); |
||||||
|
|
||||||
|
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||||
|
.or(ewah); |
||||||
|
if(!answer.equals(ewahor)) throw new RuntimeException("bug EWAHCompressedBitmap.or"); |
||||||
|
EWAHCompressedBitmap32 ewahor3 = FastAggregation |
||||||
|
.or(ewah); |
||||||
|
if(!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or"); |
||||||
|
EWAHCompressedBitmap32 ewahor2 = FastAggregation32 |
||||||
|
.bufferedor(65536,ewah); |
||||||
|
if(!ewahor.equals(ewahor2)) throw new RuntimeException("bug FastAggregation.bufferedor "); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.or(ewah[j]); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32 |
||||||
|
.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = FastAggregation |
||||||
|
.or(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = FastAggregation32 |
||||||
|
.bufferedor(65536,ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32(); |
||||||
|
FastAggregation32.legacy_orWithContainer(x, ewahcp); |
||||||
|
bogus += x.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical or
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = new IteratingBufferedRunningLengthWord32(ewah[j]); |
||||||
|
} |
||||||
|
IteratingRLW32 ewahor = IteratorAggregation32 |
||||||
|
.bufferedor(ewahcp); |
||||||
|
int wordcounter = IteratorUtil32.cardinality(ewahor); |
||||||
|
bogus += wordcounter; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
System.out |
||||||
|
.println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor"); |
||||||
|
|
||||||
|
System.out.println(line); |
||||||
|
} |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,134 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
import com.fr.third.googlecode.javaewah.*; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* To benchmark the logical xor aggregate. |
||||||
|
*/ |
||||||
|
public class BenchmarkXOR { |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
//test(10, 18, 1);
|
||||||
|
test(2, 22, 1); |
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings({ "javadoc" }) |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
long bogus = 0; |
||||||
|
|
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||||
|
for (int times = 0; times < 2; ++times) { |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||||
|
// building
|
||||||
|
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N]; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
data[k] = null; |
||||||
|
} |
||||||
|
// sanity check
|
||||||
|
if (true) { |
||||||
|
EWAHCompressedBitmap answer = ewah[0].xor(ewah[1]); |
||||||
|
for (int k = 2; k < ewah.length; ++k) |
||||||
|
answer = answer.xor(ewah[k]); |
||||||
|
EWAHCompressedBitmap ewahor3 = FastAggregation.xor(ewah); |
||||||
|
if (!answer.equals(ewahor3)) |
||||||
|
throw new RuntimeException("bug FastAggregation.xor"); |
||||||
|
EWAHCompressedBitmap ewahor2 = FastAggregation |
||||||
|
.bufferedxor(65536,ewah); |
||||||
|
if (!answer.equals(ewahor2)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug FastAggregation.bufferedxor "); |
||||||
|
EWAHCompressedBitmap iwah = IteratorUtil.materialize(IteratorAggregation.bufferedxor(IteratorUtil.toIterators(ewah))); |
||||||
|
if (!answer.equals(iwah)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug xor it "); |
||||||
|
|
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.xor(ewah[j]); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = FastAggregation |
||||||
|
.xor(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
// fast logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap ewahor = FastAggregation |
||||||
|
.bufferedxor(65536,ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
// fast logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW[] ewahcp = new IteratingRLW[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = new IteratingBufferedRunningLengthWord( |
||||||
|
ewah[j]); |
||||||
|
} |
||||||
|
IteratingRLW ewahor = IteratorAggregation.bufferedxor(ewahcp); |
||||||
|
int wordcounter = IteratorUtil.cardinality(ewahor); |
||||||
|
bogus += wordcounter; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
System.out |
||||||
|
.println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); |
||||||
|
|
||||||
|
System.out.println(line); |
||||||
|
} |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,137 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
import java.text.DecimalFormat; |
||||||
|
|
||||||
|
import com.fr.third.googlecode.javaewah.FastAggregation; |
||||||
|
import com.fr.third.googlecode.javaewah32.*; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* To benchmark the logical xor aggregate. |
||||||
|
*/ |
||||||
|
public class BenchmarkXOR32 { |
||||||
|
|
||||||
|
@SuppressWarnings("javadoc") |
||||||
|
public static void main(String args[]) { |
||||||
|
test(10, 18, 1); |
||||||
|
//test(2, 22, 1);
|
||||||
|
} |
||||||
|
|
||||||
|
@SuppressWarnings({ "javadoc" }) |
||||||
|
public static void test(int N, int nbr, int repeat) { |
||||||
|
long bogus = 0; |
||||||
|
|
||||||
|
DecimalFormat df = new DecimalFormat("0.###"); |
||||||
|
ClusteredDataGenerator cdg = new ClusteredDataGenerator(); |
||||||
|
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) { |
||||||
|
for (int times = 0; times < 2; ++times) { |
||||||
|
String line = ""; |
||||||
|
long bef, aft; |
||||||
|
line += sparsity; |
||||||
|
int[][] data = new int[N][]; |
||||||
|
int Max = (1 << (nbr + sparsity)); |
||||||
|
for (int k = 0; k < N; ++k) |
||||||
|
data[k] = cdg.generateClustered(1 << nbr, Max); |
||||||
|
// building
|
||||||
|
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N]; |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
ewah[k] = new EWAHCompressedBitmap32(); |
||||||
|
for (int x = 0; x < data[k].length; ++x) { |
||||||
|
ewah[k].set(data[k][x]); |
||||||
|
} |
||||||
|
data[k] = null; |
||||||
|
} |
||||||
|
// sanity check
|
||||||
|
if (true) { |
||||||
|
EWAHCompressedBitmap32 answer = ewah[0].xor(ewah[1]); |
||||||
|
for (int k = 2; k < ewah.length; ++k) |
||||||
|
answer = answer.xor(ewah[k]); |
||||||
|
EWAHCompressedBitmap32 ewahor3 = FastAggregation.xor(ewah); |
||||||
|
if (!answer.equals(ewahor3)) |
||||||
|
throw new RuntimeException("bug FastAggregation.xor"); |
||||||
|
EWAHCompressedBitmap32 ewahor2 = FastAggregation32 |
||||||
|
.bufferedxor(65536,ewah); |
||||||
|
if (!answer.equals(ewahor2)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug FastAggregation.bufferedxor "); |
||||||
|
EWAHCompressedBitmap32 iwah = IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(ewah))); |
||||||
|
if (!answer.equals(iwah)) |
||||||
|
throw new RuntimeException( |
||||||
|
"bug xor it "); |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32 ewahor = ewah[0]; |
||||||
|
for (int j = 1; j < k + 1; ++j) { |
||||||
|
ewahor = ewahor.xor(ewah[j]); |
||||||
|
} |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = FastAggregation |
||||||
|
.xor(ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
// fast logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = ewah[j]; |
||||||
|
} |
||||||
|
EWAHCompressedBitmap32 ewahor = FastAggregation32 |
||||||
|
.bufferedxor(65536,ewahcp); |
||||||
|
bogus += ewahor.sizeInBits(); |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
// fast logical xor
|
||||||
|
bef = System.currentTimeMillis(); |
||||||
|
for (int r = 0; r < repeat; ++r) |
||||||
|
for (int k = 0; k < N; ++k) { |
||||||
|
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1]; |
||||||
|
for (int j = 0; j < k + 1; ++j) { |
||||||
|
ewahcp[j] = new IteratingBufferedRunningLengthWord32( |
||||||
|
ewah[j]); |
||||||
|
} |
||||||
|
IteratingRLW32 ewahor = IteratorAggregation32.bufferedxor(ewahcp); |
||||||
|
int wordcounter = IteratorUtil32.cardinality(ewahor); |
||||||
|
bogus += wordcounter; |
||||||
|
} |
||||||
|
aft = System.currentTimeMillis(); |
||||||
|
|
||||||
|
line += "\t" + df.format((aft - bef) / 1000.0); |
||||||
|
|
||||||
|
|
||||||
|
System.out |
||||||
|
.println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based"); |
||||||
|
|
||||||
|
System.out.println(line); |
||||||
|
} |
||||||
|
System.out.println("# bogus =" + bogus); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,78 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* This class will generate lists of random integers with a "clustered" distribution. |
||||||
|
* Reference: |
||||||
|
* Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
*/ |
||||||
|
public class ClusteredDataGenerator { |
||||||
|
|
||||||
|
/** |
||||||
|
* |
||||||
|
*/ |
||||||
|
public ClusteredDataGenerator() { |
||||||
|
this.unidg = new UniformDataGenerator(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @param seed random seed |
||||||
|
*/ |
||||||
|
public ClusteredDataGenerator(final int seed) { |
||||||
|
this.unidg = new UniformDataGenerator(seed); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* generates randomly N distinct integers from 0 to Max. |
||||||
|
* @param N number of integers |
||||||
|
* @param Max maximum integer value |
||||||
|
* @return a randomly generated array |
||||||
|
*/ |
||||||
|
public int[] generateClustered(int N, int Max) { |
||||||
|
int[] array = new int[N]; |
||||||
|
fillClustered(array, 0, N, 0, Max); |
||||||
|
return array; |
||||||
|
} |
||||||
|
|
||||||
|
void fillClustered(int[] array, int offset, int length, int Min, int Max) { |
||||||
|
final int range = Max - Min; |
||||||
|
if ((range == length) || (length <= 10)) { |
||||||
|
fillUniform(array, offset, length, Min, Max); |
||||||
|
return; |
||||||
|
} |
||||||
|
final int cut = length / 2 |
||||||
|
+ ((range - length - 1 > 0) ? this.unidg.rand.nextInt(range - length - 1) : 0); |
||||||
|
final double p = this.unidg.rand.nextDouble(); |
||||||
|
if (p < 0.25) { |
||||||
|
fillUniform(array, offset, length / 2, Min, Min + cut); |
||||||
|
fillClustered(array, offset + length / 2, length - length / 2, Min + cut, |
||||||
|
Max); |
||||||
|
} else if (p < 0.5) { |
||||||
|
fillClustered(array, offset, length / 2, Min, Min + cut); |
||||||
|
fillUniform(array, offset + length / 2, length - length / 2, Min + cut, |
||||||
|
Max); |
||||||
|
} else { |
||||||
|
fillClustered(array, offset, length / 2, Min, Min + cut); |
||||||
|
fillClustered(array, offset + length / 2, length - length / 2, Min + cut, |
||||||
|
Max); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void fillUniform(int[] array, int offset, int length, int Min, int Max) { |
||||||
|
int[] v = this.unidg.generateUniform(length, Max - Min); |
||||||
|
for (int k = 0; k < v.length; ++k) |
||||||
|
array[k + offset] = Min + v[k]; |
||||||
|
} |
||||||
|
|
||||||
|
UniformDataGenerator unidg; |
||||||
|
|
||||||
|
} |
||||||
|
|
@ -0,0 +1,114 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah.benchmark; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
import java.util.Arrays; |
||||||
|
import java.util.BitSet; |
||||||
|
import java.util.HashSet; |
||||||
|
import java.util.Iterator; |
||||||
|
import java.util.Random; |
||||||
|
|
||||||
|
/**
 * This class will generate "uniform" lists of random integers.
 *
 * @author Daniel Lemire
 */
public class UniformDataGenerator {

    /** Random source shared by all generation methods (read by ClusteredDataGenerator too). */
    Random rand;

    /**
     * construct generator of random arrays.
     */
    public UniformDataGenerator() {
        this.rand = new Random();
    }

    /**
     * @param seed random seed
     */
    public UniformDataGenerator(final int seed) {
        this.rand = new Random(seed);
    }

    /**
     * generates randomly N distinct integers from [0,Max) by drawing
     * values into a hash set until N distinct ones were collected.
     *
     * @param N Number of integers to generate
     * @param Max Exclusive upper bound of the integers
     * @return sorted array containing the random integers
     */
    int[] generateUniformHash(int N, int Max) {
        if (N > Max) {
            throw new RuntimeException("not possible");
        }
        int[] ans = new int[N];
        HashSet<Integer> s = new HashSet<Integer>();
        while (s.size() < N) {
            s.add(Integer.valueOf(this.rand.nextInt(Max)));
        }
        Iterator<Integer> i = s.iterator();
        for (int k = 0; k < N; ++k) {
            ans[k] = i.next().intValue();
        }
        Arrays.sort(ans);
        return ans;
    }

    /**
     * output all integers from the range [0,Max) that are not
     * in the array
     *
     * @param x sorted array of distinct integers from [0,Max)
     * @param Max exclusive upper bound of the range
     * @return sorted array with the integers of [0,Max) missing from x
     */
    static int[] negate(int[] x, int Max) {
        int[] ans = new int[Max - x.length];
        int i = 0; // next candidate value
        int c = 0; // next free slot in ans
        for (int j = 0; j < x.length; ++j) {
            int v = x[j];
            // everything strictly below the next excluded value is kept
            for (; i < v; ++i) {
                ans[c++] = i;
            }
            ++i; // skip the excluded value itself
        }
        // values above the largest excluded one
        while (c < ans.length) {
            ans[c++] = i++;
        }
        return ans;
    }

    /**
     * generates randomly N distinct integers from 0 to Max.
     *
     * The strategy depends on the density N / Max: dense requests are
     * answered by generating the complement and negating it, medium ones
     * use a bitmap, sparse ones a hash set. The density tests are carried
     * out in 64-bit arithmetic so that large N cannot overflow and select
     * the wrong strategy.
     *
     * @param N Number of integers to generate
     * @param Max Maximum value of the integers
     * @return array containing random integers
     * @throws IllegalArgumentException if N is negative or exceeds Max
     */
    public int[] generateUniform(int N, int Max) {
        if (N < 0 || N > Max) {
            throw new IllegalArgumentException(
                    "cannot draw " + N + " distinct integers below " + Max);
        }
        if (2L * N > Max) {
            return negate(generateUniform(Max - N, Max), Max);
        }
        if (2048L * N > Max) {
            return generateUniformBitmap(N, Max);
        }
        return generateUniformHash(N, Max);
    }

    /**
     * generates randomly N distinct integers from 0 to Max using a bitmap.
     *
     * @param N Number of integers to generate
     * @param Max Maximum value of the integers
     * @return array containing random integers
     */
    int[] generateUniformBitmap(int N, int Max) {
        if (N > Max) {
            throw new RuntimeException("not possible");
        }
        int[] ans = new int[N];
        BitSet bs = new BitSet(Max);
        int cardinality = 0;
        while (cardinality < N) {
            int v = this.rand.nextInt(Max);
            if (!bs.get(v)) {
                bs.set(v);
                cardinality++;
            }
        }
        // the set bits are visited in increasing order, so ans is sorted
        int pos = 0;
        for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
            ans[pos++] = i;
        }
        return ans;
    }

}
@ -0,0 +1,102 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* BitCounter is a fake bitset data structure. Instead of storing the actual data, |
||||||
|
* it only records the number of set bits. |
||||||
|
* |
||||||
|
* @since 0.5.0 |
||||||
|
* @author Daniel Lemire and David McIntosh |
||||||
|
*/ |
||||||
|
|
||||||
|
public final class BitCounter32 implements BitmapStorage32 { |
||||||
|
|
||||||
|
/** |
||||||
|
* Virtually add words directly to the bitmap |
||||||
|
* |
||||||
|
* @param newdata the word |
||||||
|
*/ |
||||||
|
// @Override : causes problems with Java 1.5
|
||||||
|
@Override |
||||||
|
public void add(final int newdata) { |
||||||
|
this.oneBits += Integer.bitCount(newdata); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* virtually add several literal words. |
||||||
|
* |
||||||
|
* @param data the literal words |
||||||
|
* @param start the starting point in the array |
||||||
|
* @param number the number of literal words to add |
||||||
|
*/ |
||||||
|
// @Override : causes problems with Java 1.5
|
||||||
|
@Override |
||||||
|
public void addStreamOfLiteralWords(int[] data, int start, int number) { |
||||||
|
for(int i=start;i<start+number;i++) { |
||||||
|
add(data[i]); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* virtually add many |
||||||
|
* zeroes or ones. |
||||||
|
* |
||||||
|
* @param v zeros or ones |
||||||
|
* @param number how many to words add |
||||||
|
*/ |
||||||
|
// @Override : causes problems with Java 1.5
|
||||||
|
@Override |
||||||
|
public void addStreamOfEmptyWords(boolean v, int number) { |
||||||
|
if (v) { |
||||||
|
this.oneBits += number * EWAHCompressedBitmap32.wordinbits; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* virtually add several negated literal words. |
||||||
|
* |
||||||
|
* @param data the literal words |
||||||
|
* @param start the starting point in the array |
||||||
|
* @param number the number of literal words to add |
||||||
|
*/ |
||||||
|
// @Override : causes problems with Java 1.5
|
||||||
|
@Override |
||||||
|
public void addStreamOfNegatedLiteralWords(int[] data, int start, |
||||||
|
int number) { |
||||||
|
for(int i=start;i<start+number;i++) { |
||||||
|
add(~data[i]); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* As you act on this class, it records the number of set (true) bits. |
||||||
|
* |
||||||
|
* @return number of set bits |
||||||
|
*/ |
||||||
|
public int getCount() { |
||||||
|
return this.oneBits; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* should directly set the sizeinbits field, but is effectively ignored in this class. |
||||||
|
* |
||||||
|
* @param bits number of bits |
||||||
|
*/ |
||||||
|
// @Override : causes problems with Java 1.5
|
||||||
|
@Override |
||||||
|
public void setSizeInBits(int bits) { |
||||||
|
// no action
|
||||||
|
} |
||||||
|
|
||||||
|
private int oneBits; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} |
@ -0,0 +1,60 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/**
 * Low-level write operations of a bitmap built from 32-bit words.
 *
 * @since 0.5.0
 * @author Daniel Lemire and David McIntosh
 */
public interface BitmapStorage32 {

    /**
     * Appends one word directly to the bitmap (for expert use).
     *
     * This is the usual way data reaches the storage: one int,
     * i.e. 32 bits, at a time.
     *
     * @param newdata the word
     */
    void add(int newdata);

    /**
     * Appends several literal words in one call; this might be faster than
     * adding them one by one.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    void addStreamOfLiteralWords(int[] data, int start, int number);

    /**
     * For experts: appends a run of words that are all zeroes or all ones.
     *
     * @param v zeros or ones
     * @param number how many to words add
     */
    void addStreamOfEmptyWords(boolean v, int number);

    /**
     * Like "addStreamOfLiteralWords" but negates the words being added.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    void addStreamOfNegatedLiteralWords(int[] data, int start, int number);

    /**
     * directly set the sizeinbits field
     *
     * @param bits number of bits
     */
    void setSizeInBits(int bits);

}
@ -0,0 +1,152 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
import com.fr.third.googlecode.javaewah.CloneableIterator; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* This class can be used to iterate over blocks of bitmap data. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class BufferedIterator32 implements IteratingRLW32, Cloneable { |
||||||
|
/** |
||||||
|
* Instantiates a new iterating buffered running length word. |
||||||
|
* |
||||||
|
* @param iterator iterator |
||||||
|
*/ |
||||||
|
public BufferedIterator32(final CloneableIterator<EWAHIterator32> iterator) { |
||||||
|
this.masteriterator = iterator; |
||||||
|
if(this.masteriterator.hasNext()) { |
||||||
|
this.iterator = this.masteriterator.next(); |
||||||
|
this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||||
|
this.buffer = this.iterator.buffer(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Discard first words, iterating to the next running length word if needed. |
||||||
|
* |
||||||
|
* @param x the number of words to be discarded |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void discardFirstWords(int x) { |
||||||
|
while (x > 0) { |
||||||
|
if (this.brlw.RunningLength > x) { |
||||||
|
this.brlw.RunningLength -= x; |
||||||
|
return; |
||||||
|
} |
||||||
|
x -= this.brlw.RunningLength; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
int toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||||
|
|
||||||
|
this.literalWordStartPosition += toDiscard; |
||||||
|
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||||
|
x -= toDiscard; |
||||||
|
if ((x > 0) || (this.brlw.size() == 0)) { |
||||||
|
if (!this.next()) { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
/** |
||||||
|
* Move to the next RunningLengthWord |
||||||
|
* @return whether the move was possible |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean next() { |
||||||
|
if (!this.iterator.hasNext()) { |
||||||
|
if(!reload()) { |
||||||
|
this.brlw.NumberOfLiteralWords = 0; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
return false; |
||||||
|
} |
||||||
|
} |
||||||
|
this.brlw.reset(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||||
|
return true; |
||||||
|
} |
||||||
|
private boolean reload() { |
||||||
|
if(!this.masteriterator.hasNext()) { |
||||||
|
return false; |
||||||
|
} |
||||||
|
this.iterator = this.masteriterator.next(); |
||||||
|
this.buffer = this.iterator.buffer(); |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Get the nth literal word for the current running length word |
||||||
|
* @param index zero based index |
||||||
|
* @return the literal word |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getLiteralWordAt(int index) { |
||||||
|
return this.buffer[this.literalWordStartPosition + index]; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words for the current running length word. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return this.brlw.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean getRunningBit() { |
||||||
|
return this.brlw.RunningBit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getRunningLength() { |
||||||
|
return this.brlw.RunningLength; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Size in uncompressed words of the current running length word. |
||||||
|
* |
||||||
|
* @return the size |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int size() { |
||||||
|
return this.brlw.size(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public BufferedIterator32 clone() throws CloneNotSupportedException { |
||||||
|
BufferedIterator32 answer = (BufferedIterator32) super.clone(); |
||||||
|
answer.brlw = this.brlw.clone(); |
||||||
|
answer.buffer = this.buffer; |
||||||
|
answer.iterator = this.iterator.clone(); |
||||||
|
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||||
|
answer.masteriterator = this.masteriterator.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
private BufferedRunningLengthWord32 brlw; |
||||||
|
private int[] buffer; |
||||||
|
private int literalWordStartPosition; |
||||||
|
private EWAHIterator32 iterator; |
||||||
|
private CloneableIterator<EWAHIterator32> masteriterator; |
||||||
|
} |
@ -0,0 +1,174 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Mostly for internal use. Similar to RunningLengthWord, but can |
||||||
|
* be modified without access to the array, and has faster access. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* @since 0.5.0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
public final class BufferedRunningLengthWord32 implements Cloneable { |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new buffered running length word. |
||||||
|
* |
||||||
|
* @param a the word |
||||||
|
*/ |
||||||
|
public BufferedRunningLengthWord32(final int a) { |
||||||
|
this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); |
||||||
|
this.RunningBit = (a & 1) != 0; |
||||||
|
this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new buffered running length word. |
||||||
|
* |
||||||
|
* @param rlw the rlw |
||||||
|
*/ |
||||||
|
public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) { |
||||||
|
this(rlw.parent.buffer[rlw.position]); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Discard first words. |
||||||
|
* |
||||||
|
* @param x the number of words to be discarded |
||||||
|
*/ |
||||||
|
public void discardFirstWords(int x) { |
||||||
|
if (this.RunningLength >= x) { |
||||||
|
this.RunningLength -= x; |
||||||
|
return; |
||||||
|
} |
||||||
|
x -= this.RunningLength; |
||||||
|
this.RunningLength = 0; |
||||||
|
this.literalwordoffset += x; |
||||||
|
this.NumberOfLiteralWords -= x; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return this.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
public boolean getRunningBit() { |
||||||
|
return this.RunningBit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
public int getRunningLength() { |
||||||
|
return this.RunningLength; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Reset the values using the provided word. |
||||||
|
* |
||||||
|
* @param a the word |
||||||
|
*/ |
||||||
|
public void reset(final int a) { |
||||||
|
this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits)); |
||||||
|
this.RunningBit = (a & 1) != 0; |
||||||
|
this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount); |
||||||
|
this.literalwordoffset = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Reset the values of this running length word so that it has the same values |
||||||
|
* as the other running length word. |
||||||
|
* |
||||||
|
* @param rlw the other running length word |
||||||
|
*/ |
||||||
|
public void reset(final RunningLengthWord32 rlw) { |
||||||
|
reset(rlw.parent.buffer[rlw.position]); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the number of literal words. |
||||||
|
* |
||||||
|
* @param number the new number of literal words |
||||||
|
*/ |
||||||
|
public void setNumberOfLiteralWords(final int number) { |
||||||
|
this.NumberOfLiteralWords = number; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running bit. |
||||||
|
* |
||||||
|
* @param b the new running bit |
||||||
|
*/ |
||||||
|
public void setRunningBit(final boolean b) { |
||||||
|
this.RunningBit = b; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running length. |
||||||
|
* |
||||||
|
* @param number the new running length |
||||||
|
*/ |
||||||
|
public void setRunningLength(final int number) { |
||||||
|
this.RunningLength = number; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Size in uncompressed words. |
||||||
|
* |
||||||
|
* @return the int |
||||||
|
*/ |
||||||
|
public int size() { |
||||||
|
return this.RunningLength + this.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
* @see java.lang.Object#toString() |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public String toString() { |
||||||
|
return "running bit = " + getRunningBit() + " running length = " |
||||||
|
+ getRunningLength() + " number of lit. words " |
||||||
|
+ getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public BufferedRunningLengthWord32 clone() throws CloneNotSupportedException { |
||||||
|
BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super.clone(); |
||||||
|
answer.literalwordoffset = this.literalwordoffset; |
||||||
|
answer.NumberOfLiteralWords = this.NumberOfLiteralWords; |
||||||
|
answer.RunningBit = this.RunningBit; |
||||||
|
answer.RunningLength = this.RunningLength; |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
/** how many literal words have we read so far? */ |
||||||
|
public int literalwordoffset = 0; |
||||||
|
|
||||||
|
/** The Number of literal words. */ |
||||||
|
public int NumberOfLiteralWords; |
||||||
|
|
||||||
|
/** The Running bit. */ |
||||||
|
public boolean RunningBit; |
||||||
|
|
||||||
|
/** The Running length. */ |
||||||
|
public int RunningLength; |
||||||
|
|
||||||
|
|
||||||
|
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,98 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* The class EWAHIterator represents a special type of |
||||||
|
* efficient iterator iterating over (uncompressed) words of bits. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* @since 0.5.0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
public final class EWAHIterator32 implements Cloneable { |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new eWAH iterator. |
||||||
|
* |
||||||
|
* @param a the array of words |
||||||
|
* @param sizeinwords the number of words that are significant in the array of words |
||||||
|
*/ |
||||||
|
public EWAHIterator32(final EWAHCompressedBitmap32 a, final int sizeinwords) { |
||||||
|
this.rlw = new RunningLengthWord32(a, 0); |
||||||
|
this.size = sizeinwords; |
||||||
|
this.pointer = 0; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Allow expert developers to instantiate an EWAHIterator. |
||||||
|
* |
||||||
|
* @param bitmap we want to iterate over |
||||||
|
* @return an iterator |
||||||
|
*/ |
||||||
|
public static EWAHIterator32 getEWAHIterator(EWAHCompressedBitmap32 bitmap) { |
||||||
|
return bitmap.getEWAHIterator(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Access to the array of words |
||||||
|
* |
||||||
|
* @return the int[] |
||||||
|
*/ |
||||||
|
public int[] buffer() { |
||||||
|
return this.rlw.parent.buffer; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Position of the literal words represented by this running length word. |
||||||
|
* |
||||||
|
* @return the int |
||||||
|
*/ |
||||||
|
public int literalWords() { |
||||||
|
return this.pointer - this.rlw.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Checks for next. |
||||||
|
* |
||||||
|
* @return true, if successful |
||||||
|
*/ |
||||||
|
public boolean hasNext() { |
||||||
|
return this.pointer < this.size; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Next running length word. |
||||||
|
* |
||||||
|
* @return the running length word |
||||||
|
*/ |
||||||
|
public RunningLengthWord32 next() { |
||||||
|
this.rlw.position = this.pointer; |
||||||
|
this.pointer += this.rlw.getNumberOfLiteralWords() + 1; |
||||||
|
return this.rlw; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator32 clone() throws CloneNotSupportedException { |
||||||
|
EWAHIterator32 ans = (EWAHIterator32) super.clone(); |
||||||
|
ans.rlw = this.rlw.clone(); |
||||||
|
ans.size = this.size; |
||||||
|
ans.pointer = this.pointer; |
||||||
|
return ans; |
||||||
|
} |
||||||
|
|
||||||
|
/** The pointer represent the location of the current running length |
||||||
|
* word in the array of words (embedded in the rlw attribute). */ |
||||||
|
int pointer; |
||||||
|
|
||||||
|
/** The current running length word. */ |
||||||
|
RunningLengthWord32 rlw; |
||||||
|
|
||||||
|
/** The size in words. */ |
||||||
|
int size; |
||||||
|
|
||||||
|
} |
@ -0,0 +1,377 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
import java.util.Arrays; |
||||||
|
import java.util.Comparator; |
||||||
|
import java.util.PriorityQueue; |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Fast algorithms to aggregate many bitmaps. These algorithms are just given as |
||||||
|
* reference. They may not be faster than the corresponding methods in the |
||||||
|
* EWAHCompressedBitmap class. |
||||||
|
* |
||||||
|
* @author Daniel Lemire |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class FastAggregation32 { |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||||
|
* @return the or aggregate. |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap32 bufferedand(final int bufsize, |
||||||
|
final EWAHCompressedBitmap32... bitmaps) { |
||||||
|
EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); |
||||||
|
bufferedandWithContainer(answer,bufsize, bitmaps); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
/** |
||||||
|
* Compute the and aggregate using a temporary uncompressed bitmap. |
||||||
|
* |
||||||
|
* @param container where the aggregate is written |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words (per input bitmap) |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
*/ |
||||||
|
public static void bufferedandWithContainer(final BitmapStorage32 container,final int bufsize, |
||||||
|
final EWAHCompressedBitmap32... bitmaps) { |
||||||
|
|
||||||
|
java.util.LinkedList<IteratingBufferedRunningLengthWord32> al = new java.util.LinkedList<IteratingBufferedRunningLengthWord32>(); |
||||||
|
for (EWAHCompressedBitmap32 bitmap : bitmaps) { |
||||||
|
al.add(new IteratingBufferedRunningLengthWord32(bitmap)); |
||||||
|
} |
||||||
|
int[] hardbitmap = new int[bufsize*bitmaps.length]; |
||||||
|
|
||||||
|
for(IteratingRLW32 i : al) |
||||||
|
if (i.size() == 0) { |
||||||
|
al.clear(); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
while (!al.isEmpty()) { |
||||||
|
Arrays.fill(hardbitmap, ~0); |
||||||
|
int effective = Integer.MAX_VALUE; |
||||||
|
for(IteratingRLW32 i : al) { |
||||||
|
int eff = IteratorAggregation32.inplaceand(hardbitmap, i); |
||||||
|
if (eff < effective) |
||||||
|
effective = eff; |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
container.add(hardbitmap[k]); |
||||||
|
for(IteratingRLW32 i : al) |
||||||
|
if (i.size() == 0) { |
||||||
|
al.clear(); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @return the or aggregate. |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap32 bufferedor(final int bufsize, |
||||||
|
final EWAHCompressedBitmap32... bitmaps) { |
||||||
|
EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); |
||||||
|
bufferedorWithContainer(answer, bufsize, bitmaps); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the or aggregate using a temporary uncompressed bitmap. |
||||||
|
* |
||||||
|
* @param container where the aggregate is written |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
*/ |
||||||
|
public static void bufferedorWithContainer(final BitmapStorage32 container,final int bufsize, |
||||||
|
final EWAHCompressedBitmap32... bitmaps) { |
||||||
|
int range = 0; |
||||||
|
EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); |
||||||
|
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||||
|
return b.sizeinbits - a.sizeinbits; |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
java.util.ArrayList<IteratingBufferedRunningLengthWord32> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord32>(); |
||||||
|
for (EWAHCompressedBitmap32 bitmap : sbitmaps) { |
||||||
|
if (bitmap.sizeinbits > range) |
||||||
|
range = bitmap.sizeinbits; |
||||||
|
al.add(new IteratingBufferedRunningLengthWord32(bitmap)); |
||||||
|
} |
||||||
|
int[] hardbitmap = new int[bufsize]; |
||||||
|
int maxr = al.size(); |
||||||
|
while (maxr > 0) { |
||||||
|
int effective = 0; |
||||||
|
for (int k = 0; k < maxr; ++k) { |
||||||
|
if (al.get(k).size() > 0) { |
||||||
|
int eff = IteratorAggregation32.inplaceor(hardbitmap, al.get(k)); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
maxr = k; |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
container.add(hardbitmap[k]); |
||||||
|
Arrays.fill(hardbitmap, 0); |
||||||
|
|
||||||
|
} |
||||||
|
container.setSizeInBits(range); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @return the xor aggregate. |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap32 bufferedxor(final int bufsize, |
||||||
|
final EWAHCompressedBitmap32... bitmaps) { |
||||||
|
EWAHCompressedBitmap32 answer = new EWAHCompressedBitmap32(); |
||||||
|
bufferedxorWithContainer(answer, bufsize, bitmaps); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the xor aggregate using a temporary uncompressed bitmap. |
||||||
|
* |
||||||
|
* @param container where the aggregate is written |
||||||
|
* @param bufsize buffer size used during the computation in 64-bit words |
||||||
|
* @param bitmaps the source bitmaps |
||||||
|
*/ |
||||||
|
public static void bufferedxorWithContainer(final BitmapStorage32 container,final int bufsize, |
||||||
|
final EWAHCompressedBitmap32... bitmaps) { |
||||||
|
int range = 0; |
||||||
|
EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone(); |
||||||
|
Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||||
|
return b.sizeinbits - a.sizeinbits; |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
java.util.ArrayList<IteratingBufferedRunningLengthWord32> al = new java.util.ArrayList<IteratingBufferedRunningLengthWord32>(); |
||||||
|
for (EWAHCompressedBitmap32 bitmap : sbitmaps) { |
||||||
|
if (bitmap.sizeinbits > range) |
||||||
|
range = bitmap.sizeinbits; |
||||||
|
al.add(new IteratingBufferedRunningLengthWord32(bitmap)); |
||||||
|
} |
||||||
|
int[] hardbitmap = new int[bufsize]; |
||||||
|
int maxr = al.size(); |
||||||
|
while (maxr > 0) { |
||||||
|
int effective = 0; |
||||||
|
for (int k = 0; k < maxr; ++k) { |
||||||
|
if (al.get(k).size() > 0) { |
||||||
|
int eff = IteratorAggregation32.inplacexor(hardbitmap, al.get(k)); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
maxr = k; |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
container.add(hardbitmap[k]); |
||||||
|
Arrays.fill(hardbitmap, 0); |
||||||
|
} |
||||||
|
container.setSizeInBits(range); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Uses a priority queue to compute the or aggregate. |
||||||
|
* @param container where we write the result |
||||||
|
* @param bitmaps to be aggregated |
||||||
|
*/ |
||||||
|
public static void orToContainer(final BitmapStorage32 container, |
||||||
|
final EWAHCompressedBitmap32 ... bitmaps) { |
||||||
|
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||||
|
PriorityQueue<EWAHCompressedBitmap32> pq = new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, |
||||||
|
new Comparator<EWAHCompressedBitmap32>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||||
|
return a.sizeInBytes() - b.sizeInBytes(); |
||||||
|
} |
||||||
|
}); |
||||||
|
for (EWAHCompressedBitmap32 x : bitmaps) { |
||||||
|
pq.add(x); |
||||||
|
} |
||||||
|
while (pq.size() > 2) { |
||||||
|
EWAHCompressedBitmap32 x1 = pq.poll(); |
||||||
|
EWAHCompressedBitmap32 x2 = pq.poll(); |
||||||
|
pq.add(x1.or(x2)); |
||||||
|
} |
||||||
|
pq.poll().orToContainer(pq.poll(), container); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Uses a priority queue to compute the xor aggregate. |
||||||
|
* @param container where we write the result |
||||||
|
* @param bitmaps to be aggregated |
||||||
|
*/ |
||||||
|
public static void xorToContainer(final BitmapStorage32 container, |
||||||
|
final EWAHCompressedBitmap32 ... bitmaps) { |
||||||
|
if(bitmaps.length < 2) throw new IllegalArgumentException("We need at least two bitmaps"); |
||||||
|
PriorityQueue<EWAHCompressedBitmap32> pq = new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, |
||||||
|
new Comparator<EWAHCompressedBitmap32>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||||
|
return a.sizeInBytes() - b.sizeInBytes(); |
||||||
|
} |
||||||
|
}); |
||||||
|
for (EWAHCompressedBitmap32 x : bitmaps) { |
||||||
|
pq.add(x); |
||||||
|
} |
||||||
|
while (pq.size() > 2) { |
||||||
|
EWAHCompressedBitmap32 x1 = pq.poll(); |
||||||
|
EWAHCompressedBitmap32 x2 = pq.poll(); |
||||||
|
pq.add(x1.xor(x2)); |
||||||
|
} |
||||||
|
pq.poll().xorToContainer(pq.poll(), container); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* For internal use. Computes the bitwise or of the provided bitmaps and |
||||||
|
* stores the result in the container. (This used to be the default.) |
||||||
|
* |
||||||
|
* @deprecated use EWAHCompressedBitmap32.or instead |
||||||
|
* @since 0.4.0 |
||||||
|
* @param container where store the result |
||||||
|
* @param bitmaps to be aggregated |
||||||
|
*/ |
||||||
|
@Deprecated |
||||||
|
public static void legacy_orWithContainer(final BitmapStorage32 container, |
||||||
|
final EWAHCompressedBitmap32... bitmaps) { |
||||||
|
if (bitmaps.length == 2) { |
||||||
|
// should be more efficient
|
||||||
|
bitmaps[0].orToContainer(bitmaps[1], container); |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
// Sort the bitmaps in descending order by sizeinbits. We will exhaust the
|
||||||
|
// sorted bitmaps from right to left.
|
||||||
|
final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone(); |
||||||
|
Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap32>() { |
||||||
|
@Override |
||||||
|
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) { |
||||||
|
return a.sizeinbits < b.sizeinbits ? 1 |
||||||
|
: a.sizeinbits == b.sizeinbits ? 0 : -1; |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length]; |
||||||
|
int maxAvailablePos = 0; |
||||||
|
for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) { |
||||||
|
EWAHIterator32 iterator = bitmap.getEWAHIterator(); |
||||||
|
if (iterator.hasNext()) { |
||||||
|
rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32( |
||||||
|
iterator); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (maxAvailablePos == 0) { // this never happens...
|
||||||
|
container.setSizeInBits(0); |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
int maxSize = sortedBitmaps[0].sizeinbits; |
||||||
|
|
||||||
|
while (true) { |
||||||
|
int maxOneRl = 0; |
||||||
|
int minZeroRl = Integer.MAX_VALUE; |
||||||
|
int minSize = Integer.MAX_VALUE; |
||||||
|
int numEmptyRl = 0; |
||||||
|
for (int i = 0; i < maxAvailablePos; i++) { |
||||||
|
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||||
|
int size = rlw.size(); |
||||||
|
if (size == 0) { |
||||||
|
maxAvailablePos = i; |
||||||
|
break; |
||||||
|
} |
||||||
|
minSize = Math.min(minSize, size); |
||||||
|
|
||||||
|
if (rlw.getRunningBit()) { |
||||||
|
int rl = rlw.getRunningLength(); |
||||||
|
maxOneRl = Math.max(maxOneRl, rl); |
||||||
|
minZeroRl = 0; |
||||||
|
if (rl == 0 && size > 0) { |
||||||
|
numEmptyRl++; |
||||||
|
} |
||||||
|
} else { |
||||||
|
int rl = rlw.getRunningLength(); |
||||||
|
minZeroRl = Math.min(minZeroRl, rl); |
||||||
|
if (rl == 0 && size > 0) { |
||||||
|
numEmptyRl++; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if (maxAvailablePos == 0) { |
||||||
|
break; |
||||||
|
} else if (maxAvailablePos == 1) { |
||||||
|
// only one bitmap is left so just write the rest of it out
|
||||||
|
rlws[0].discharge(container); |
||||||
|
break; |
||||||
|
} |
||||||
|
|
||||||
|
if (maxOneRl > 0) { |
||||||
|
container.addStreamOfEmptyWords(true, maxOneRl); |
||||||
|
for (int i = 0; i < maxAvailablePos; i++) { |
||||||
|
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||||
|
rlw.discardFirstWords(maxOneRl); |
||||||
|
} |
||||||
|
} else if (minZeroRl > 0) { |
||||||
|
container.addStreamOfEmptyWords(false, minZeroRl); |
||||||
|
for (int i = 0; i < maxAvailablePos; i++) { |
||||||
|
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||||
|
rlw.discardFirstWords(minZeroRl); |
||||||
|
} |
||||||
|
} else { |
||||||
|
int index = 0; |
||||||
|
|
||||||
|
if (numEmptyRl == 1) { |
||||||
|
// if one rlw has literal words to process and the rest have a run of
|
||||||
|
// 0's we can write them out here
|
||||||
|
IteratingBufferedRunningLengthWord32 emptyRl = null; |
||||||
|
int minNonEmptyRl = Integer.MAX_VALUE; |
||||||
|
for (int i = 0; i < maxAvailablePos; i++) { |
||||||
|
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||||
|
int rl = rlw.getRunningLength(); |
||||||
|
if (rl == 0) { |
||||||
|
assert emptyRl == null; |
||||||
|
emptyRl = rlw; |
||||||
|
} else { |
||||||
|
minNonEmptyRl = Math.min(minNonEmptyRl, rl); |
||||||
|
} |
||||||
|
} |
||||||
|
int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl; |
||||||
|
if (emptyRl != null) |
||||||
|
emptyRl.writeLiteralWords(wordsToWrite, container); |
||||||
|
index += wordsToWrite; |
||||||
|
} |
||||||
|
|
||||||
|
while (index < minSize) { |
||||||
|
int word = 0; |
||||||
|
for (int i = 0; i < maxAvailablePos; i++) { |
||||||
|
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||||
|
if (rlw.getRunningLength() <= index) { |
||||||
|
word |= rlw.getLiteralWordAt(index - rlw.getRunningLength()); |
||||||
|
} |
||||||
|
} |
||||||
|
container.add(word); |
||||||
|
index++; |
||||||
|
} |
||||||
|
for (int i = 0; i < maxAvailablePos; i++) { |
||||||
|
IteratingBufferedRunningLengthWord32 rlw = rlws[i]; |
||||||
|
rlw.discardFirstWords(minSize); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
container.setSizeInBits(maxSize); |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,90 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2012, Google Inc. |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
import static com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits; |
||||||
|
|
||||||
|
import com.fr.third.googlecode.javaewah.IntIterator; |
||||||
|
|
||||||
|
/** |
||||||
|
* The IntIteratorImpl32 is the 32 bit implementation of the IntIterator |
||||||
|
* interface, which efficiently returns the stream of integers represented by an |
||||||
|
* EWAHIterator32. |
||||||
|
* |
||||||
|
* @author Colby Ranger |
||||||
|
* @since 0.5.6 |
||||||
|
*/ |
||||||
|
final class IntIteratorImpl32 implements IntIterator { |
||||||
|
|
||||||
|
private final EWAHIterator32 ewahIter; |
||||||
|
private final int[] ewahBuffer; |
||||||
|
private int position; |
||||||
|
private int runningLength; |
||||||
|
private int word; |
||||||
|
private int wordPosition; |
||||||
|
private int wordLength; |
||||||
|
private int literalPosition; |
||||||
|
private boolean hasnext; |
||||||
|
|
||||||
|
IntIteratorImpl32(EWAHIterator32 ewahIter) { |
||||||
|
this.ewahIter = ewahIter; |
||||||
|
this.ewahBuffer = ewahIter.buffer(); |
||||||
|
this.hasnext = this.moveToNext(); |
||||||
|
} |
||||||
|
|
||||||
|
public final boolean moveToNext() { |
||||||
|
while (!runningHasNext() && !literalHasNext()) { |
||||||
|
if (!this.ewahIter.hasNext()) { |
||||||
|
return false; |
||||||
|
} |
||||||
|
setRunningLengthWord(this.ewahIter.next()); |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public final boolean hasNext() { |
||||||
|
return this.hasnext; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public final int next() { |
||||||
|
final int answer; |
||||||
|
if (runningHasNext()) { |
||||||
|
answer = this.position++; |
||||||
|
} else { |
||||||
|
final int bit = Long.numberOfTrailingZeros(this.word); |
||||||
|
this.word ^= (1l << bit); |
||||||
|
answer = this.literalPosition + bit; |
||||||
|
} |
||||||
|
this.hasnext = this.moveToNext(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
private final void setRunningLengthWord(RunningLengthWord32 rlw) { |
||||||
|
this.runningLength = wordinbits * rlw.getRunningLength() |
||||||
|
+ this.position; |
||||||
|
if (!rlw.getRunningBit()) { |
||||||
|
this.position = this.runningLength; |
||||||
|
} |
||||||
|
|
||||||
|
this.wordPosition = this.ewahIter.literalWords(); |
||||||
|
this.wordLength = this.wordPosition + rlw.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean runningHasNext() { |
||||||
|
return this.position < this.runningLength; |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean literalHasNext() { |
||||||
|
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||||
|
this.word = this.ewahBuffer[this.wordPosition++]; |
||||||
|
this.literalPosition = this.position; |
||||||
|
this.position += wordinbits; |
||||||
|
} |
||||||
|
return this.word != 0; |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,91 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits; |
||||||
|
|
||||||
|
import com.fr.third.googlecode.javaewah.IntIterator; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* Implementation of an IntIterator over an IteratingRLW32. |
||||||
|
* |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class IntIteratorOverIteratingRLW32 implements IntIterator { |
||||||
|
IteratingRLW32 parent; |
||||||
|
private int position; |
||||||
|
private int runningLength; |
||||||
|
private int word; |
||||||
|
private int wordPosition; |
||||||
|
private int wordLength; |
||||||
|
private int literalPosition; |
||||||
|
private boolean hasnext; |
||||||
|
|
||||||
|
/** |
||||||
|
* @param p iterator we wish to iterate over |
||||||
|
*/ |
||||||
|
public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) { |
||||||
|
this.parent = p; |
||||||
|
this.position = 0; |
||||||
|
setupForCurrentRunningLengthWord(); |
||||||
|
this.hasnext = moveToNext(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @return whether we could find another set bit; don't move if there is an unprocessed value |
||||||
|
*/ |
||||||
|
private final boolean moveToNext() { |
||||||
|
while (!runningHasNext() && !literalHasNext()) { |
||||||
|
if (this.parent.next()) |
||||||
|
setupForCurrentRunningLengthWord(); |
||||||
|
else return false; |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return this.hasnext; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public final int next() { |
||||||
|
final int answer; |
||||||
|
if (runningHasNext()) { |
||||||
|
answer = this.position++; |
||||||
|
} else { |
||||||
|
final int bit = Long.numberOfTrailingZeros(this.word); |
||||||
|
this.word ^= (1l << bit); |
||||||
|
answer = this.literalPosition + bit; |
||||||
|
} |
||||||
|
this.hasnext = this.moveToNext(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
private final void setupForCurrentRunningLengthWord() { |
||||||
|
this.runningLength = wordinbits * this.parent.getRunningLength() |
||||||
|
+ this.position; |
||||||
|
|
||||||
|
if (!this.parent.getRunningBit()) { |
||||||
|
this.position = this.runningLength; |
||||||
|
} |
||||||
|
this.wordPosition = 0; |
||||||
|
this.wordLength = this.parent.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean runningHasNext() { |
||||||
|
return this.position < this.runningLength; |
||||||
|
} |
||||||
|
|
||||||
|
private final boolean literalHasNext() { |
||||||
|
while (this.word == 0 && this.wordPosition < this.wordLength) { |
||||||
|
this.word = this.parent.getLiteralWordAt(this.wordPosition++); |
||||||
|
this.literalPosition = this.position; |
||||||
|
this.position += wordinbits; |
||||||
|
} |
||||||
|
return this.word != 0; |
||||||
|
} |
||||||
|
} |
||||||
|
|
@ -0,0 +1,274 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* Mostly for internal use. Similar to BufferedRunningLengthWord32, but automatically |
||||||
|
* advances to the next BufferedRunningLengthWord32 as words are discarded. |
||||||
|
* |
||||||
|
* @since 0.5.0 |
||||||
|
* @author Daniel Lemire and David McIntosh |
||||||
|
*/ |
||||||
|
public final class IteratingBufferedRunningLengthWord32 implements IteratingRLW32, Cloneable { |
||||||
|
/** |
||||||
|
* Instantiates a new iterating buffered running length word. |
||||||
|
* |
||||||
|
* @param iterator iterator |
||||||
|
*/ |
||||||
|
public IteratingBufferedRunningLengthWord32(final EWAHIterator32 iterator) { |
||||||
|
this.iterator = iterator; |
||||||
|
this.brlw = new BufferedRunningLengthWord32(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset; |
||||||
|
this.buffer = this.iterator.buffer(); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new iterating buffered running length word. |
||||||
|
* @param bitmap over which we want to iterate |
||||||
|
* |
||||||
|
*/ |
||||||
|
public IteratingBufferedRunningLengthWord32(final EWAHCompressedBitmap32 bitmap) { |
||||||
|
this(EWAHIterator32.getEWAHIterator(bitmap)); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Discard first words, iterating to the next running length word if needed. |
||||||
|
* |
||||||
|
* @param x the x |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void discardFirstWords(int x) { |
||||||
|
|
||||||
|
while (x > 0) { |
||||||
|
if (this.brlw.RunningLength > x) { |
||||||
|
this.brlw.RunningLength -= x; |
||||||
|
return; |
||||||
|
} |
||||||
|
x -= this.brlw.RunningLength; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
int toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x; |
||||||
|
|
||||||
|
this.literalWordStartPosition += toDiscard; |
||||||
|
this.brlw.NumberOfLiteralWords -= toDiscard; |
||||||
|
x -= toDiscard; |
||||||
|
if ((x > 0) || (this.brlw.size() == 0)) { |
||||||
|
if (!this.iterator.hasNext()) { |
||||||
|
break; |
||||||
|
} |
||||||
|
this.brlw.reset(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset == 0;
|
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
/** |
||||||
|
* Write out up to max words, returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
public int discharge(BitmapStorage32 container, int max) { |
||||||
|
int index = 0; |
||||||
|
while ((index < max) && (size() > 0)) { |
||||||
|
// first run
|
||||||
|
int pl = getRunningLength(); |
||||||
|
if (index + pl > max) { |
||||||
|
pl = max - index; |
||||||
|
} |
||||||
|
container.addStreamOfEmptyWords(getRunningBit(), pl); |
||||||
|
index += pl; |
||||||
|
int pd = getNumberOfLiteralWords(); |
||||||
|
if (pd + index > max) { |
||||||
|
pd = max - index; |
||||||
|
} |
||||||
|
writeLiteralWords(pd, container); |
||||||
|
discardFirstWords(pl+pd); |
||||||
|
index += pd; |
||||||
|
} |
||||||
|
return index; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out up to max words (negated), returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
public int dischargeNegated(BitmapStorage32 container, int max) { |
||||||
|
int index = 0; |
||||||
|
while ((index < max) && (size() > 0)) { |
||||||
|
// first run
|
||||||
|
int pl = getRunningLength(); |
||||||
|
if (index + pl > max) { |
||||||
|
pl = max - index; |
||||||
|
} |
||||||
|
container.addStreamOfEmptyWords(!getRunningBit(), pl); |
||||||
|
index += pl; |
||||||
|
int pd = getNumberOfLiteralWords(); |
||||||
|
if (pd + index > max) { |
||||||
|
pd = max - index; |
||||||
|
} |
||||||
|
writeNegatedLiteralWords(pd, container); |
||||||
|
discardFirstWords(pl+pd); |
||||||
|
index += pd; |
||||||
|
} |
||||||
|
return index; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Move to the next RunningLengthWord |
||||||
|
* @return whether the move was possible |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean next() { |
||||||
|
if (!this.iterator.hasNext()) { |
||||||
|
this.brlw.NumberOfLiteralWords = 0; |
||||||
|
this.brlw.RunningLength = 0; |
||||||
|
return false; |
||||||
|
} |
||||||
|
this.brlw.reset(this.iterator.next()); |
||||||
|
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
|
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out the remain words, transforming them to zeroes. |
||||||
|
* @param container target for writes |
||||||
|
*/ |
||||||
|
public void dischargeAsEmpty(BitmapStorage32 container) { |
||||||
|
while(size()>0) { |
||||||
|
container.addStreamOfEmptyWords(false, size()); |
||||||
|
discardFirstWords(size()); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out the remaining words |
||||||
|
* @param container target for writes |
||||||
|
*/ |
||||||
|
public void discharge(BitmapStorage32 container) { |
||||||
|
// fix the offset
|
||||||
|
this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords(); |
||||||
|
discharge(this.brlw, this.iterator, container); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Get the nth literal word for the current running length word |
||||||
|
* @param index zero based index |
||||||
|
* @return the literal word |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getLiteralWordAt(int index) { |
||||||
|
return this.buffer[this.literalWordStartPosition + index]; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words for the current running length word. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return this.brlw.NumberOfLiteralWords; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public boolean getRunningBit() { |
||||||
|
return this.brlw.RunningBit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int getRunningLength() { |
||||||
|
return this.brlw.RunningLength; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Size in uncompressed words of the current running length word. |
||||||
|
* |
||||||
|
* @return the int |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public int size() { |
||||||
|
return this.brlw.size(); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* write the first N literal words to the target bitmap. Does not discard the words or perform iteration. |
||||||
|
* @param numWords number of words to be written |
||||||
|
* @param container where we write the data |
||||||
|
*/ |
||||||
|
public void writeLiteralWords(int numWords, BitmapStorage32 container) { |
||||||
|
container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration. |
||||||
|
* @param numWords number of words to be written |
||||||
|
* @param container where we write the data |
||||||
|
*/ |
||||||
|
public void writeNegatedLiteralWords(int numWords, BitmapStorage32 container) { |
||||||
|
container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* For internal use. (One could use the non-static discharge method instead, |
||||||
|
* but we expect them to be slower.) |
||||||
|
* |
||||||
|
* @param initialWord |
||||||
|
* the initial word |
||||||
|
* @param iterator |
||||||
|
* the iterator |
||||||
|
* @param container |
||||||
|
* the container |
||||||
|
*/ |
||||||
|
protected static void discharge( |
||||||
|
final BufferedRunningLengthWord32 initialWord, |
||||||
|
final EWAHIterator32 iterator, final BitmapStorage32 container) { |
||||||
|
BufferedRunningLengthWord32 runningLengthWord = initialWord; |
||||||
|
for (;;) { |
||||||
|
final int runningLength = runningLengthWord.getRunningLength(); |
||||||
|
container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(), |
||||||
|
runningLength); |
||||||
|
container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords() |
||||||
|
+ runningLengthWord.literalwordoffset, |
||||||
|
runningLengthWord.getNumberOfLiteralWords()); |
||||||
|
if (!iterator.hasNext()) |
||||||
|
break; |
||||||
|
runningLengthWord = new BufferedRunningLengthWord32(iterator.next()); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Override |
||||||
|
public IteratingBufferedRunningLengthWord32 clone() throws CloneNotSupportedException { |
||||||
|
IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super.clone(); |
||||||
|
answer.brlw = this.brlw.clone(); |
||||||
|
answer.buffer = this.buffer; |
||||||
|
answer.iterator = this.iterator.clone(); |
||||||
|
answer.literalWordStartPosition = this.literalWordStartPosition; |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
private BufferedRunningLengthWord32 brlw; |
||||||
|
private int[] buffer; |
||||||
|
private int literalWordStartPosition; |
||||||
|
private EWAHIterator32 iterator; |
||||||
|
} |
@ -0,0 +1,42 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* High-level iterator over a compressed bitmap. |
||||||
|
* |
||||||
|
*/ |
||||||
|
public interface IteratingRLW32 {

  /**
   * Moves to the next running length word.
   *
   * @return whether there is more
   */
  boolean next();

  /**
   * @param index where the literal word is
   * @return the literal word at the given index.
   */
  int getLiteralWordAt(int index);

  /**
   * @return the number of literal (non-fill) words
   */
  int getNumberOfLiteralWords();

  /**
   * @return the bit used for the fill bits
   */
  boolean getRunningBit();

  /**
   * @return sum of getRunningLength() and getNumberOfLiteralWords()
   */
  int size();

  /**
   * @return length of the run of fill words
   */
  int getRunningLength();

  /**
   * @param x the number of words to discard
   */
  void discardFirstWords(int x);
}
@ -0,0 +1,601 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
import java.util.Arrays; |
||||||
|
import java.util.Iterator; |
||||||
|
import java.util.LinkedList; |
||||||
|
|
||||||
|
import com.fr.third.googlecode.javaewah.CloneableIterator; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* Set of helper functions to aggregate bitmaps. |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class IteratorAggregation32 { |
||||||
|
/** |
||||||
|
* @param x iterator to negate |
||||||
|
* @return negated version of the iterator |
||||||
|
*/ |
||||||
|
public static IteratingRLW32 not(final IteratingRLW32 x) { |
||||||
|
return new IteratingRLW32() { |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean next() { |
||||||
|
return x.next(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public int getLiteralWordAt(int index) { |
||||||
|
return ~x.getLiteralWordAt(index); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return x.getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean getRunningBit() { |
||||||
|
return ! x.getRunningBit(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public int size() { |
||||||
|
return x.size(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public int getRunningLength() { |
||||||
|
return x.getRunningLength(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void discardFirstWords(int y) { |
||||||
|
x.discardFirstWords(y); |
||||||
|
} |
||||||
|
|
||||||
|
}; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @return and aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW32 bufferedand(final IteratingRLW32... al) { |
||||||
|
return bufferedand (DEFAULTMAXBUFSIZE,al); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @param bufsize size of the internal buffer used by the iterator in 32-bit words |
||||||
|
* @return and aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW32 bufferedand(final int bufsize, final IteratingRLW32... al) { |
||||||
|
if (al.length == 0) |
||||||
|
throw new IllegalArgumentException("Need at least one iterator"); |
||||||
|
if (al.length == 1) |
||||||
|
return al[0]; |
||||||
|
final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>(); |
||||||
|
for (IteratingRLW32 i : al) |
||||||
|
basell.add(i); |
||||||
|
return new BufferedIterator32(new AndIt(basell,bufsize)); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @return or aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW32 bufferedor(final IteratingRLW32... al) { |
||||||
|
return bufferedor(DEFAULTMAXBUFSIZE,al); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @param bufsize size of the internal buffer used by the iterator in 32-bit words |
||||||
|
* @return or aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW32 bufferedor(final int bufsize, final IteratingRLW32... al) { |
||||||
|
if (al.length == 0) |
||||||
|
throw new IllegalArgumentException("Need at least one iterator"); |
||||||
|
if (al.length == 1) |
||||||
|
return al[0]; |
||||||
|
|
||||||
|
final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>(); |
||||||
|
for (IteratingRLW32 i : al) |
||||||
|
basell.add(i); |
||||||
|
return new BufferedIterator32(new ORIt(basell,bufsize)); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @return xor aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW32 bufferedxor(final IteratingRLW32... al) { |
||||||
|
return bufferedxor (DEFAULTMAXBUFSIZE,al); |
||||||
|
} |
||||||
|
/** |
||||||
|
* Aggregate the iterators using a bitmap buffer. |
||||||
|
* |
||||||
|
* @param al iterators to aggregate |
||||||
|
* @param bufsize size of the internal buffer used by the iterator in 32-bit words |
||||||
|
* @return xor aggregate |
||||||
|
*/ |
||||||
|
public static IteratingRLW32 bufferedxor(final int bufsize, final IteratingRLW32... al) { |
||||||
|
if (al.length == 0) |
||||||
|
throw new IllegalArgumentException("Need at least one iterator"); |
||||||
|
if (al.length == 1) |
||||||
|
return al[0]; |
||||||
|
|
||||||
|
final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>(); |
||||||
|
for (IteratingRLW32 i : al) |
||||||
|
basell.add(i); |
||||||
|
return new BufferedIterator32(new XORIt(basell,bufsize)); |
||||||
|
} |
||||||
|
/** |
||||||
|
* Write out the content of the iterator, but as if it were all zeros. |
||||||
|
* |
||||||
|
* @param container |
||||||
|
* where we write |
||||||
|
* @param i |
||||||
|
* the iterator |
||||||
|
*/ |
||||||
|
protected static void dischargeAsEmpty(final BitmapStorage32 container, |
||||||
|
final IteratingRLW32 i) { |
||||||
|
while (i.size() > 0) { |
||||||
|
container.addStreamOfEmptyWords(false, i.size()); |
||||||
|
i.next(); |
||||||
|
|
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out up to max words, returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param i source of data |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
protected static int discharge(final BitmapStorage32 container, IteratingRLW32 i, int max) { |
||||||
|
int counter = 0; |
||||||
|
while (i.size() > 0 && counter < max) { |
||||||
|
int L1 = i.getRunningLength(); |
||||||
|
if (L1 > 0) { |
||||||
|
if (L1 + counter > max) |
||||||
|
L1 = max - counter; |
||||||
|
container.addStreamOfEmptyWords(i.getRunningBit(), L1); |
||||||
|
counter += L1; |
||||||
|
} |
||||||
|
int L = i.getNumberOfLiteralWords(); |
||||||
|
if(L + counter > max) L = max - counter; |
||||||
|
for (int k = 0; k < L; ++k) { |
||||||
|
container.add(i.getLiteralWordAt(k)); |
||||||
|
} |
||||||
|
counter += L; |
||||||
|
i.discardFirstWords(L+L1); |
||||||
|
} |
||||||
|
return counter; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Write out up to max negated words, returns how many were written |
||||||
|
* @param container target for writes |
||||||
|
* @param i source of data |
||||||
|
* @param max maximal number of writes |
||||||
|
* @return how many written |
||||||
|
*/ |
||||||
|
protected static int dischargeNegated(final BitmapStorage32 container, IteratingRLW32 i, int max) { |
||||||
|
int counter = 0; |
||||||
|
while (i.size() > 0 && counter < max) { |
||||||
|
int L1 = i.getRunningLength(); |
||||||
|
if (L1 > 0) { |
||||||
|
if (L1 + counter > max) |
||||||
|
L1 = max - counter; |
||||||
|
container.addStreamOfEmptyWords(i.getRunningBit(), L1); |
||||||
|
counter += L1; |
||||||
|
} |
||||||
|
int L = i.getNumberOfLiteralWords(); |
||||||
|
if(L + counter > max) L = max - counter; |
||||||
|
for (int k = 0; k < L; ++k) { |
||||||
|
container.add(i.getLiteralWordAt(k)); |
||||||
|
} |
||||||
|
counter += L; |
||||||
|
i.discardFirstWords(L+L1); |
||||||
|
} |
||||||
|
return counter; |
||||||
|
} |
||||||
|
|
||||||
|
static void andToContainer(final BitmapStorage32 container, |
||||||
|
int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { |
||||||
|
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||||
|
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||||
|
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||||
|
.getRunningLength(); |
||||||
|
final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; |
||||||
|
final IteratingRLW32 predator = i_is_prey ? rlwj |
||||||
|
: rlwi; |
||||||
|
if (predator.getRunningBit() == false) { |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||||
|
prey.discardFirstWords(predator.getRunningLength()); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} else { |
||||||
|
final int index = discharge(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} |
||||||
|
} |
||||||
|
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||||
|
rlwj.getNumberOfLiteralWords()); |
||||||
|
if (nbre_literal > 0) { |
||||||
|
desiredrlwcount -= nbre_literal; |
||||||
|
for (int k = 0; k < nbre_literal; ++k) |
||||||
|
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||||
|
rlwi.discardFirstWords(nbre_literal); |
||||||
|
rlwj.discardFirstWords(nbre_literal); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
static void andToContainer(final BitmapStorage32 container, |
||||||
|
final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { |
||||||
|
while ((rlwi.size()>0) && (rlwj.size()>0) ) { |
||||||
|
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||||
|
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||||
|
.getRunningLength(); |
||||||
|
final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; |
||||||
|
final IteratingRLW32 predator = i_is_prey ? rlwj |
||||||
|
: rlwi; |
||||||
|
if (predator.getRunningBit() == false) { |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength()); |
||||||
|
prey.discardFirstWords(predator.getRunningLength()); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} else { |
||||||
|
final int index = discharge(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} |
||||||
|
} |
||||||
|
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||||
|
rlwj.getNumberOfLiteralWords()); |
||||||
|
if (nbre_literal > 0) { |
||||||
|
for (int k = 0; k < nbre_literal; ++k) |
||||||
|
container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k)); |
||||||
|
rlwi.discardFirstWords(nbre_literal); |
||||||
|
rlwj.discardFirstWords(nbre_literal); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* Compute the first few words of the XOR aggregate between two iterators. |
||||||
|
* |
||||||
|
* @param container where to write |
||||||
|
* @param desiredrlwcount number of words to be written (max) |
||||||
|
* @param rlwi first iterator to aggregate |
||||||
|
* @param rlwj second iterator to aggregate |
||||||
|
*/ |
||||||
|
public static void xorToContainer(final BitmapStorage32 container, |
||||||
|
int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) { |
||||||
|
while ((rlwi.size()>0) && (rlwj.size()>0) && (desiredrlwcount-- >0) ) { |
||||||
|
while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) { |
||||||
|
final boolean i_is_prey = rlwi.getRunningLength() < rlwj |
||||||
|
.getRunningLength(); |
||||||
|
final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj; |
||||||
|
final IteratingRLW32 predator = i_is_prey ? rlwj |
||||||
|
: rlwi; |
||||||
|
if (predator.getRunningBit() == false) { |
||||||
|
int index = discharge(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(false, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} else { |
||||||
|
int index = dischargeNegated(container, prey, predator.getRunningLength()); |
||||||
|
container.addStreamOfEmptyWords(true, predator.getRunningLength() |
||||||
|
- index); |
||||||
|
predator.discardFirstWords(predator.getRunningLength()); |
||||||
|
} |
||||||
|
} |
||||||
|
final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(), |
||||||
|
rlwj.getNumberOfLiteralWords()); |
||||||
|
if (nbre_literal > 0) { |
||||||
|
desiredrlwcount -= nbre_literal; |
||||||
|
for (int k = 0; k < nbre_literal; ++k) |
||||||
|
container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k)); |
||||||
|
rlwi.discardFirstWords(nbre_literal); |
||||||
|
rlwj.discardFirstWords(nbre_literal); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
protected static int inplaceor(int[] bitmap, |
||||||
|
IteratingRLW32 i) { |
||||||
|
int pos = 0; |
||||||
|
int s; |
||||||
|
while ((s = i.size()) > 0) { |
||||||
|
if (pos + s < bitmap.length) { |
||||||
|
final int L = i.getRunningLength(); |
||||||
|
if (i.getRunningBit()) |
||||||
|
Arrays.fill(bitmap, pos, pos + L, ~0); |
||||||
|
pos += L; |
||||||
|
final int LR = i.getNumberOfLiteralWords(); |
||||||
|
for (int k = 0; k < LR; ++k) |
||||||
|
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||||
|
if (!i.next()) { |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} else { |
||||||
|
int howmany = bitmap.length - pos; |
||||||
|
int L = i.getRunningLength(); |
||||||
|
if (pos + L > bitmap.length) { |
||||||
|
if (i.getRunningBit()) { |
||||||
|
Arrays.fill(bitmap, pos, bitmap.length, ~0); |
||||||
|
} |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return bitmap.length; |
||||||
|
} |
||||||
|
if (i.getRunningBit()) |
||||||
|
Arrays.fill(bitmap, pos, pos + L, ~0); |
||||||
|
pos += L; |
||||||
|
for (int k = 0; pos < bitmap.length; ++k) |
||||||
|
bitmap[pos++] |= i.getLiteralWordAt(k); |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} |
||||||
|
return pos; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
protected static int inplacexor(int[] bitmap, |
||||||
|
IteratingRLW32 i) { |
||||||
|
int pos = 0; |
||||||
|
int s; |
||||||
|
while ((s = i.size()) > 0) { |
||||||
|
if (pos + s < bitmap.length) { |
||||||
|
final int L = i.getRunningLength(); |
||||||
|
if (i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = ~bitmap[k]; |
||||||
|
} |
||||||
|
pos += L; |
||||||
|
final int LR = i.getNumberOfLiteralWords(); |
||||||
|
for (int k = 0; k < LR; ++k) |
||||||
|
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||||
|
if (!i.next()) { |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} else { |
||||||
|
int howmany = bitmap.length - pos; |
||||||
|
int L = i.getRunningLength(); |
||||||
|
if (pos + L > bitmap.length) { |
||||||
|
if (i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < bitmap.length; ++k) |
||||||
|
bitmap[k] = ~bitmap[k]; |
||||||
|
} |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return bitmap.length; |
||||||
|
} |
||||||
|
if (i.getRunningBit()) |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = ~bitmap[k]; |
||||||
|
pos += L; |
||||||
|
for (int k = 0; pos < bitmap.length; ++k) |
||||||
|
bitmap[pos++] ^= i.getLiteralWordAt(k); |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} |
||||||
|
return pos; |
||||||
|
} |
||||||
|
protected static int inplaceand(int[] bitmap, |
||||||
|
IteratingRLW32 i) { |
||||||
|
int pos = 0; |
||||||
|
int s; |
||||||
|
while ((s = i.size()) > 0) { |
||||||
|
if (pos + s < bitmap.length) { |
||||||
|
final int L = i.getRunningLength(); |
||||||
|
if (!i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = 0; |
||||||
|
} |
||||||
|
pos += L; |
||||||
|
final int LR = i.getNumberOfLiteralWords(); |
||||||
|
for (int k = 0; k < LR; ++k) |
||||||
|
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||||
|
if (!i.next()) { |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} else { |
||||||
|
int howmany = bitmap.length - pos; |
||||||
|
int L = i.getRunningLength(); |
||||||
|
if (pos + L > bitmap.length) { |
||||||
|
if (!i.getRunningBit()) { |
||||||
|
for(int k = pos ; k < bitmap.length; ++k) |
||||||
|
bitmap[k] = 0; |
||||||
|
} |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return bitmap.length; |
||||||
|
} |
||||||
|
if (!i.getRunningBit()) |
||||||
|
for(int k = pos ; k < pos + L; ++k) |
||||||
|
bitmap[k] = 0; |
||||||
|
pos += L; |
||||||
|
for (int k = 0; pos < bitmap.length; ++k) |
||||||
|
bitmap[pos++] &= i.getLiteralWordAt(k); |
||||||
|
i.discardFirstWords(howmany); |
||||||
|
return pos; |
||||||
|
} |
||||||
|
} |
||||||
|
return pos; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* An optimization option. Larger values may improve speed, but at |
||||||
|
* the expense of memory. |
||||||
|
*/ |
||||||
|
public final static int DEFAULTMAXBUFSIZE = 65536; |
||||||
|
|
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
class ORIt implements CloneableIterator<EWAHIterator32> { |
||||||
|
EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); |
||||||
|
int[] hardbitmap; |
||||||
|
LinkedList<IteratingRLW32> ll; |
||||||
|
|
||||||
|
ORIt(LinkedList<IteratingRLW32> basell, final int bufsize) { |
||||||
|
this.ll = basell; |
||||||
|
this.hardbitmap = new int[bufsize]; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public XORIt clone() throws CloneNotSupportedException { |
||||||
|
XORIt answer = (XORIt) super.clone(); |
||||||
|
answer.buffer = this.buffer.clone(); |
||||||
|
answer.hardbitmap = this.hardbitmap.clone(); |
||||||
|
answer.ll = (LinkedList<IteratingRLW32>) this.ll.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return !this.ll.isEmpty(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator32 next() { |
||||||
|
this.buffer.clear(); |
||||||
|
int effective = 0; |
||||||
|
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||||
|
while (i.hasNext()) { |
||||||
|
IteratingRLW32 rlw = i.next(); |
||||||
|
if (rlw.size() > 0) { |
||||||
|
int eff = IteratorAggregation32.inplaceor(this.hardbitmap, rlw); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
i.remove(); |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
this.buffer.add(this.hardbitmap[k]); |
||||||
|
Arrays.fill(this.hardbitmap, 0); |
||||||
|
return this.buffer.getEWAHIterator(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
class XORIt implements CloneableIterator<EWAHIterator32> { |
||||||
|
EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); |
||||||
|
int[] hardbitmap; |
||||||
|
LinkedList<IteratingRLW32> ll; |
||||||
|
|
||||||
|
XORIt(LinkedList<IteratingRLW32> basell, final int bufsize) { |
||||||
|
this.ll = basell; |
||||||
|
this.hardbitmap = new int[bufsize]; |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public XORIt clone() throws CloneNotSupportedException { |
||||||
|
XORIt answer = (XORIt) super.clone(); |
||||||
|
answer.buffer = this.buffer.clone(); |
||||||
|
answer.hardbitmap = this.hardbitmap.clone(); |
||||||
|
answer.ll = (LinkedList<IteratingRLW32>) this.ll.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return !this.ll.isEmpty(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator32 next() { |
||||||
|
this.buffer.clear(); |
||||||
|
int effective = 0; |
||||||
|
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||||
|
while (i.hasNext()) { |
||||||
|
IteratingRLW32 rlw = i.next(); |
||||||
|
if (rlw.size() > 0) { |
||||||
|
int eff = IteratorAggregation32.inplacexor(this.hardbitmap, rlw); |
||||||
|
if (eff > effective) |
||||||
|
effective = eff; |
||||||
|
} else |
||||||
|
i.remove(); |
||||||
|
} |
||||||
|
for (int k = 0; k < effective; ++k) |
||||||
|
this.buffer.add(this.hardbitmap[k]); |
||||||
|
Arrays.fill(this.hardbitmap, 0); |
||||||
|
return this.buffer.getEWAHIterator(); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
class AndIt implements CloneableIterator<EWAHIterator32> { |
||||||
|
EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32(); |
||||||
|
LinkedList<IteratingRLW32> ll; |
||||||
|
int buffersize; |
||||||
|
|
||||||
|
public AndIt(LinkedList<IteratingRLW32> basell, final int bufsize) { |
||||||
|
this.ll = basell; |
||||||
|
this.buffersize = bufsize; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return !this.ll.isEmpty(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public AndIt clone() throws CloneNotSupportedException { |
||||||
|
AndIt answer = (AndIt) super.clone(); |
||||||
|
answer.buffer = this.buffer.clone(); |
||||||
|
answer.ll = (LinkedList<IteratingRLW32>) this.ll.clone(); |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public EWAHIterator32 next() { |
||||||
|
this.buffer.clear(); |
||||||
|
IteratorAggregation32.andToContainer(this.buffer, this.buffersize * this.ll.size(), |
||||||
|
this.ll.get(0), this.ll.get(1)); |
||||||
|
if (this.ll.size() > 2) { |
||||||
|
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||||
|
i.next(); |
||||||
|
i.next(); |
||||||
|
EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32(); |
||||||
|
while (i.hasNext() && this.buffer.sizeInBytes() > 0) { |
||||||
|
IteratorAggregation32.andToContainer(tmpbuffer, |
||||||
|
this.buffer.getIteratingRLW(), i.next()); |
||||||
|
this.buffer.swap(tmpbuffer); |
||||||
|
tmpbuffer.clear(); |
||||||
|
} |
||||||
|
} |
||||||
|
Iterator<IteratingRLW32> i = this.ll.iterator(); |
||||||
|
while(i.hasNext()) { |
||||||
|
if(i.next().size() == 0) { |
||||||
|
this.ll.clear(); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
return this.buffer.getEWAHIterator(); |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,135 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
import java.util.Iterator; |
||||||
|
|
||||||
|
import com.fr.third.googlecode.javaewah.IntIterator; |
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* Convenience functions for working over iterators |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class IteratorUtil32 { |
||||||
|
|
||||||
|
/** |
||||||
|
* @param i iterator we wish to iterate over |
||||||
|
* @return an iterator over the set bits corresponding to the iterator |
||||||
|
*/ |
||||||
|
public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) { |
||||||
|
return new IntIteratorOverIteratingRLW32(i); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @param i iterator we wish to iterate over |
||||||
|
* @return an iterator over the set bits corresponding to the iterator |
||||||
|
*/ |
||||||
|
public static Iterator<Integer> toSetBitsIterator(final IteratingRLW32 i) { |
||||||
|
return new Iterator<Integer>() { |
||||||
|
@Override |
||||||
|
public boolean hasNext() { |
||||||
|
return this.under.hasNext(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public Integer next() { |
||||||
|
return new Integer(this.under.next()); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void remove() { |
||||||
|
} |
||||||
|
|
||||||
|
final private IntIterator under = toSetBitsIntIterator(i); |
||||||
|
}; |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Turn an iterator into a bitmap |
||||||
|
* @param i iterator we wish to materialize |
||||||
|
* @param c where we write |
||||||
|
*/ |
||||||
|
public static void materialize(final IteratingRLW32 i, final BitmapStorage32 c) { |
||||||
|
while (true) { |
||||||
|
if (i.getRunningLength() > 0) { |
||||||
|
c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength()); |
||||||
|
} |
||||||
|
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||||
|
c.add(i.getLiteralWordAt(k)); |
||||||
|
if (!i.next()) |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* @param i iterator we wish to iterate over |
||||||
|
* @return the cardinality (number of set bits) corresponding to the iterator |
||||||
|
*/ |
||||||
|
public static int cardinality(final IteratingRLW32 i) { |
||||||
|
int answer = 0; |
||||||
|
while (true) { |
||||||
|
if(i.getRunningBit()) answer += i.getRunningLength() * EWAHCompressedBitmap32.wordinbits; |
||||||
|
for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) |
||||||
|
answer += Long.bitCount(i.getLiteralWordAt(k)); |
||||||
|
if(!i.next()) break; |
||||||
|
} |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* |
||||||
|
* @param x set of bitmaps we wish to iterate over |
||||||
|
* @return an array of iterators corresponding to the array of bitmaps |
||||||
|
*/ |
||||||
|
public static IteratingRLW32[] toIterators(final EWAHCompressedBitmap32... x) { |
||||||
|
IteratingRLW32[] X = new IteratingRLW32[x.length]; |
||||||
|
for (int k = 0; k < X.length; ++k) { |
||||||
|
X[k] = new IteratingBufferedRunningLengthWord32(x[k]); |
||||||
|
} |
||||||
|
return X; |
||||||
|
} |
||||||
|
/** |
||||||
|
* Turn an iterator into a bitmap |
||||||
|
* |
||||||
|
* @param i iterator we wish to materialize |
||||||
|
* @param c where we write |
||||||
|
* @param Max maximum number of words to materialize |
||||||
|
* @return how many words were actually materialized |
||||||
|
*/ |
||||||
|
public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int Max) { |
||||||
|
final int origMax = Max; |
||||||
|
while (true) { |
||||||
|
if (i.getRunningLength() > 0) { |
||||||
|
int L = i.getRunningLength(); |
||||||
|
if(L > Max) L = Max; |
||||||
|
c.addStreamOfEmptyWords(i.getRunningBit(), L); |
||||||
|
Max -= L; |
||||||
|
} |
||||||
|
long L = i.getNumberOfLiteralWords(); |
||||||
|
for (int k = 0; k < L; ++k) |
||||||
|
c.add(i.getLiteralWordAt(k)); |
||||||
|
if(Max>0) { |
||||||
|
if (!i.next()) |
||||||
|
break; |
||||||
|
} |
||||||
|
else break; |
||||||
|
} |
||||||
|
return origMax - Max; |
||||||
|
} |
||||||
|
/** |
||||||
|
* Turn an iterator into a bitmap |
||||||
|
* |
||||||
|
* @param i iterator we wish to materialize |
||||||
|
* @return materialized version of the iterator |
||||||
|
*/ |
||||||
|
public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) { |
||||||
|
EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32(); |
||||||
|
materialize(i, ewah); |
||||||
|
return ewah; |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,87 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
/** |
||||||
|
* This is a BitmapStorage that can be used to determine quickly |
||||||
|
* if the result of an operation is non-trivial... that is, whether |
||||||
|
* there will be at least on set bit. |
||||||
|
* |
||||||
|
* @since 0.5.0 |
||||||
|
* @author Daniel Lemire and Veronika Zenz |
||||||
|
* |
||||||
|
*/ |
||||||
|
public class NonEmptyVirtualStorage32 implements BitmapStorage32 { |
||||||
|
static class NonEmptyException extends RuntimeException { |
||||||
|
private static final long serialVersionUID = 1L; |
||||||
|
|
||||||
|
/** |
||||||
|
* Do not fill in the stack trace for this exception |
||||||
|
* for performance reasons. |
||||||
|
* |
||||||
|
* @return this instance |
||||||
|
* @see Throwable#fillInStackTrace() |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public synchronized Throwable fillInStackTrace() { |
||||||
|
return this; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
private static final NonEmptyException nonEmptyException = new NonEmptyException(); |
||||||
|
|
||||||
|
|
||||||
|
/** |
||||||
|
* If the word to be added is non-zero, a NonEmptyException exception is thrown. |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void add(int newdata) { |
||||||
|
if(newdata!=0) throw nonEmptyException; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* throws a NonEmptyException exception when number is greater than 0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfLiteralWords(int[] data, int start, int number) { |
||||||
|
if (number > 0){ |
||||||
|
throw nonEmptyException; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception, |
||||||
|
* otherwise, nothing happens. |
||||||
|
* |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfEmptyWords(boolean v, int number) { |
||||||
|
if(v && (number>0)) throw nonEmptyException; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* throws a NonEmptyException exception when number is greater than 0 |
||||||
|
* |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) { |
||||||
|
if (number > 0){ |
||||||
|
throw nonEmptyException; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Does nothing. |
||||||
|
* |
||||||
|
* @see com.googlecode.javaewah.BitmapStorage#setSizeInBits(int) |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public void setSizeInBits(int bits) { |
||||||
|
} |
||||||
|
|
||||||
|
} |
@ -0,0 +1,152 @@ |
|||||||
|
package com.fr.third.googlecode.javaewah32; |
||||||
|
|
||||||
|
/* |
||||||
|
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser |
||||||
|
* Licensed under the Apache License, Version 2.0. |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* Mostly for internal use. |
||||||
|
* |
||||||
|
* @since 0.5.0 |
||||||
|
* @author Daniel Lemire |
||||||
|
*/ |
||||||
|
public final class RunningLengthWord32 implements Cloneable { |
||||||
|
|
||||||
|
/** |
||||||
|
* Instantiates a new running length word. |
||||||
|
* |
||||||
|
* @param a |
||||||
|
* an array of 32-bit words |
||||||
|
* @param p |
||||||
|
* position in the array where the running length word is |
||||||
|
* located. |
||||||
|
*/ |
||||||
|
RunningLengthWord32(final EWAHCompressedBitmap32 a, final int p) { |
||||||
|
this.parent = a; |
||||||
|
this.position = p; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the number of literal words. |
||||||
|
* |
||||||
|
* @return the number of literal words |
||||||
|
*/ |
||||||
|
public int getNumberOfLiteralWords() { |
||||||
|
return (this.parent.buffer[this.position] >>> (1 + runninglengthbits)); |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running bit. |
||||||
|
* |
||||||
|
* @return the running bit |
||||||
|
*/ |
||||||
|
public boolean getRunningBit() { |
||||||
|
return (this.parent.buffer[this.position] & 1) != 0; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Gets the running length. |
||||||
|
* |
||||||
|
* @return the running length |
||||||
|
*/ |
||||||
|
public int getRunningLength() { |
||||||
|
return (this.parent.buffer[this.position] >>> 1) |
||||||
|
& largestrunninglengthcount; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the number of literal words. |
||||||
|
* |
||||||
|
* @param number |
||||||
|
* the new number of literal words |
||||||
|
*/ |
||||||
|
public void setNumberOfLiteralWords(final int number) { |
||||||
|
this.parent.buffer[this.position] |= notrunninglengthplusrunningbit; |
||||||
|
this.parent.buffer[this.position] &= (number << (runninglengthbits + 1)) |
||||||
|
| runninglengthplusrunningbit; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running bit. |
||||||
|
* |
||||||
|
* @param b |
||||||
|
* the new running bit |
||||||
|
*/ |
||||||
|
public void setRunningBit(final boolean b) { |
||||||
|
if (b) |
||||||
|
this.parent.buffer[this.position] |= 1; |
||||||
|
else |
||||||
|
this.parent.buffer[this.position] &= ~1; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Sets the running length. |
||||||
|
* |
||||||
|
* @param number |
||||||
|
* the new running length |
||||||
|
*/ |
||||||
|
public void setRunningLength(final int number) { |
||||||
|
this.parent.buffer[this.position] |= shiftedlargestrunninglengthcount; |
||||||
|
this.parent.buffer[this.position] &= (number << 1) |
||||||
|
| notshiftedlargestrunninglengthcount; |
||||||
|
} |
||||||
|
|
||||||
|
/** |
||||||
|
* Return the size in uncompressed words represented by this running |
||||||
|
* length word. |
||||||
|
* |
||||||
|
* @return the int |
||||||
|
*/ |
||||||
|
public int size() { |
||||||
|
return getRunningLength() + getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
* @see java.lang.Object#toString() |
||||||
|
*/ |
||||||
|
@Override |
||||||
|
public String toString() { |
||||||
|
return "running bit = " + getRunningBit() |
||||||
|
+ " running length = " + getRunningLength() |
||||||
|
+ " number of lit. words " + getNumberOfLiteralWords(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public RunningLengthWord32 clone() throws CloneNotSupportedException { |
||||||
|
RunningLengthWord32 answer; |
||||||
|
answer = (RunningLengthWord32) super.clone(); |
||||||
|
answer.parent = this.parent; |
||||||
|
answer.position = this.position; |
||||||
|
return answer; |
||||||
|
} |
||||||
|
|
||||||
|
/** The array of words. */ |
||||||
|
public EWAHCompressedBitmap32 parent; |
||||||
|
|
||||||
|
/** The position in array. */ |
||||||
|
public int position; |
||||||
|
|
||||||
|
/** |
||||||
|
* number of bits dedicated to marking of the running length of clean |
||||||
|
* words |
||||||
|
*/ |
||||||
|
public static final int runninglengthbits = 16; |
||||||
|
|
||||||
|
private static final int literalbits = 32 - 1 - runninglengthbits; |
||||||
|
|
||||||
|
/** largest number of literal words in a run. */ |
||||||
|
public static final int largestliteralcount = (1 << literalbits) - 1; |
||||||
|
|
||||||
|
/** largest number of clean words in a run */ |
||||||
|
public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1; |
||||||
|
|
||||||
|
private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1; |
||||||
|
|
||||||
|
private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1; |
||||||
|
|
||||||
|
private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit; |
||||||
|
|
||||||
|
private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount; |
||||||
|
|
||||||
|
} |
Loading…
Reference in new issue