diff --git a/build.third_step6.gradle b/build.third_step6.gradle index 7b416a1f2..6a77d1092 100644 --- a/build.third_step6.gradle +++ b/build.third_step6.gradle @@ -45,7 +45,8 @@ sourceSets{ "${srcDir}/fine-kryo/src", "${srcDir}/fine-lz4/src", "${srcDir}/fine-log4j/src", - "${srcDir}/fine-jgit/src" + "${srcDir}/fine-jgit/src", + "${srcDir}/fine-roaringbitmap/src" ] } } @@ -118,6 +119,7 @@ task copyFiles(type:Copy,dependsOn:'compileJava'){ with dataContent.call("${srcDir}/fine-log4j/resources") with dataContent.call("${srcDir}/fine-jgit/src") with dataContent.call("${srcDir}/fine-jgit/resources") + with dataContent.call("${srcDir}/fine-roaringbitmap/src") into "${classesDir}" } } diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ArrayContainer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ArrayContainer.java new file mode 100644 index 000000000..bdf19e669 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ArrayContainer.java @@ -0,0 +1,1240 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. 
+ */ + +package com.fr.third.bitmap.roaringbitmap; + +import com.fr.third.bitmap.roaringbitmap.buffer.MappeableArrayContainer; +import com.fr.third.bitmap.roaringbitmap.buffer.MappeableContainer; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ShortBuffer; +import java.util.Arrays; +import java.util.Iterator; + + +/** + * Simple container made of an array of 16-bit integers + */ +public final class ArrayContainer extends Container implements Cloneable { + static final int DEFAULT_MAX_SIZE = 4096;// containers with DEFAULT_MAX_SIZE or fewer integers + private static final int DEFAULT_INIT_SIZE = 4; + private static final int ARRAY_LAZY_LOWERBOUND = 1024; + // should be ArrayContainers + private static final long serialVersionUID = 1L; + protected int cardinality = 0; + short[] content; + + /** + * Create an array container with default capacity + */ + public ArrayContainer() { + this(DEFAULT_INIT_SIZE); + } + + + /** + * Create an array container with specified capacity + * + * @param capacity The capacity of the container + */ + public ArrayContainer(final int capacity) { + content = new short[capacity]; + } + + /** + * Create an array container with a run of ones from firstOfRun (inclusive) to lastOfRun (exclusive). Caller is + * responsible for making sure the range is small enough that ArrayContainer is appropriate. 
+ * + * @param firstOfRun first index + * @param lastOfRun last index (range is exclusive) + */ + public ArrayContainer(final int firstOfRun, final int lastOfRun) { + final int valuesInRange = lastOfRun - firstOfRun; + this.content = new short[valuesInRange]; + for (int i = 0; i < valuesInRange; ++i) { + content[i] = (short) (firstOfRun + i); + } + cardinality = valuesInRange; + } + + /** + * Create a new container holding a copy of the first newCard values of newContent + * + * @param newCard desired cardinality + * @param newContent actual values (length should equal or exceed cardinality) + */ + public ArrayContainer(int newCard, short[] newContent) { + this.cardinality = newCard; + this.content = Arrays.copyOf(newContent, newCard); + } + + /** + * Creates a new non-mappeable container from a mappeable one. This copies the data. + * + * @param bc the original container + */ + public ArrayContainer(MappeableArrayContainer bc) { + this.cardinality = bc.getCardinality(); + this.content = bc.toShortArray(); + } + + protected ArrayContainer(short[] newContent) { + this.cardinality = newContent.length; + this.content = newContent; + } + + protected static int serializedSizeInBytes(int cardinality) { + return cardinality * 2 + 2; + } + + @Override + public Container add(int begin, int end) { + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + // TODO: may need to convert to a RunContainer + int indexstart = Util.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart = -indexstart - 1; + } + int indexend = Util.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = end - begin; + int newcardinality = indexstart + (cardinality - indexend) + rangelength; + if (newcardinality > DEFAULT_MAX_SIZE) { + BitmapContainer a = 
this.toBitmapContainer(); + return a.iadd(begin, end); + } + ArrayContainer answer = new ArrayContainer(newcardinality, content); + System.arraycopy(content, indexend, answer.content, indexstart + rangelength, + cardinality - indexend); + for (int k = 0; k < rangelength; ++k) { + answer.content[k + indexstart] = (short) (begin + k); + } + answer.cardinality = newcardinality; + return answer; + } + + + /** + * running time is in O(n) time if insert is not in order. + */ + @Override + public Container add(final short x) { + int loc = Util.unsignedBinarySearch(content, 0, cardinality, x); + if (loc < 0) { + // Transform the ArrayContainer to a BitmapContainer + // when cardinality = DEFAULT_MAX_SIZE + if (cardinality >= DEFAULT_MAX_SIZE) { + BitmapContainer a = this.toBitmapContainer(); + a.add(x); + return a; + } + if (cardinality >= this.content.length) { + increaseCapacity(); + } + // insertion : shift the elements > x by one position to + // the right + // and put x in it's appropriate place + System.arraycopy(content, -loc - 1, content, -loc, cardinality + loc + 1); + content[-loc - 1] = x; + ++cardinality; + } + return this; + } + + private int advance(ShortIterator it) { + if (it.hasNext()) { + return Util.toIntUnsigned(it.next()); + } else { + return -1; + } + } + + @Override + public ArrayContainer and(final ArrayContainer value2) { + ArrayContainer value1 = this; + final int desiredCapacity = Math.min(value1.getCardinality(), value2.getCardinality()); + ArrayContainer answer = new ArrayContainer(desiredCapacity); + answer.cardinality = Util.unsignedIntersect2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content); + return answer; + } + + @Override + public Container and(BitmapContainer x) { + return x.and(this); + } + + @Override + // see andNot for an approach that might be better. 
+ public Container and(RunContainer x) { + return x.and(this); + } + + @Override + public int andCardinality(final ArrayContainer value2) { + return Util.unsignedLocalIntersect2by2Cardinality(content, cardinality, value2.content, + value2.getCardinality()); + } + + @Override + public int andCardinality(BitmapContainer x) { + return x.andCardinality(this); + } + + @Override + // see andNot for an approach that might be better. + public int andCardinality(RunContainer x) { + return x.andCardinality(this); + } + + @Override + public ArrayContainer andNot(final ArrayContainer value2) { + ArrayContainer value1 = this; + final int desiredCapacity = value1.getCardinality(); + ArrayContainer answer = new ArrayContainer(desiredCapacity); + answer.cardinality = Util.unsignedDifference(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content); + return answer; + } + + @Override + public ArrayContainer andNot(BitmapContainer value2) { + final ArrayContainer answer = new ArrayContainer(content.length); + int pos = 0; + for (int k = 0; k < cardinality; ++k) { + short val = this.content[k]; + if (!value2.contains(val)) { + answer.content[pos++] = val; + } + } + answer.cardinality = pos; + return answer; + } + + @Override + public Container andNot(RunContainer x) { + int writeLocation = 0; + int runStart, runEnd; // the current or upcoming run. + int whichRun; + short[] buffer = new short[cardinality]; + if (x.nbrruns == 0) { + return clone(); + } else { + runStart = Util.toIntUnsigned(x.getValue(0)); + runEnd = runStart + Util.toIntUnsigned(x.getLength(0)); + whichRun = 0; + } + + short val; + for (int i = 0; i < cardinality; ++i) { + val = content[i]; + int valInt = Util.toIntUnsigned(val); + if (valInt < runStart) { + buffer[writeLocation++] = val; + } else if (valInt <= runEnd) { + ; // don't want item + } else { + // greater than this run, need to do an advanceUntil on runs + // done sequentially for now (no galloping attempts). 
+ do { + if (whichRun + 1 < x.nbrruns) { + whichRun++; + runStart = Util.toIntUnsigned(x.getValue(whichRun)); + runEnd = runStart + Util.toIntUnsigned(x.getLength(whichRun)); + } else { + runStart = runEnd = (1 << 16) + 1; // infinity.... + } + } while (valInt > runEnd); + --i; // need to re-process this val + } + } + return new ArrayContainer(writeLocation, buffer); + } + + @Override + public void clear() { + cardinality = 0; + } + + @Override + public ArrayContainer clone() { + return new ArrayContainer(this.cardinality, this.content); + } + + @Override + public boolean contains(final short x) { + return Util.unsignedBinarySearch(content, 0, cardinality, x) >= 0; + } + + @Override + public void deserialize(DataInput in) throws IOException { + this.cardinality = 0xFFFF & Short.reverseBytes(in.readShort()); + if (this.content.length < this.cardinality) { + this.content = new short[this.cardinality]; + } + for (int k = 0; k < this.cardinality; ++k) { + this.content[k] = Short.reverseBytes(in.readShort()); + ; + } + } + + // in order + private void emit(short val) { + if (cardinality == content.length) { + increaseCapacity(true); + } + content[cardinality++] = val; + } + + + @Override + public boolean equals(Object o) { + if (o instanceof ArrayContainer) { + ArrayContainer srb = (ArrayContainer) o; + if (srb.cardinality != this.cardinality) { + return false; + } + for (int i = 0; i < this.cardinality; ++i) { + if (this.content[i] != srb.content[i]) { + return false; + } + } + return true; + } else if (o instanceof RunContainer) { + return o.equals(this); + } + return false; + } + + @Override + public void fillLeastSignificant16bits(int[] x, int i, int mask) { + for (int k = 0; k < this.cardinality; ++k) { + x[k + i] = Util.toIntUnsigned(this.content[k]) | mask; + } + + } + + @Override + public Container flip(short x) { + int loc = Util.unsignedBinarySearch(content, 0, cardinality, x); + if (loc < 0) { + // Transform the ArrayContainer to a BitmapContainer + // when 
cardinality = DEFAULT_MAX_SIZE + if (cardinality >= DEFAULT_MAX_SIZE) { + BitmapContainer a = this.toBitmapContainer(); + a.add(x); + return a; + } + if (cardinality >= this.content.length) { + increaseCapacity(); + } + // insertion : shift the elements > x by one position to + // the right + // and put x in it's appropriate place + System.arraycopy(content, -loc - 1, content, -loc, cardinality + loc + 1); + content[-loc - 1] = x; + ++cardinality; + } else { + System.arraycopy(content, loc + 1, content, loc, cardinality - loc - 1); + --cardinality; + } + return this; + } + + @Override + protected int getArraySizeInBytes() { + return cardinality * 2; + } + + @Override + public int getCardinality() { + return cardinality; + } + + @Override + public ShortIterator getReverseShortIterator() { + return new ReverseArrayContainerShortIterator(this); + } + + @Override + public PeekableShortIterator getShortIterator() { + return new ArrayContainerShortIterator(this); + } + + + @Override + public int getSizeInBytes() { + return this.cardinality * 2 + 4; + } + + @Override + public int hashCode() { + int hash = 0; + for (int k = 0; k < cardinality; ++k) { + hash += 31 * hash + content[k]; + } + return hash; + } + + @Override + public Container iadd(int begin, int end) { + // TODO: may need to convert to a RunContainer + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + int indexstart = Util.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart = -indexstart - 1; + } + int indexend = Util.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = end - begin; + int newcardinality = indexstart + (cardinality - indexend) + rangelength; + if (newcardinality > DEFAULT_MAX_SIZE) { + BitmapContainer a = 
this.toBitmapContainer(); + return a.iadd(begin, end); + } + if (newcardinality >= this.content.length) { + increaseCapacity(newcardinality); + } + System.arraycopy(content, indexend, content, indexstart + rangelength, cardinality - indexend); + for (int k = 0; k < rangelength; ++k) { + content[k + indexstart] = (short) (begin + k); + } + cardinality = newcardinality; + return this; + } + + + @Override + public ArrayContainer iand(final ArrayContainer value2) { + ArrayContainer value1 = this; + value1.cardinality = Util.unsignedIntersect2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), value1.content); + return this; + } + + @Override + public Container iand(BitmapContainer value2) { + int pos = 0; + for (int k = 0; k < cardinality; ++k) { + short v = this.content[k]; + if (value2.contains(v)) { + this.content[pos++] = v; + } + } + cardinality = pos; + return this; + } + + @Override + public Container iand(RunContainer x) { + // possible performance issue, not taking advantage of possible inplace + return x.and(this); + } + + + @Override + public ArrayContainer iandNot(final ArrayContainer value2) { + this.cardinality = Util.unsignedDifference(this.content, this.getCardinality(), value2.content, + value2.getCardinality(), this.content); + return this; + } + + @Override + public ArrayContainer iandNot(BitmapContainer value2) { + int pos = 0; + for (int k = 0; k < cardinality; ++k) { + short v = this.content[k]; + if (!value2.contains(v)) { + this.content[pos++] = v; + } + } + this.cardinality = pos; + return this; + } + + @Override + public Container iandNot(RunContainer x) { + // possible performance issue, not taking advantage of possible inplace + // could adapt algo above + return andNot(x); + } + + private void increaseCapacity() { + increaseCapacity(false); + } + + + // temporarily allow an illegally large size, as long as the operation creating + // the illegal container does not return it. 
+ private void increaseCapacity(boolean allowIllegalSize) { + int newCapacity = (this.content.length == 0) ? DEFAULT_INIT_SIZE + : this.content.length < 64 ? this.content.length * 2 + : this.content.length < 1067 ? this.content.length * 3 / 2 + : this.content.length * 5 / 4; + // never allocate more than we will ever need + if (newCapacity > ArrayContainer.DEFAULT_MAX_SIZE && !allowIllegalSize) { + newCapacity = ArrayContainer.DEFAULT_MAX_SIZE; + } + // if we are within 1/16th of the max, go to max + if (newCapacity > ArrayContainer.DEFAULT_MAX_SIZE - ArrayContainer.DEFAULT_MAX_SIZE / 16 + && !allowIllegalSize) { + newCapacity = ArrayContainer.DEFAULT_MAX_SIZE; + } + this.content = Arrays.copyOf(this.content, newCapacity); + } + + private void increaseCapacity(int min) { + int newCapacity = (this.content.length == 0) ? DEFAULT_INIT_SIZE + : this.content.length < 64 ? this.content.length * 2 + : this.content.length < 1024 ? this.content.length * 3 / 2 + : this.content.length * 5 / 4; + if (newCapacity < min) { + newCapacity = min; + } + // never allocate more than we will ever need + if (newCapacity > ArrayContainer.DEFAULT_MAX_SIZE) { + newCapacity = ArrayContainer.DEFAULT_MAX_SIZE; + } + // if we are within 1/16th of the max, go to max + if (newCapacity > ArrayContainer.DEFAULT_MAX_SIZE - ArrayContainer.DEFAULT_MAX_SIZE / 16) { + newCapacity = ArrayContainer.DEFAULT_MAX_SIZE; + } + this.content = Arrays.copyOf(this.content, newCapacity); + } + + @Override + public Container inot(final int firstOfRange, final int lastOfRange) { + // TODO: may need to convert to a RunContainer + // determine the span of array indices to be affected + int startIndex = Util.unsignedBinarySearch(content, 0, cardinality, (short) firstOfRange); + if (startIndex < 0) { + startIndex = -startIndex - 1; + } + int lastIndex = Util.unsignedBinarySearch(content, 0, cardinality, (short) (lastOfRange - 1)); + if (lastIndex < 0) { + lastIndex = -lastIndex - 1 - 1; + } + final int 
currentValuesInRange = lastIndex - startIndex + 1; + final int spanToBeFlipped = lastOfRange - firstOfRange; + final int newValuesInRange = spanToBeFlipped - currentValuesInRange; + final short[] buffer = new short[newValuesInRange]; + final int cardinalityChange = newValuesInRange - currentValuesInRange; + final int newCardinality = cardinality + cardinalityChange; + + if (cardinalityChange > 0) { // expansion, right shifting needed + if (newCardinality > content.length) { + // so big we need a bitmap? + if (newCardinality > DEFAULT_MAX_SIZE) { + return toBitmapContainer().inot(firstOfRange, lastOfRange); + } + content = Arrays.copyOf(content, newCardinality); + } + // slide right the contents after the range + System.arraycopy(content, lastIndex + 1, content, lastIndex + 1 + cardinalityChange, + cardinality - 1 - lastIndex); + negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange); + } else { // no expansion needed + negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange); + if (cardinalityChange < 0) { + // contraction, left sliding. 
+ // Leave array oversize + System.arraycopy(content, startIndex + newValuesInRange - cardinalityChange, content, + startIndex + newValuesInRange, newCardinality - (startIndex + newValuesInRange)); + } + } + cardinality = newCardinality; + return this; + } + + @Override + public boolean intersects(ArrayContainer value2) { + ArrayContainer value1 = this; + return Util.unsignedIntersects(value1.content, value1.getCardinality(), value2.content, + value2.getCardinality()); + } + + + @Override + public boolean intersects(BitmapContainer x) { + return x.intersects(this); + } + + @Override + public boolean intersects(RunContainer x) { + return x.intersects(this); + } + + + @Override + public Container ior(final ArrayContainer value2) { + return this.or(value2); + } + + @Override + public Container ior(BitmapContainer x) { + return x.or(this); + } + + @Override + public Container ior(RunContainer x) { + // possible performance issue, not taking advantage of possible inplace + return x.or(this); + } + + @Override + public Container iremove(int begin, int end) { + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + int indexstart = Util.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart = -indexstart - 1; + } + int indexend = Util.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = indexend - indexstart; + System.arraycopy(content, indexstart + rangelength, content, indexstart, + cardinality - indexstart - rangelength); + cardinality -= rangelength; + return this; + } + + @Override + public Iterator iterator() { + return new Iterator() { + short pos = 0; + + @Override + public boolean hasNext() { + return pos < ArrayContainer.this.cardinality; + } + + @Override + public Short next() { + return 
ArrayContainer.this.content[pos++]; + } + + @Override + public void remove() { + ArrayContainer.this.remove(ArrayContainer.this.content[pos - 1]); // remove(short) takes a value, so pass the element last returned by next(), not its index + pos--; + } + }; + } + + @Override + public Container ixor(final ArrayContainer value2) { + return this.xor(value2); + } + + @Override + public Container ixor(BitmapContainer x) { + return x.xor(this); + } + + + @Override + public Container ixor(RunContainer x) { + // possible performance issue, not taking advantage of possible inplace + return x.xor(this); + } + + + @Override + public Container limit(int maxcardinality) { + if (maxcardinality < this.getCardinality()) { + return new ArrayContainer(maxcardinality, this.content); + } else { + return clone(); + } + } + + protected void loadData(final BitmapContainer bitmapContainer) { + this.cardinality = bitmapContainer.cardinality; + bitmapContainer.fillArray(content); + } + + // for use in inot range known to be nonempty + private void negateRange(final short[] buffer, final int startIndex, final int lastIndex, + final int startRange, final int lastRange) { + // compute the negation into buffer + + int outPos = 0; + int inPos = startIndex; // value here always >= valInRange, + // until it is exhausted + // n.b., we can start initially exhausted. 
+ + int valInRange = startRange; + for (; valInRange < lastRange && inPos <= lastIndex; ++valInRange) { + if ((short) valInRange != content[inPos]) { + buffer[outPos++] = (short) valInRange; + } else { + ++inPos; + } + } + + // if there are extra items (greater than the biggest + // pre-existing one in range), buffer them + for (; valInRange < lastRange; ++valInRange) { + buffer[outPos++] = (short) valInRange; + } + + if (outPos != buffer.length) { + throw new RuntimeException( + "negateRange: outPos " + outPos + " whereas buffer.length=" + buffer.length); + } + // copy back from buffer...caller must ensure there is room + int i = startIndex; + for (short item : buffer) { + content[i++] = item; + } + } + + // shares lots of code with inot; candidate for refactoring + @Override + public Container not(final int firstOfRange, final int lastOfRange) { + // TODO: may need to convert to a RunContainer + if (firstOfRange >= lastOfRange) { + return clone(); // empty range + } + + // determine the span of array indices to be affected + int startIndex = Util.unsignedBinarySearch(content, 0, cardinality, (short) firstOfRange); + if (startIndex < 0) { + startIndex = -startIndex - 1; + } + int lastIndex = Util.unsignedBinarySearch(content, 0, cardinality, (short) (lastOfRange - 1)); + if (lastIndex < 0) { + lastIndex = -lastIndex - 2; + } + final int currentValuesInRange = lastIndex - startIndex + 1; + final int spanToBeFlipped = lastOfRange - firstOfRange; + final int newValuesInRange = spanToBeFlipped - currentValuesInRange; + final int cardinalityChange = newValuesInRange - currentValuesInRange; + final int newCardinality = cardinality + cardinalityChange; + + if (newCardinality > DEFAULT_MAX_SIZE) { + return toBitmapContainer().not(firstOfRange, lastOfRange); + } + + ArrayContainer answer = new ArrayContainer(newCardinality); + + // copy stuff before the active area + System.arraycopy(content, 0, answer.content, 0, startIndex); + + int outPos = startIndex; + int inPos = 
startIndex; // item at inPos always >= valInRange + + int valInRange = firstOfRange; + for (; valInRange < lastOfRange && inPos <= lastIndex; ++valInRange) { + if ((short) valInRange != content[inPos]) { + answer.content[outPos++] = (short) valInRange; + } else { + ++inPos; + } + } + + for (; valInRange < lastOfRange; ++valInRange) { + answer.content[outPos++] = (short) valInRange; + } + + // content after the active range + for (int i = lastIndex + 1; i < cardinality; ++i) { + answer.content[outPos++] = content[i]; + } + answer.cardinality = newCardinality; + return answer; + } + + @Override + int numberOfRuns() { + if (cardinality == 0) { + return 0; // should never happen + } + int numRuns = 1; + int oldv = Util.toIntUnsigned(content[0]); + for (int i = 1; i < cardinality; i++) { + int newv = Util.toIntUnsigned(content[i]); + if (oldv + 1 != newv) { + ++numRuns; + } + oldv = newv; + } + return numRuns; + } + + @Override + public Container or(final ArrayContainer value2) { + final ArrayContainer value1 = this; + int totalCardinality = value1.getCardinality() + value2.getCardinality(); + if (totalCardinality > DEFAULT_MAX_SIZE) {// it could be a bitmap! 
+ BitmapContainer bc = new BitmapContainer(); + for (int k = 0; k < value2.cardinality; ++k) { + short v = value2.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + bc.bitmap[i] |= (1L << v); + } + for (int k = 0; k < this.cardinality; ++k) { + short v = this.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + bc.bitmap[i] |= (1L << v); + } + bc.cardinality = 0; + for (long k : bc.bitmap) { + bc.cardinality += Long.bitCount(k); + } + if (bc.cardinality <= DEFAULT_MAX_SIZE) { + return bc.toArrayContainer(); + } + return bc; + } + final int desiredCapacity = totalCardinality; // Math.min(BitmapContainer.MAX_CAPACITY, + // totalCardinality); + ArrayContainer answer = new ArrayContainer(desiredCapacity); + answer.cardinality = Util.unsignedUnion2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content); + return answer; + } + + @Override + public Container or(BitmapContainer x) { + return x.or(this); + } + + @Override + public Container or(RunContainer x) { + return x.or(this); + } + + protected Container or(ShortIterator it) { + return or(it, false); + } + + /** + * it must return items in (unsigned) sorted order. Possible candidate for Container interface? + **/ + private Container or(ShortIterator it, final boolean exclusive) { + ArrayContainer ac = new ArrayContainer(); + int myItPos = 0; + ac.cardinality = 0; + // do a merge. int -1 denotes end of input. + int myHead = (myItPos == cardinality) ? -1 : Util.toIntUnsigned(content[myItPos++]); + int hisHead = advance(it); + + while (myHead != -1 && hisHead != -1) { + if (myHead < hisHead) { + ac.emit((short) myHead); + myHead = (myItPos == cardinality) ? -1 : Util.toIntUnsigned(content[myItPos++]); + } else if (myHead > hisHead) { + ac.emit((short) hisHead); + hisHead = advance(it); + } else { + if (!exclusive) { + ac.emit((short) hisHead); + } + hisHead = advance(it); + myHead = (myItPos == cardinality) ? 
-1 : Util.toIntUnsigned(content[myItPos++]); + } + } + + while (myHead != -1) { + ac.emit((short) myHead); + myHead = (myItPos == cardinality) ? -1 : Util.toIntUnsigned(content[myItPos++]); + } + + while (hisHead != -1) { + ac.emit((short) hisHead); + hisHead = advance(it); + } + + if (ac.cardinality > DEFAULT_MAX_SIZE) { + return ac.toBitmapContainer(); + } else { + return ac; + } + } + + @Override + public int rank(short lowbits) { + int answer = Util.unsignedBinarySearch(content, 0, cardinality, lowbits); + if (answer >= 0) { + return answer + 1; + } else { + return -answer - 1; + } + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + deserialize(in); + } + + @Override + public Container remove(int begin, int end) { + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + int indexstart = Util.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart = -indexstart - 1; + } + int indexend = Util.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = indexend - indexstart; + ArrayContainer answer = clone(); + System.arraycopy(content, indexstart + rangelength, answer.content, indexstart, + cardinality - indexstart - rangelength); + answer.cardinality = cardinality - rangelength; + return answer; + } + + @Override + public Container remove(final short x) { + final int loc = Util.unsignedBinarySearch(content, 0, cardinality, x); + if (loc >= 0) { + // insertion + System.arraycopy(content, loc + 1, content, loc, cardinality - loc - 1); + --cardinality; + } + return this; + } + + @Override + public Container repairAfterLazy() { + return this; + } + + @Override + public Container runOptimize() { + // TODO: consider borrowing the BitmapContainer idea 
of early + // abandonment + // with ArrayContainers, when the number of runs in the arrayContainer + // passes some threshold based on the cardinality. + int numRuns = numberOfRuns(); + int sizeAsRunContainer = RunContainer.serializedSizeInBytes(numRuns); + if (getArraySizeInBytes() > sizeAsRunContainer) { + return new RunContainer(this, numRuns); // this could be maybe + // faster if initial + // container is a bitmap + } else { + return this; + } + } + + @Override + public short select(int j) { + return this.content[j]; + } + + @Override + public void serialize(DataOutput out) throws IOException { + out.writeShort(Short.reverseBytes((short) this.cardinality)); + // little endian + for (int k = 0; k < this.cardinality; ++k) { + out.writeShort(Short.reverseBytes(this.content[k])); + } + } + + @Override + public int serializedSizeInBytes() { + return serializedSizeInBytes(cardinality); + } + + /** + * Copies the data in a bitmap container. + * + * @return the bitmap container + */ + public BitmapContainer toBitmapContainer() { + BitmapContainer bc = new BitmapContainer(); + bc.loadData(this); + return bc; + } + + @Override + public MappeableContainer toMappeableContainer() { + return new MappeableArrayContainer(this); + } + + /** + * Return the content of this container as a ShortBuffer. This creates a copy and might be + * relatively slow. 
+ * + * @return the ShortBuffer + */ + public ShortBuffer toShortBuffer() { + ShortBuffer sb = ShortBuffer.allocate(this.cardinality); + sb.put(this.content, 0, this.cardinality); + return sb; + } + + @Override + public String toString() { + if (this.cardinality == 0) { + return "{}"; + } + StringBuilder sb = new StringBuilder(); + sb.append("{"); + for (int i = 0; i < this.cardinality - 1; i++) { + sb.append(this.content[i]); + sb.append(","); + } + sb.append(this.content[this.cardinality - 1]); + sb.append("}"); + return sb.toString(); + } + + @Override + public void trim() { + if (this.content.length == this.cardinality) { + return; + } + this.content = Arrays.copyOf(this.content, this.cardinality); + } + + @Override + protected void writeArray(DataOutput out) throws IOException { + // little endian + for (int k = 0; k < this.cardinality; ++k) { + short v = this.content[k]; + out.write(v & 0xFF); + out.write((v >>> 8) & 0xFF); + } + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } + + @Override + public Container xor(final ArrayContainer value2) { + final ArrayContainer value1 = this; + final int totalCardinality = value1.getCardinality() + value2.getCardinality(); + if (totalCardinality > DEFAULT_MAX_SIZE) {// it could be a bitmap! 
+ BitmapContainer bc = new BitmapContainer(); + for (int k = 0; k < value2.cardinality; ++k) { + short v = value2.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + bc.bitmap[i] ^= (1L << v); + } + for (int k = 0; k < this.cardinality; ++k) { + short v = this.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + bc.bitmap[i] ^= (1L << v); + } + bc.cardinality = 0; + for (long k : bc.bitmap) { + bc.cardinality += Long.bitCount(k); + } + if (bc.cardinality <= DEFAULT_MAX_SIZE) { + return bc.toArrayContainer(); + } + return bc; + } + final int desiredCapacity = totalCardinality; + ArrayContainer answer = new ArrayContainer(desiredCapacity); + answer.cardinality = Util.unsignedExclusiveUnion2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content); + return answer; + } + + @Override + public Container xor(BitmapContainer x) { + return x.xor(this); + } + + @Override + public Container xor(RunContainer x) { + return x.xor(this); + } + + + protected Container xor(ShortIterator it) { + return or(it, true); + } + + @Override + public void forEach(short msb, IntConsumer ic) { + int high = ((int) msb) << 16; + for (int k = 0; k < cardinality; ++k) { + ic.accept((content[k] & 0xFFFF) | high); + } + } + + protected Container lazyor(ArrayContainer value2) { + final ArrayContainer value1 = this; + int totalCardinality = value1.getCardinality() + value2.getCardinality(); + if (totalCardinality > ARRAY_LAZY_LOWERBOUND) {// it could be a bitmap! 
BitmapContainer bc = new BitmapContainer(); + for (int k = 0; k < value2.cardinality; ++k) { + short v = value2.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + bc.bitmap[i] |= (1L << v); + } + for (int k = 0; k < this.cardinality; ++k) { + short v = this.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + bc.bitmap[i] |= (1L << v); + } + bc.cardinality = -1; + return bc; + } + final int desiredCapacity = totalCardinality; // Math.min(BitmapContainer.MAX_CAPACITY, + // totalCardinality); + ArrayContainer answer = new ArrayContainer(desiredCapacity); + answer.cardinality = Util.unsignedUnion2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content); + return answer; + + } + + +} + + +final class ArrayContainerShortIterator implements PeekableShortIterator { + int pos; + ArrayContainer parent; + + ArrayContainerShortIterator() { + } + + ArrayContainerShortIterator(ArrayContainer p) { + wrap(p); + } + + @Override + public void advanceIfNeeded(short minval) { + pos = Util.advanceUntil(parent.content, pos - 1, parent.cardinality, minval); + } + + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos < parent.cardinality; + } + + @Override + public short next() { + return parent.content[pos++]; + } + + @Override + public int nextAsInt() { + return Util.toIntUnsigned(parent.content[pos++]); + } + + @Override + public short peekNext() { + return parent.content[pos]; + } + + + @Override + public void remove() { + parent.remove(parent.content[pos - 1]); // remove(short) takes a value, so pass the element last returned by next(), not its index + pos--; + } + + void wrap(ArrayContainer p) { + parent = p; + pos = 0; + } + +}; + + +final class ReverseArrayContainerShortIterator implements ShortIterator { + int pos; + ArrayContainer parent; + + ReverseArrayContainerShortIterator() { + } + + 
ReverseArrayContainerShortIterator(ArrayContainer p) { + wrap(p); + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos >= 0; + } + + @Override + public short next() { + return parent.content[pos--]; + } + + @Override + public int nextAsInt() { + return Util.toIntUnsigned(parent.content[pos--]); + } + + + @Override + public void remove() { + parent.remove((short) (pos + 1)); + pos++; + } + + void wrap(ArrayContainer p) { + parent = p; + pos = parent.cardinality - 1; + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitSetUtil.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitSetUtil.java new file mode 100644 index 000000000..c213e2a6a --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitSetUtil.java @@ -0,0 +1,108 @@ +package com.fr.third.bitmap.roaringbitmap; + + +import java.util.Arrays; +import java.util.BitSet; + + +/*** + * + * This class provides convenience functions to manipulate BitSet and RoaringBitmap objects. 
+ * + */ +public class BitSetUtil { + // todo: add a method to convert a RoaringBitmap to a BitSet using BitSet.valueOf + + // a block consists has a maximum of 1024 words, each representing 64 bits, + // thus representing at maximum 65536 bits + static final private int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; // + // 64-bit + // word + + private static ArrayContainer arrayContainerOf(final int from, final int to, + final int cardinality, final long[] words) { + // precondition: cardinality is max 4096 + final short[] content = new short[cardinality]; + int index = 0; + + for (int i = from, socket = 0; i < to; ++i, socket += Long.SIZE) { + long word = words[i]; + while (word != 0) { + long t = word & -word; + content[index++] = (short) (socket + Long.bitCount(t - 1)); + word ^= t; + } + } + return new ArrayContainer(content); + } + + /** + * Generate a RoaringBitmap out of a long[], each long using little-endian representation of its + * bits + * + * @param words array of longs (will not be modified) + * @return roaring bitmap + * @see BitSet#toLongArray() for an equivalent + */ + public static RoaringBitmap bitmapOf(final long[] words) { + // split long[] into blocks. 
+ // each block becomes a single container, if any bit is set + final RoaringBitmap ans = new RoaringBitmap(); + int containerIndex = 0; + for (int from = 0; from < words.length; from += BLOCK_LENGTH) { + final int to = Math.min(from + BLOCK_LENGTH, words.length); + final int blockCardinality = cardinality(from, to, words); + if (blockCardinality > 0) { + ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(from * Long.SIZE), + BitSetUtil.containerOf(from, to, blockCardinality, words)); + } + } + return ans; + } + + private static int cardinality(final int from, final int to, final long[] words) { + int sum = 0; + for (int i = from; i < to; i++) { + sum += Long.bitCount(words[i]); + } + return sum; + } + + + private static Container containerOf(final int from, final int to, final int blockCardinality, + final long[] words) { + // find the best container available + if (blockCardinality <= ArrayContainer.DEFAULT_MAX_SIZE) { + // containers with DEFAULT_MAX_SIZE or less integers should be + // ArrayContainers + return arrayContainerOf(from, to, blockCardinality, words); + } else { + // otherwise use bitmap container + return new BitmapContainer(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH), + blockCardinality); + } + } + + + /** + * Compares a RoaringBitmap and a BitSet. They are equal if and only if they contain the same set + * of integers. 
+ * + * @param bitset first object to be compared + * @param bitmap second object to be compared + * @return whether they are equals + */ + public static boolean equals(final BitSet bitset, final RoaringBitmap bitmap) { + if (bitset.cardinality() != bitmap.getCardinality()) { + return false; + } + final IntIterator it = bitmap.getIntIterator(); + while (it.hasNext()) { + int val = it.next(); + if (!bitset.get(val)) { + return false; + } + } + return true; + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitmapContainer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitmapContainer.java new file mode 100644 index 000000000..5d33fe299 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitmapContainer.java @@ -0,0 +1,1392 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap; + +import com.fr.third.bitmap.roaringbitmap.buffer.MappeableBitmapContainer; +import com.fr.third.bitmap.roaringbitmap.buffer.MappeableContainer; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.LongBuffer; +import java.util.Arrays; +import java.util.Iterator; + + +/** + * Simple bitset-like container. 
+ */ +public final class BitmapContainer extends Container implements Cloneable { + /** + * optimization flag: whether the cardinality of the bitmaps is maintained through branchless + * operations + */ + public static final boolean USE_BRANCHLESS = true; + protected static final int MAX_CAPACITY = 1 << 16; + private static final long serialVersionUID = 2L; + // 64 words can have max 32 runs per word, max 2k runs + // bail out early when the number of runs is excessive, without + // an exact count (just a decent lower bound) + private static final int BLOCKSIZE = 128; + // nruns value for which RunContainer.serializedSizeInBytes == + // BitmapContainer.getArraySizeInBytes() + private final int MAXRUNS = (getArraySizeInBytes() - 2) / 4; + long[] bitmap; + int cardinality; + + /** + * Create a bitmap container with all bits set to false + */ + public BitmapContainer() { + this.cardinality = 0; + this.bitmap = new long[MAX_CAPACITY / 64]; + } + + /** + * Create a bitmap container with a run of ones from firstOfRun to lastOfRun. Caller must ensure + * that the range isn't so small that an ArrayContainer should have been created instead + * + * @param firstOfRun first index + * @param lastOfRun last index (range is exclusive) + */ + public BitmapContainer(final int firstOfRun, final int lastOfRun) { + this.cardinality = lastOfRun - firstOfRun; + this.bitmap = new long[MAX_CAPACITY / 64]; + Util.setBitmapRange(bitmap, firstOfRun, lastOfRun); + } + + private BitmapContainer(int newCardinality, long[] newBitmap) { + this.cardinality = newCardinality; + this.bitmap = Arrays.copyOf(newBitmap, newBitmap.length); + } + + + /** + * Create a new container, no copy is made. + * + * @param newBitmap content + * @param newCardinality desired cardinality. + */ + public BitmapContainer(long[] newBitmap, int newCardinality) { + this.cardinality = newCardinality; + this.bitmap = newBitmap; + } + + + /** + * Creates a new non-mappeable container from a mappeable one. 
This copies the data. + * + * @param bc the original container + */ + public BitmapContainer(MappeableBitmapContainer bc) { + this.cardinality = bc.getCardinality(); + this.bitmap = bc.toLongArray(); + } + + /** + * Return a bitmap iterator over this array + * + * @param bitmap array to be iterated over + * @return an iterator + */ + public static ShortIterator getReverseShortIterator(long[] bitmap) { + return new ReverseBitmapContainerShortIterator(bitmap); + } + + /** + * Return a bitmap iterator over this array + * + * @param bitmap array to be iterated over + * @return an iterator + */ + public static PeekableShortIterator getShortIterator(long[] bitmap) { + return new BitmapContainerShortIterator(bitmap); + } + + // the parameter is for overloading and symmetry with ArrayContainer + protected static int serializedSizeInBytes(int unusedCardinality) { + return MAX_CAPACITY / 8; + } + + @Override + public Container add(int begin, int end) { + // TODO: may need to convert to a RunContainer + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + BitmapContainer answer = clone(); + Util.setBitmapRange(answer.bitmap, begin, end); + answer.computeCardinality(); + return answer; + } + + + @Override + public Container add(final short i) { + final int x = Util.toIntUnsigned(i); + final long previous = bitmap[x / 64]; + long newval = previous | (1L << x); + bitmap[x / 64] = newval; + if (USE_BRANCHLESS) { + cardinality += (previous ^ newval) >>> x; + } else if (previous != newval) { + ++cardinality; + } + return this; + } + + @Override + public ArrayContainer and(final ArrayContainer value2) { + final ArrayContainer answer = new ArrayContainer(value2.content.length); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = value2.content[k]; + if (this.contains(v)) { + answer.content[answer.cardinality++] = v; + } + } + return answer; + 
} + + @Override + public Container and(final BitmapContainer value2) { + int newCardinality = 0; + for (int k = 0; k < this.bitmap.length; ++k) { + newCardinality += Long.bitCount(this.bitmap[k] & value2.bitmap[k]); + } + if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) { + final BitmapContainer answer = new BitmapContainer(); + for (int k = 0; k < answer.bitmap.length; ++k) { + answer.bitmap[k] = this.bitmap[k] & value2.bitmap[k]; + } + answer.cardinality = newCardinality; + return answer; + } + ArrayContainer ac = new ArrayContainer(newCardinality); + Util.fillArrayAND(ac.content, this.bitmap, value2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public Container and(RunContainer x) { + return x.and(this); + } + + @Override + public int andCardinality(final ArrayContainer value2) { + int answer = 0; + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = value2.content[k]; + if (this.contains(v)) { + answer++; + } + } + return answer; + } + + @Override + public int andCardinality(final BitmapContainer value2) { + int newCardinality = 0; + for (int k = 0; k < this.bitmap.length; ++k) { + newCardinality += Long.bitCount(this.bitmap[k] & value2.bitmap[k]); + } + return newCardinality; + } + + @Override + public int andCardinality(RunContainer x) { + return x.andCardinality(this); + } + + @Override + public Container andNot(final ArrayContainer value2) { + final BitmapContainer answer = clone(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = value2.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + long w = answer.bitmap[i]; + long aft = w & (~(1L << v)); + answer.bitmap[i] = aft; + answer.cardinality -= (w ^ aft) >>> v; + } + if (answer.cardinality <= ArrayContainer.DEFAULT_MAX_SIZE) { + return answer.toArrayContainer(); + } + return answer; + } + + @Override + public Container andNot(final BitmapContainer value2) { + int newCardinality = 0; + for (int k = 0; k < 
this.bitmap.length; ++k) { + newCardinality += Long.bitCount(this.bitmap[k] & (~value2.bitmap[k])); + } + if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) { + final BitmapContainer answer = new BitmapContainer(); + for (int k = 0; k < answer.bitmap.length; ++k) { + answer.bitmap[k] = this.bitmap[k] & (~value2.bitmap[k]); + } + answer.cardinality = newCardinality; + return answer; + } + ArrayContainer ac = new ArrayContainer(newCardinality); + Util.fillArrayANDNOT(ac.content, this.bitmap, value2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public Container andNot(RunContainer x) { + // could be rewritten as return andNot(x.toBitmapOrArrayContainer()); + BitmapContainer answer = this.clone(); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(x.getValue(rlepos)); + int end = start + Util.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.resetBitmapRange(answer.bitmap, start, end); + } + answer.computeCardinality(); + if (answer.getCardinality() > ArrayContainer.DEFAULT_MAX_SIZE) { + return answer; + } else { + return answer.toArrayContainer(); + } + } + + @Override + public void clear() { + if (cardinality != 0) { + cardinality = 0; + Arrays.fill(bitmap, 0); + } + } + + @Override + public BitmapContainer clone() { + return new BitmapContainer(this.cardinality, this.bitmap); + } + + /** + * Recomputes the cardinality of the bitmap. 
+ */ + protected void computeCardinality() { + this.cardinality = 0; + for (int k = 0; k < this.bitmap.length; k++) { + this.cardinality += Long.bitCount(this.bitmap[k]); + } + } + + @Override + public boolean contains(final short i) { + final int x = Util.toIntUnsigned(i); + return (bitmap[x / 64] & (1L << x)) != 0; + } + + @Override + public void deserialize(DataInput in) throws IOException { + // little endian + this.cardinality = 0; + for (int k = 0; k < bitmap.length; ++k) { + long w = Long.reverseBytes(in.readLong()); + bitmap[k] = w; + this.cardinality += Long.bitCount(w); + } + } + + @Override + public boolean equals(Object o) { + if (o instanceof BitmapContainer) { + BitmapContainer srb = (BitmapContainer) o; + if (srb.cardinality != this.cardinality) { + return false; + } + return Arrays.equals(this.bitmap, srb.bitmap); + } else if (o instanceof RunContainer) { + return o.equals(this); + } + return false; + } + + + /** + * Fill the array with set bits + * + * @param array container (should be sufficiently large) + */ + protected void fillArray(final short[] array) { + int pos = 0; + int base = 0; + for (int k = 0; k < bitmap.length; ++k) { + long bitset = bitmap[k]; + while (bitset != 0) { + long t = bitset & -bitset; + array[pos++] = (short) (base + Long.bitCount(t - 1)); + bitset ^= t; + } + base += 64; + } + } + + @Override + public void fillLeastSignificant16bits(int[] x, int i, int mask) { + int pos = i; + int base = mask; + for (int k = 0; k < bitmap.length; ++k) { + long bitset = bitmap[k]; + while (bitset != 0) { + long t = bitset & -bitset; + x[pos++] = base + Long.bitCount(t - 1); + bitset ^= t; + } + base += 64; + } + } + + + @Override + public Container flip(short i) { + final int x = Util.toIntUnsigned(i); + int index = x / 64; + long bef = bitmap[index]; + long mask = 1L << x; + if (cardinality == ArrayContainer.DEFAULT_MAX_SIZE + 1) {// this is + // the + // uncommon + // path + if ((bef & mask) != 0) { + --cardinality; + bitmap[index] &= 
~mask; + return this.toArrayContainer(); + } + } + // TODO: check whether a branchy version could be faster + cardinality += 1 - 2 * ((bef & mask) >>> x); + bitmap[index] ^= mask; + return this; + } + + @Override + protected int getArraySizeInBytes() { + return MAX_CAPACITY / 8; + } + + @Override + public int getCardinality() { + return cardinality; + } + + @Override + public ShortIterator getReverseShortIterator() { + return new ReverseBitmapContainerShortIterator(this.bitmap); + } + + @Override + public PeekableShortIterator getShortIterator() { + return new BitmapContainerShortIterator(this.bitmap); + } + + @Override + public int getSizeInBytes() { + return this.bitmap.length * 8; + } + + @Override + public int hashCode() { + return Arrays.hashCode(this.bitmap); + } + + @Override + public Container iadd(int begin, int end) { + // TODO: may need to convert to a RunContainer + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + Util.setBitmapRange(bitmap, begin, end); + computeCardinality(); + return this; + } + + @Override + public Container iand(final ArrayContainer b2) { + return b2.and(this);// no inplace possible + } + + @Override + public Container iand(final BitmapContainer b2) { + int newCardinality = 0; + for (int k = 0; k < this.bitmap.length; ++k) { + newCardinality += Long.bitCount(this.bitmap[k] & b2.bitmap[k]); + } + if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < this.bitmap.length; ++k) { + this.bitmap[k] = this.bitmap[k] & b2.bitmap[k]; + } + this.cardinality = newCardinality; + return this; + } + ArrayContainer ac = new ArrayContainer(newCardinality); + Util.fillArrayAND(ac.content, this.bitmap, b2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public Container iand(RunContainer x) { + // could probably be replaced with return iand(x.toBitmapOrArrayContainer()); + final 
int card = x.getCardinality(); + if (card <= ArrayContainer.DEFAULT_MAX_SIZE) { + // no point in doing it in-place + ArrayContainer answer = new ArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int runStart = Util.toIntUnsigned(x.getValue(rlepos)); + int runEnd = runStart + Util.toIntUnsigned(x.getLength(rlepos)); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + if (this.contains((short) runValue)) {// it looks like contains() should be cheap enough + // if accessed sequentially + answer.content[answer.cardinality++] = (short) runValue; + } + } + } + return answer; + } + int start = 0; + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int end = Util.toIntUnsigned(x.getValue(rlepos)); + Util.resetBitmapRange(this.bitmap, start, end); + start = end + Util.toIntUnsigned(x.getLength(rlepos)) + 1; + } + Util.resetBitmapRange(this.bitmap, start, Util.maxLowBitAsInteger() + 1); + computeCardinality(); + if (getCardinality() > ArrayContainer.DEFAULT_MAX_SIZE) { + return this; + } else { + return toArrayContainer(); + } + } + + @Override + public Container iandNot(final ArrayContainer b2) { + for (int k = 0; k < b2.cardinality; ++k) { + this.remove(b2.content[k]); + } + if (cardinality <= ArrayContainer.DEFAULT_MAX_SIZE) { + return this.toArrayContainer(); + } + return this; + } + + @Override + public Container iandNot(final BitmapContainer b2) { + int newCardinality = 0; + for (int k = 0; k < this.bitmap.length; ++k) { + newCardinality += Long.bitCount(this.bitmap[k] & (~b2.bitmap[k])); + } + if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < this.bitmap.length; ++k) { + this.bitmap[k] = this.bitmap[k] & (~b2.bitmap[k]); + } + this.cardinality = newCardinality; + return this; + } + ArrayContainer ac = new ArrayContainer(newCardinality); + Util.fillArrayANDNOT(ac.content, this.bitmap, b2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + 
public Container iandNot(RunContainer x) { + // could probably be replaced with return iandNot(x.toBitmapOrArrayContainer()); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(x.getValue(rlepos)); + int end = start + Util.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.resetBitmapRange(this.bitmap, start, end); + } + computeCardinality(); + if (getCardinality() > ArrayContainer.DEFAULT_MAX_SIZE) { + return this; + } else { + return toArrayContainer(); + } + } + + protected Container ilazyor(ArrayContainer value2) { + this.cardinality = -1;// invalid + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = value2.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + this.bitmap[i] |= (1L << v); + } + return this; + } + + protected Container ilazyor(BitmapContainer x) { + this.cardinality = -1;// invalid + for (int k = 0; k < this.bitmap.length; k++) { + this.bitmap[k] |= x.bitmap[k]; + } + return this; + } + + protected Container ilazyor(RunContainer x) { + // could be implemented as return ilazyor(x.toTemporaryBitmap()); + cardinality = -1; // invalid + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(x.getValue(rlepos)); + int end = start + Util.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.setBitmapRange(this.bitmap, start, end); + } + return this; + } + + @Override + public Container inot(final int firstOfRange, final int lastOfRange) { + if (lastOfRange - firstOfRange == MAX_CAPACITY) { + Util.flipBitmapRange(bitmap, firstOfRange, lastOfRange); + cardinality = MAX_CAPACITY - cardinality; + } else if (lastOfRange - firstOfRange > MAX_CAPACITY / 2) { + Util.flipBitmapRange(bitmap, firstOfRange, lastOfRange); + computeCardinality(); + } else { + cardinality += Util.flipBitmapRangeAndCardinalityChange(bitmap, firstOfRange, lastOfRange); + } + if (cardinality <= ArrayContainer.DEFAULT_MAX_SIZE) { + return toArrayContainer(); + } + return this; + } + + @Override + public 
boolean intersects(ArrayContainer value2) { + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + if (this.contains(value2.content[k])) { + return true; + } + } + return false; + } + + @Override + public boolean intersects(BitmapContainer value2) { + for (int k = 0; k < this.bitmap.length; ++k) { + if ((this.bitmap[k] & value2.bitmap[k]) != 0) { + return true; + } + } + return false; + } + + @Override + public boolean intersects(RunContainer x) { + return x.intersects(this); + } + + @Override + public BitmapContainer ior(final ArrayContainer value2) { + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + final int i = Util.toIntUnsigned(value2.content[k]) >>> 6; + + long bef = this.bitmap[i]; + long aft = bef | (1L << value2.content[k]); + this.bitmap[i] = aft; + if (USE_BRANCHLESS) { + cardinality += (bef - aft) >>> 63; + } else { + if (bef != aft) { + cardinality++; + } + } + } + return this; + } + + @Override + public Container ior(final BitmapContainer b2) { + this.cardinality = 0; + for (int k = 0; k < this.bitmap.length; k++) { + long w = this.bitmap[k] | b2.bitmap[k]; + this.bitmap[k] = w; + this.cardinality += Long.bitCount(w); + } + return this; + } + + @Override + public Container ior(RunContainer x) { + // could probably be replaced with return ior(x.toBitmapOrArrayContainer()); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(x.getValue(rlepos)); + int end = start + Util.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.setBitmapRange(this.bitmap, start, end); + } + computeCardinality(); + return this; + } + + @Override + public Container iremove(int begin, int end) { + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + Util.resetBitmapRange(bitmap, begin, end); + computeCardinality(); + if (getCardinality() <= ArrayContainer.DEFAULT_MAX_SIZE) { + return toArrayContainer(); + } + 
return this; + } + + @Override + public Iterator iterator() { + return new Iterator() { + final ShortIterator si = BitmapContainer.this.getShortIterator(); + + @Override + public boolean hasNext() { + return si.hasNext(); + } + + @Override + public Short next() { + return si.next(); + } + + @Override + public void remove() { + // TODO: implement + throw new RuntimeException("unsupported operation: remove"); + } + }; + } + + @Override + public Container ixor(final ArrayContainer value2) { + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short vc = value2.content[k]; + long mask = 1L << vc; + final int index = Util.toIntUnsigned(vc) >>> 6; + long ba = this.bitmap[index]; + // TODO: check whether a branchy version could be faster + this.cardinality += 1 - 2 * ((ba & mask) >>> vc); + this.bitmap[index] = ba ^ mask; + } + if (this.cardinality <= ArrayContainer.DEFAULT_MAX_SIZE) { + return this.toArrayContainer(); + } + return this; + } + + + @Override + public Container ixor(BitmapContainer b2) { + int newCardinality = 0; + for (int k = 0; k < this.bitmap.length; ++k) { + newCardinality += Long.bitCount(this.bitmap[k] ^ b2.bitmap[k]); + } + if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < this.bitmap.length; ++k) { + this.bitmap[k] = this.bitmap[k] ^ b2.bitmap[k]; + } + this.cardinality = newCardinality; + return this; + } + ArrayContainer ac = new ArrayContainer(newCardinality); + Util.fillArrayXOR(ac.content, this.bitmap, b2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public Container ixor(RunContainer x) { + // could probably be replaced with return ixor(x.toBitmapOrArrayContainer()); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(x.getValue(rlepos)); + int end = start + Util.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.flipBitmapRange(this.bitmap, start, end); + } + computeCardinality(); + if (this.getCardinality() > 
ArrayContainer.DEFAULT_MAX_SIZE) { + return this; + } else { + return toArrayContainer(); + } + } + + protected Container lazyor(ArrayContainer value2) { + BitmapContainer answer = this.clone(); + answer.cardinality = -1;// invalid + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = value2.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + answer.bitmap[i] |= (1L << v); + } + return answer; + } + + protected Container lazyor(BitmapContainer x) { + BitmapContainer answer = new BitmapContainer(); + answer.cardinality = -1;// invalid + for (int k = 0; k < this.bitmap.length; k++) { + answer.bitmap[k] = this.bitmap[k] | x.bitmap[k]; + } + return answer; + } + + + protected Container lazyor(RunContainer x) { + BitmapContainer bc = clone(); + bc.cardinality = -1; // invalid + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(x.getValue(rlepos)); + int end = start + Util.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.setBitmapRange(bc.bitmap, start, end); + } + return bc; + } + + @Override + public Container limit(int maxcardinality) { + if (maxcardinality >= this.cardinality) { + return clone(); + } + if (maxcardinality <= ArrayContainer.DEFAULT_MAX_SIZE) { + ArrayContainer ac = new ArrayContainer(maxcardinality); + int pos = 0; + for (int k = 0; (ac.cardinality < maxcardinality) && (k < bitmap.length); ++k) { + long bitset = bitmap[k]; + while ((ac.cardinality < maxcardinality) && (bitset != 0)) { + long t = bitset & -bitset; + ac.content[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + ac.cardinality++; + bitset ^= t; + } + } + return ac; + } + BitmapContainer bc = new BitmapContainer(maxcardinality, this.bitmap); + int s = Util.toIntUnsigned(select(maxcardinality)); + int usedwords = (s + 63) / 64; + int todelete = this.bitmap.length - usedwords; + for (int k = 0; k < todelete; ++k) { + bc.bitmap[bc.bitmap.length - 1 - k] = 0; + } + int lastword = s % 64; + if (lastword != 0) { + bc.bitmap[s / 64] &= 
(0xFFFFFFFFFFFFFFFFL >>> (64 - lastword)); + } + return bc; + } + + protected void loadData(final ArrayContainer arrayContainer) { + this.cardinality = arrayContainer.cardinality; + for (int k = 0; k < arrayContainer.cardinality; ++k) { + final short x = arrayContainer.content[k]; + bitmap[Util.toIntUnsigned(x) / 64] |= (1L << x); + } + } + + /** + * Find the index of the next set bit greater or equal to i, returns -1 if none found. + * + * @param i starting index + * @return index of the next set bit + */ + public int nextSetBit(final int i) { + int x = i >> 6; // i / 64 with sign extension + long w = bitmap[x]; + w >>>= i; + if (w != 0) { + return i + Long.numberOfTrailingZeros(w); + } + for (++x; x < bitmap.length; ++x) { + if (bitmap[x] != 0) { + return x * 64 + Long.numberOfTrailingZeros(bitmap[x]); + } + } + return -1; + } + + /** + * Find the index of the next unset bit greater or equal to i, returns -1 if none found. + * + * @param i starting index + * @return index of the next unset bit + */ + public short nextUnsetBit(final int i) { + int x = i / 64; + long w = ~bitmap[x]; + w >>>= i; + if (w != 0) { + return (short) (i + Long.numberOfTrailingZeros(w)); + } + ++x; + for (; x < bitmap.length; ++x) { + if (bitmap[x] != ~0L) { + return (short) (x * 64 + Long.numberOfTrailingZeros(~bitmap[x])); + } + } + return -1; + } + + + @Override + public Container not(final int firstOfRange, final int lastOfRange) { + BitmapContainer answer = clone(); + return answer.inot(firstOfRange, lastOfRange); + } + + @Override + int numberOfRuns() { + int numRuns = 0; + long nextWord = bitmap[0]; + + for (int i = 0; i < bitmap.length - 1; i++) { + long word = nextWord; + nextWord = bitmap[i + 1]; + numRuns += Long.bitCount((~word) & (word << 1)) + ((word >>> 63) & ~nextWord); + } + + long word = nextWord; + numRuns += Long.bitCount((~word) & (word << 1)); + if ((word & 0x8000000000000000L) != 0) { + numRuns++; + } + + return numRuns; + } + + /** + * Computes the number of runs + 
* + * @return the number of runs + */ + public int numberOfRunsAdjustment() { + int ans = 0; + long nextWord = bitmap[0]; + for (int i = 0; i < bitmap.length - 1; i++) { + final long word = nextWord; + + nextWord = bitmap[i + 1]; + ans += ((word >>> 63) & ~nextWord); + } + final long word = nextWord; + + if ((word & 0x8000000000000000L) != 0) { + ans++; + } + return ans; + } + + /** + * Counts how many runs there is in the bitmap, up to a maximum + * + * @param mustNotExceed maximum of runs beyond which counting is pointless + * @return estimated number of courses + */ + public int numberOfRunsLowerBound(int mustNotExceed) { + int numRuns = 0; + + for (int blockOffset = 0; blockOffset < bitmap.length; blockOffset += BLOCKSIZE) { + + for (int i = blockOffset; i < blockOffset + BLOCKSIZE; i++) { + long word = bitmap[i]; + numRuns += Long.bitCount((~word) & (word << 1)); + } + if (numRuns > mustNotExceed) { + return numRuns; + } + } + return numRuns; + } + + @Override + public BitmapContainer or(final ArrayContainer value2) { + final BitmapContainer answer = clone(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = value2.content[k]; + final int i = Util.toIntUnsigned(v) >>> 6; + long w = answer.bitmap[i]; + long aft = w | (1L << v); + answer.bitmap[i] = aft; + if (USE_BRANCHLESS) { + answer.cardinality += (w - aft) >>> 63; + } else { + if (w != aft) { + answer.cardinality++; + } + } + } + return answer; + } + + @Override + public Container or(final BitmapContainer value2) { + BitmapContainer value1 = this.clone(); + return value1.ior(value2); + } + + @Override + public Container or(RunContainer x) { + return x.or(this); + } + + /** + * Find the index of the previous set bit less than or equal to i, returns -1 if none found. 
+ * + * @param i starting index + * @return index of the previous set bit + */ + public int prevSetBit(final int i) { + int x = i >> 6; // i / 64 with sign extension + long w = bitmap[x]; + w <<= 64 - i - 1; + if (w != 0) { + return i - Long.numberOfLeadingZeros(w); + } + for (--x; x >= 0; --x) { + if (bitmap[x] != 0) { + return x * 64 + 63 - Long.numberOfLeadingZeros(bitmap[x]); + } + } + return -1; + } + + @Override + public int rank(short lowbits) { + int x = Util.toIntUnsigned(lowbits); + int leftover = (x + 1) & 63; + int answer = 0; + for (int k = 0; k < (x + 1) / 64; ++k) { + answer += Long.bitCount(bitmap[k]); + } + if (leftover != 0) { + answer += Long.bitCount(bitmap[(x + 1) / 64] << (64 - leftover)); + } + return answer; + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + deserialize(in); + } + + + @Override + public Container remove(int begin, int end) { + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + BitmapContainer answer = clone(); + Util.resetBitmapRange(answer.bitmap, begin, end); + answer.computeCardinality(); + if (answer.getCardinality() <= ArrayContainer.DEFAULT_MAX_SIZE) { + return answer.toArrayContainer(); + } + return answer; + } + + @Override + public Container remove(final short i) { + final int x = Util.toIntUnsigned(i); + int index = x / 64; + long bef = bitmap[index]; + long mask = 1L << x; + if (cardinality == ArrayContainer.DEFAULT_MAX_SIZE + 1) {// this is + // the + // uncommon + // path + if ((bef & mask) != 0) { + --cardinality; + bitmap[x / 64] = bef & (~mask); + return this.toArrayContainer(); + } + } + long aft = bef & (~mask); + cardinality -= (aft - bef) >>> 63; + bitmap[index] = aft; + return this; + } + + @Override + public Container repairAfterLazy() { + if (getCardinality() < 0) { + computeCardinality(); + if (getCardinality() <= 
ArrayContainer.DEFAULT_MAX_SIZE) { + return this.toArrayContainer(); + } + } + return this; + } + + @Override + public Container runOptimize() { + int numRuns = numberOfRunsLowerBound(MAXRUNS); // decent choice + + int sizeAsRunContainerLowerBound = RunContainer.serializedSizeInBytes(numRuns); + + if (sizeAsRunContainerLowerBound >= getArraySizeInBytes()) { + return this; + } + // else numRuns is a relatively tight bound that needs to be exact + // in some cases (or if we need to make the runContainer the right + // size) + numRuns += numberOfRunsAdjustment(); + int sizeAsRunContainer = RunContainer.serializedSizeInBytes(numRuns); + + if (getArraySizeInBytes() > sizeAsRunContainer) { + return new RunContainer(this, numRuns); + } else { + return this; + } + } + + @Override + public short select(int j) { + int leftover = j; + for (int k = 0; k < bitmap.length; ++k) { + int w = Long.bitCount(bitmap[k]); + if (w > leftover) { + return (short) (k * 64 + Util.select(bitmap[k], leftover)); + } + leftover -= w; + } + throw new IllegalArgumentException("Insufficient cardinality."); + } + + @Override + public void serialize(DataOutput out) throws IOException { + // little endian + for (long w : bitmap) { + out.writeLong(Long.reverseBytes(w)); + } + } + + @Override + public int serializedSizeInBytes() { + return serializedSizeInBytes(0); + } + + /** + * Copies the data to an array container + * + * @return the array container + */ + public ArrayContainer toArrayContainer() { + ArrayContainer ac = new ArrayContainer(cardinality); + ac.loadData(this); + if (ac.getCardinality() != cardinality) { + throw new RuntimeException("Internal error."); + } + return ac; + } + + /** + * Return the content of this container as a LongBuffer. This creates a copy and might be + * relatively slow. 
+ * + * @return the LongBuffer + */ + public LongBuffer toLongBuffer() { + LongBuffer lb = LongBuffer.allocate(bitmap.length); + lb.put(bitmap); + return lb; + } + + @Override + public MappeableContainer toMappeableContainer() { + return new MappeableBitmapContainer(this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + final ShortIterator i = this.getShortIterator(); + sb.append("{"); + while (i.hasNext()) { + sb.append(i.next()); + if (i.hasNext()) { + sb.append(","); + } + } + sb.append("}"); + return sb.toString(); + } + + @Override + public void trim() { + } + + @Override + protected void writeArray(DataOutput out) throws IOException { + serialize(out); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } + + @Override + public Container xor(final ArrayContainer value2) { + final BitmapContainer answer = clone(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short vc = value2.content[k]; + final int index = Util.toIntUnsigned(vc) >>> 6; + final long mask = 1L << vc; + final long val = answer.bitmap[index]; + // TODO: check whether a branchy version could be faster + answer.cardinality += 1 - 2 * ((val & mask) >>> vc); + answer.bitmap[index] = val ^ mask; + } + if (answer.cardinality <= ArrayContainer.DEFAULT_MAX_SIZE) { + return answer.toArrayContainer(); + } + return answer; + } + + @Override + public Container xor(BitmapContainer value2) { + int newCardinality = 0; + for (int k = 0; k < this.bitmap.length; ++k) { + newCardinality += Long.bitCount(this.bitmap[k] ^ value2.bitmap[k]); + } + if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) { + final BitmapContainer answer = new BitmapContainer(); + for (int k = 0; k < answer.bitmap.length; ++k) { + answer.bitmap[k] = this.bitmap[k] ^ value2.bitmap[k]; + } + answer.cardinality = newCardinality; + return answer; + } + ArrayContainer ac = new ArrayContainer(newCardinality); + 
Util.fillArrayXOR(ac.content, this.bitmap, value2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public Container xor(RunContainer x) { + return x.xor(this); + } + + @Override + public void forEach(short msb, IntConsumer ic) { + int high = ((int) msb) << 16; + for (int x = 0; x < bitmap.length; ++x) { + long w = bitmap[x]; + while (w != 0) { + long t = w & -w; + ic.accept((x * 64 + Long.bitCount(t - 1)) | high); + w ^= t; + } + } + } + + @Override + public BitmapContainer toBitmapContainer() { + return this; + } + +} + + +final class BitmapContainerShortIterator implements PeekableShortIterator { + + long w; + int x; + + long[] bitmap; + + BitmapContainerShortIterator() { + } + + BitmapContainerShortIterator(long[] p) { + wrap(p); + } + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return x < bitmap.length; + } + + @Override + public short next() { + long t = w & -w; + short answer = (short) (x * 64 + Long.bitCount(t - 1)); + w ^= t; + while (w == 0) { + ++x; + if (x == bitmap.length) { + break; + } + w = bitmap[x]; + } + return answer; + } + + + @Override + public int nextAsInt() { + long t = w & -w; + int answer = (x << 6) + Long.bitCount(t - 1); + w ^= t; + while (w == 0) { + ++x; + if (x == bitmap.length) { + break; + } + w = bitmap[x]; + } + return answer; + } + + @Override + public void remove() { + // TODO: implement + throw new RuntimeException("unsupported operation: remove"); + } + + + public void wrap(long[] b) { + bitmap = b; + for (x = 0; x < bitmap.length; ++x) { + if ((w = bitmap[x]) != 0) { + break; + } + } + } + + @Override + public void advanceIfNeeded(short minval) { + if (Util.toIntUnsigned(minval) >= (x + 1) * 64) { + x = Util.toIntUnsigned(minval) / 64; + w = bitmap[x]; + while (w == 0) { + ++x; + if (x == bitmap.length) { + 
return; + } + w = bitmap[x]; + } + } + while (hasNext() && (Util.toIntUnsigned(peekNext()) < Util.toIntUnsigned(minval))) { + next(); // could be optimized + } + } + + @Override + public short peekNext() { + long t = w & -w; + return (short) (x * 64 + Long.bitCount(t - 1)); + } +} + + +final class ReverseBitmapContainerShortIterator implements ShortIterator { + + long w; + int x; + + long[] bitmap; + + ReverseBitmapContainerShortIterator() { + } + + ReverseBitmapContainerShortIterator(long[] b) { + wrap(b); + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null; + } + } + + @Override + public boolean hasNext() { + return x >= 0; + } + + @Override + public short next() { + long t = w & -w; + short answer = (short) ((x + 1) * 64 - 1 - Long.bitCount(t - 1)); + w ^= t; + while (w == 0) { + --x; + if (x < 0) { + break; + } + w = Long.reverse(bitmap[x]); + } + return answer; + } + + @Override + public int nextAsInt() { + long t = w & -w; + int answer = (x + 1) * 64 - 1 - Long.bitCount(t - 1); + w ^= t; + while (w == 0) { + --x; + if (x < 0) { + break; + } + w = Long.reverse(bitmap[x]); + } + return answer; + } + + @Override + public void remove() { + // TODO: implement + throw new RuntimeException("unsupported operation: remove"); + } + + void wrap(long[] b) { + bitmap = b; + for (x = bitmap.length - 1; x >= 0; --x) { + if ((w = Long.reverse(bitmap[x])) != 0) { + break; + } + } + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitmapDataProvider.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitmapDataProvider.java new file mode 100644 index 000000000..f38969b52 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/BitmapDataProvider.java @@ -0,0 +1,38 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. 
+ */ + +package com.fr.third.bitmap.roaringbitmap; + +/** + * Representing a general bitmap interface. + */ +public interface BitmapDataProvider extends ImmutableBitmapDataProvider { + /** + * set the value to "true", whether it already appears or not. + * + * @param x integer value + */ + public void add(int x); + + /** + * If present remove the specified integers (effectively, sets its bit value to false) + * + * @param x integer value representing the index in a bitmap + */ + public void remove(int x); + + /** + * Return the jth value stored in this bitmap. + * + * @param j index of the value + * @return the value + */ + @Override + public int select(int j); + + /** + * Recover allocated but unused memory. + */ + public void trim(); +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/Container.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/Container.java new file mode 100644 index 000000000..0a119bf84 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/Container.java @@ -0,0 +1,808 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap; + +import com.fr.third.bitmap.roaringbitmap.buffer.MappeableContainer; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.Externalizable; +import java.io.IOException; + +/** + * Base container class. 
+ */ +public abstract class Container implements Iterable, Cloneable, Externalizable { + + /** + * Name of the various possible containers + */ + public static String ContainerNames[] = {"bitmap", "array", "run"}; + + /** + * Create a container initialized with a range of consecutive values + * + * @param start first index + * @param last last index (range is exclusive) + * @return a new container initialized with the specified values + */ + public static Container rangeOfOnes(final int start, final int last) { + final int sizeAsArrayContainer = ArrayContainer.serializedSizeInBytes(last - start); + final int sizeAsRunContainer = RunContainer.serializedSizeInBytes(1); + Container answer = + sizeAsRunContainer < sizeAsArrayContainer ? new RunContainer() : new ArrayContainer(); + answer = answer.iadd(start, last); + return answer; + } + + /** + * Return a new container with all shorts in [begin,end) added using an unsigned interpretation. + * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract Container add(int begin, int end); + + /** + * Add a short to the container. May generate a new container. + * + * @param x short to be added + * @return the new container + */ + public abstract Container add(short x); + + /** + * Computes the bitwise AND of this container with another (intersection). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container and(ArrayContainer x); + + /** + * Computes the bitwise AND of this container with another (intersection). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container and(BitmapContainer x); + + /** + * Computes the bitwise AND of this container with another (intersection). 
This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public Container and(Container x) { + if (x instanceof ArrayContainer) { + return and((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return and((BitmapContainer) x); + } + return and((RunContainer) x); + } + + /** + * Computes the bitwise AND of this container with another (intersection). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container and(RunContainer x); + + protected abstract int andCardinality(ArrayContainer x); + + protected abstract int andCardinality(BitmapContainer x); + + protected abstract int andCardinality(RunContainer x); + + /** + * Computes the cardinality of the bitwise AND of this container with another (intersection). This + * container as well as the provided container are left unaffected. + * + * @param x other container + * @return the cardinality of the intersection (0 if either container is empty) + */ + public int andCardinality(Container x) { + if (this.getCardinality() == 0) { + return 0; + } else if (x.getCardinality() == 0) { + return 0; + } else { + if (x instanceof ArrayContainer) { + return andCardinality((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return andCardinality((BitmapContainer) x); + } + return andCardinality((RunContainer) x); + } + } + + /** + * Computes the bitwise ANDNOT of this container with another (difference). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container andNot(ArrayContainer x); + + /** + * Computes the bitwise ANDNOT of this container with another (difference). This container as well + * as the provided container are left unaffected.
+ * + * @param x other container + * @return aggregated container + */ + public abstract Container andNot(BitmapContainer x); + + /** + * Computes the bitwise ANDNOT of this container with another (difference). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public Container andNot(Container x) { + if (x instanceof ArrayContainer) { + return andNot((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return andNot((BitmapContainer) x); + } + return andNot((RunContainer) x); + } + + /** + * Computes the bitwise ANDNOT of this container with another (difference). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container andNot(RunContainer x); + + /** + * Empties the container + */ + public abstract void clear(); + + @Override + public abstract Container clone(); + + /** + * Checks whether the container contains the provided value + * + * @param x value to check + * @return whether the value is in the container + */ + public abstract boolean contains(short x); + + /** + * Deserialize (recover) the container. + * + * @param in the DataInput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public abstract void deserialize(DataInput in) throws IOException; + + /** + * Fill the least significant 16 bits of the integer array, starting at index i, with the short + * values from this container. The caller is responsible to allocate enough room. The most + * significant 16 bits of each integer are given by the most significant bits of the provided + * mask.
+ * + * @param x provided array + * @param i starting index + * @param mask indicates most significant bits + */ + public abstract void fillLeastSignificant16bits(int[] x, int i, int mask); + + /** + * Add a short to the container if it is not present, otherwise remove it. May generate a new + * container. + * + * @param x short to be added + * @return the new container + */ + public abstract Container flip(short x); + + /** + * Size of the underlying array + * + * @return size in bytes + */ + protected abstract int getArraySizeInBytes(); + + /** + * Computes the distinct number of short values in the container. Can be expected to run in + * constant time. + * + * @return the cardinality + */ + public abstract int getCardinality(); + + /** + * Get the name of this container. + * + * @return name of the container + */ + public String getContainerName() { + if (this instanceof BitmapContainer) { + return ContainerNames[0]; + } else if (this instanceof ArrayContainer) { + return ContainerNames[1]; + } else { + return ContainerNames[2]; + } + } + + /** + * Iterate through the values of this container and pass them + * along to the IntConsumer, using msb as the 16 most significant bits. + * + * @param msb 16 most significant bits + * @param ic consumer + */ + public abstract void forEach(short msb, IntConsumer ic); + + /** + * Iterator to visit the short values in the container in descending order. + * + * @return iterator + */ + public abstract ShortIterator getReverseShortIterator(); + + /** + * Iterator to visit the short values in the container in ascending order. + * + * @return iterator + */ + public abstract PeekableShortIterator getShortIterator(); + + /** + * Computes an estimate of the memory usage of this container. The estimate is not meant to be + * exact. + * + * @return estimated memory usage in bytes + */ + public abstract int getSizeInBytes(); + + /** + * Add all shorts in [begin,end) using an unsigned interpretation. May generate a new container. 
+ * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract Container iadd(int begin, int end); + + /** + * Computes the in-place bitwise AND of this container with another (intersection). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container iand(ArrayContainer x); + + + /** + * Computes the in-place bitwise AND of this container with another (intersection). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container iand(BitmapContainer x); + + /** + * Computes the in-place bitwise AND of this container with another (intersection). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public Container iand(Container x) { + if (x instanceof ArrayContainer) { + return iand((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return iand((BitmapContainer) x); + } + return iand((RunContainer) x); + } + + /** + * Computes the in-place bitwise AND of this container with another (intersection). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container iand(RunContainer x); + + /** + * Computes the in-place bitwise ANDNOT of this container with another (difference). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. 
+ * + * @param x other container + * @return aggregated container + */ + public abstract Container iandNot(ArrayContainer x); + + + /** + * Computes the in-place bitwise ANDNOT of this container with another (difference). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container iandNot(BitmapContainer x); + + /** + * Computes the in-place bitwise ANDNOT of this container with another (difference). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public Container iandNot(Container x) { + if (x instanceof ArrayContainer) { + return iandNot((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return iandNot((BitmapContainer) x); + } + return iandNot((RunContainer) x); + } + + /** + * Computes the in-place bitwise ANDNOT of this container with another (difference). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container iandNot(RunContainer x); + + + /** + * Computes the in-place bitwise NOT of this container (complement). Only those bits within the + * range are affected. The current container is generally modified. May generate a new container. + * + * @param rangeStart beginning of range (inclusive); 0 is beginning of this container. + * @param rangeEnd ending of range (exclusive) + * @return (partially) complemented container + */ + public abstract Container inot(int rangeStart, int rangeEnd); + + /** + * Returns true if the current container intersects the other container. 
+ * + * @param x other container + * @return whether they intersect + */ + public abstract boolean intersects(ArrayContainer x); + + /** + * Returns true if the current container intersects the other container. + * + * @param x other container + * @return whether they intersect + */ + public abstract boolean intersects(BitmapContainer x); + + /** + * Returns true if the current container intersects the other container. + * + * @param x other container + * @return whether they intersect + */ + public boolean intersects(Container x) { + if (x instanceof ArrayContainer) { + return intersects((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return intersects((BitmapContainer) x); + } + return intersects((RunContainer) x); + } + + /** + * Returns true if the current container intersects the other container. + * + * @param x other container + * @return whether they intersect + */ + public abstract boolean intersects(RunContainer x); + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container ior(ArrayContainer x); + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container ior(BitmapContainer x); + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. 
+ * + * @param x other container + * @return aggregated container + */ + public Container ior(Container x) { + if (x instanceof ArrayContainer) { + return ior((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ior((BitmapContainer) x); + } + return ior((RunContainer) x); + } + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container ior(RunContainer x); + + /** + * Remove shorts in [begin,end) using an unsigned interpretation. May generate a new container. + * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract Container iremove(int begin, int end); + + /** + * Computes the in-place bitwise XOR of this container with another (symmetric difference). The + * current container is generally modified, whereas the provided container (x) is unaffected. May + * generate a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container ixor(ArrayContainer x); + + /** + * Computes the in-place bitwise XOR of this container with another (symmetric difference). The + * current container is generally modified, whereas the provided container (x) is unaffected. May + * generate a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container ixor(BitmapContainer x); + + + /** + * Computes the in-place bitwise XOR of this container with another (symmetric difference). The + * current container is generally modified, whereas the provided container (x) is unaffected. May + * generate a new container.
+ * + * @param x other container + * @return aggregated container + */ + public Container ixor(Container x) { + if (x instanceof ArrayContainer) { + return ixor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ixor((BitmapContainer) x); + } + return ixor((RunContainer) x); + } + + /** + * Computes the in-place bitwise XOR of this container with another (symmetric difference). The + * current container is generally modified, whereas the provided container (x) is unaffected. May + * generate a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract Container ixor(RunContainer x); + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. The resulting container may not track its cardinality correctly. The resulting + * container may not track its cardinality correctly.
This can be fixed as follows: + * if(c.getCardinality()<0) ((BitmapContainer)c).computeCardinality(); + * + * @param x other container + * @return aggregated container + */ + public Container lazyIOR(Container x) { + if (this instanceof ArrayContainer) { + if (x instanceof ArrayContainer) { + return ((ArrayContainer) this).lazyor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ior((BitmapContainer) x); + } + return ((RunContainer) x).lazyor((ArrayContainer) this); + } else if (this instanceof RunContainer) { + if (x instanceof ArrayContainer) { + return ((RunContainer) this).ilazyor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ior((BitmapContainer) x); + } + return ior((RunContainer) x); + } else { + if (x instanceof ArrayContainer) { + return ((BitmapContainer) this).ilazyor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ((BitmapContainer) this).ilazyor((BitmapContainer) x); + } + return ((BitmapContainer) this).ilazyor((RunContainer) x); + } + } + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. The resulting container may not track its cardinality + * correctly. 
This can be fixed as follows: if(c.getCardinality()<0) + * ((BitmapContainer)c).computeCardinality(); + * + * @param x other container + * @return aggregated container + */ + public Container lazyOR(Container x) { + if (this instanceof ArrayContainer) { + if (x instanceof ArrayContainer) { + return ((ArrayContainer) this).lazyor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ((BitmapContainer) x).lazyor((ArrayContainer) this); + } + return ((RunContainer) x).lazyor((ArrayContainer) this); + } else if (this instanceof RunContainer) { + if (x instanceof ArrayContainer) { + return ((RunContainer) this).lazyor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ((BitmapContainer) x).lazyor((RunContainer) this); + } + return or((RunContainer) x); + } else { + if (x instanceof ArrayContainer) { + return ((BitmapContainer) this).lazyor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return ((BitmapContainer) this).lazyor((BitmapContainer) x); + } + return ((BitmapContainer) this).lazyor((RunContainer) x); + } + } + + /** + * Create a new Container containing at most maxcardinality integers. + * + * @param maxcardinality maximal cardinality + * @return a new bitmap with cardinality no more than maxcardinality + */ + public abstract Container limit(int maxcardinality); + + /** + * Computes the bitwise NOT of this container (complement). Only those bits within the range are + * affected. The current container is left unaffected. + * + * @param rangeStart beginning of range (inclusive); 0 is beginning of this container. + * @param rangeEnd ending of range (exclusive) + * @return (partially) complemented container + */ + public abstract Container not(int rangeStart, int rangeEnd); + + abstract int numberOfRuns(); // exact + + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. 
+ * + * @param x other container + * @return aggregated container + */ + public abstract Container or(ArrayContainer x); + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container or(BitmapContainer x); + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public Container or(Container x) { + if (x instanceof ArrayContainer) { + return or((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return or((BitmapContainer) x); + } + return or((RunContainer) x); + } + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container or(RunContainer x); + + + /** + * Rank returns the number of integers that are smaller than or equal to lowbits (Rank(infinity) would be + * GetCardinality()). + * + * @param lowbits upper limit + * @return the rank + */ + public abstract int rank(short lowbits); + + /** + * Return a new container with all shorts in [begin,end) removed using an unsigned interpretation. + * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract Container remove(int begin, int end); + + /** + * Remove the short from this container. May create a new container. + * + * @param x to be removed + * @return New container + */ + public abstract Container remove(short x); + + /** + * The output of a lazyOR or lazyIOR might be an invalid container, this should be called on it.
+ * + * @return a new valid container + */ + public abstract Container repairAfterLazy(); + + /** + * Convert to RunContainers, when the result is smaller. Overridden by RunContainer to possibly + * switch from RunContainer to a smaller alternative. Overridden by BitmapContainer with a more + * efficient approach. + * + * @return the new container + */ + public abstract Container runOptimize(); + + /** + * Return the jth value + * + * @param j index of the value + * @return the value + */ + public abstract short select(int j); + + /** + * Serialize the container. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public abstract void serialize(DataOutput out) throws IOException; + + /** + * Report the number of bytes required to serialize this container. + * + * @return the size in bytes + */ + public abstract int serializedSizeInBytes(); + + /** + * Convert to a mappeable container. + * + * @return the mappeable container + */ + public abstract MappeableContainer toMappeableContainer(); + + + /** + * If possible, recover wasted memory. + */ + public abstract void trim(); + + /** + * Write just the underlying array. + * + * @param out output stream + * @throws IOException in case of failure + */ + protected abstract void writeArray(DataOutput out) throws IOException; + + + /** + * Computes the bitwise XOR of this container with another (symmetric difference). This container + * as well as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container xor(ArrayContainer x); + + /** + * Computes the bitwise XOR of this container with another (symmetric difference). This container + * as well as the provided container are left unaffected.
+ * + * @param x other container + * @return aggregated container + */ + public abstract Container xor(BitmapContainer x); + + + /** + * Computes the bitwise OR of this container with another (symmetric difference). This container + * as well as the provided container are left unaffected. + * + * @param x other parameter + * @return aggregated container + */ + public Container xor(Container x) { + if (x instanceof ArrayContainer) { + return xor((ArrayContainer) x); + } else if (x instanceof BitmapContainer) { + return xor((BitmapContainer) x); + } + return xor((RunContainer) x); + } + + /** + * Computes the bitwise XOR of this container with another (symmetric difference). This container + * as well as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract Container xor(RunContainer x); + + /** + * Convert the current container to a BitmapContainer, if a conversion is needed. + * If the container is already a bitmap, the container is returned unchanged. + * + * @return a bitmap container + */ + public abstract BitmapContainer toBitmapContainer(); +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ContainerPointer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ContainerPointer.java new file mode 100644 index 000000000..59bbe841f --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ContainerPointer.java @@ -0,0 +1,60 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap; + +/** + * This interface allows you to iterate over the containers in a roaring bitmap. 
+ */
+public interface ContainerPointer extends Comparable, Cloneable {
+    /**
+     * Move to the next container
+     */
+    void advance();
+
+
+    /**
+     * Create a copy
+     *
+     * @return a clone of this pointer
+     */
+    ContainerPointer clone();
+
+    /**
+     * Return the cardinality of the current container
+     *
+     * @return the cardinality
+     */
+    int getCardinality();
+
+    /**
+     * This method can be used to check whether there is currently a valid container, as it
+     * returns null when there is not.
+     *
+     * @return null or the current container
+     */
+    Container getContainer();
+
+    /**
+     * Check whether the current container is a bitmap container.
+     *
+     * @return whether it is a bitmap container
+     */
+    boolean isBitmapContainer();
+
+    /**
+     * Check whether the current container is a run container.
+     *
+     * @return whether it is a run container
+     */
+    boolean isRunContainer();
+
+    /**
+     * The key is a 16-bit integer that indicates the position of the container in the roaring bitmap.
+     * To be interpreted as an unsigned integer.
+     *
+     * @return the key
+     */
+    short key();
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/FastAggregation.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/FastAggregation.java
new file mode 100644
index 000000000..5a301db3c
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/FastAggregation.java
@@ -0,0 +1,532 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+
+package com.fr.third.bitmap.roaringbitmap;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.PriorityQueue;
+
+
+/**
+ * Fast algorithms to aggregate many bitmaps.
+ *
+ * @author Daniel Lemire
+ */
+public final class FastAggregation {
+
+
+    /**
+     * Private constructor to prevent instantiation of utility class
+     */
+    private FastAggregation() {
+    }
+
+    /**
+     * Compute the AND aggregate.
+     * <p>
+     * In practice, calls {@link #naive_and(Iterator)}
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap and(Iterator bitmaps) {
+        return naive_and(bitmaps);
+    }
+
+    /**
+     * Compute the AND aggregate.
+     * <p>
+     * In practice, calls {@link #naive_and(RoaringBitmap...)}
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap and(RoaringBitmap... bitmaps) {
+        return naive_and(bitmaps);
+    }
+
+    /**
+     * Calls naive_or.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     * @deprecated this overload only delegates to {@link #naive_or(Iterator)}
+     */
+    @Deprecated
+    public static RoaringBitmap horizontal_or(Iterator bitmaps) {
+        return naive_or(bitmaps);
+    }
+
+    /**
+     * Minimizes memory usage while computing the or aggregate on a moderate number of bitmaps.
+     * <p>

+     * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     * @see #or(RoaringBitmap...)
+     */
+    public static RoaringBitmap horizontal_or(List<? extends RoaringBitmap> bitmaps) {
+        RoaringBitmap answer = new RoaringBitmap();
+        if (bitmaps.isEmpty()) {
+            return answer;
+        }
+        // one pointer per input that has at least one container; the queue uses the natural
+        // ordering of ContainerPointer (no comparator is supplied)
+        PriorityQueue<ContainerPointer> pq = new PriorityQueue<ContainerPointer>(bitmaps.size());
+        for (int k = 0; k < bitmaps.size(); ++k) {
+            ContainerPointer x = bitmaps.get(k).highLowContainer.getContainerPointer();
+            if (x.getContainer() != null) {
+                pq.add(x);
+            }
+        }
+
+        while (!pq.isEmpty()) {
+            ContainerPointer x1 = pq.poll();
+            if (pq.isEmpty() || (pq.peek().key() != x1.key())) {
+                // no other pointer is on this key: append a copy of the container
+                answer.highLowContainer.append(x1.key(), x1.getContainer().clone());
+                x1.advance();
+                if (x1.getContainer() != null) {
+                    pq.add(x1);
+                }
+                continue;
+            }
+            // at least two pointers share the key: merge them lazily, then every further
+            // pointer on the same key
+            ContainerPointer x2 = pq.poll();
+            Container newc = x1.getContainer().lazyOR(x2.getContainer());
+            while (!pq.isEmpty() && (pq.peek().key() == x1.key())) {
+                ContainerPointer x = pq.poll();
+                newc = newc.lazyIOR(x.getContainer());
+                x.advance();
+                if (x.getContainer() != null) {
+                    pq.add(x);
+                } else if (pq.isEmpty()) {
+                    break;
+                }
+            }
+            // the output of lazyOR/lazyIOR may be invalid until it has been repaired
+            newc = newc.repairAfterLazy();
+            answer.highLowContainer.append(x1.key(), newc);
+            // x1 and x2 are advanced only now, after their current containers were consumed
+            x1.advance();
+            if (x1.getContainer() != null) {
+                pq.add(x1);
+            }
+            x2.advance();
+            if (x2.getContainer() != null) {
+                pq.add(x2);
+            }
+        }
+        return answer;
+    }
+
+    /**
+     * Minimizes memory usage while computing the or aggregate on a moderate number of bitmaps.
+     * <p>

+     * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     * @see #or(RoaringBitmap...)
+     */
+    public static RoaringBitmap horizontal_or(RoaringBitmap... bitmaps) {
+        RoaringBitmap answer = new RoaringBitmap();
+        if (bitmaps.length == 0) {
+            return answer;
+        }
+        // one pointer per input that has at least one container; the queue uses the natural
+        // ordering of ContainerPointer (no comparator is supplied)
+        PriorityQueue<ContainerPointer> pq = new PriorityQueue<ContainerPointer>(bitmaps.length);
+        for (int k = 0; k < bitmaps.length; ++k) {
+            ContainerPointer x = bitmaps[k].highLowContainer.getContainerPointer();
+            if (x.getContainer() != null) {
+                pq.add(x);
+            }
+        }
+
+        while (!pq.isEmpty()) {
+            ContainerPointer x1 = pq.poll();
+            if (pq.isEmpty() || (pq.peek().key() != x1.key())) {
+                // no other pointer is on this key: append a copy of the container
+                answer.highLowContainer.append(x1.key(), x1.getContainer().clone());
+                x1.advance();
+                if (x1.getContainer() != null) {
+                    pq.add(x1);
+                }
+                continue;
+            }
+            // at least two pointers share the key: merge them lazily, then every further
+            // pointer on the same key
+            ContainerPointer x2 = pq.poll();
+            Container newc = x1.getContainer().lazyOR(x2.getContainer());
+            while (!pq.isEmpty() && (pq.peek().key() == x1.key())) {
+                ContainerPointer x = pq.poll();
+                newc = newc.lazyIOR(x.getContainer());
+                x.advance();
+                if (x.getContainer() != null) {
+                    pq.add(x);
+                } else if (pq.isEmpty()) {
+                    break;
+                }
+            }
+            // the output of lazyOR/lazyIOR may be invalid until it has been repaired
+            newc = newc.repairAfterLazy();
+            answer.highLowContainer.append(x1.key(), newc);
+            // x1 and x2 are advanced only now, after their current containers were consumed
+            x1.advance();
+            if (x1.getContainer() != null) {
+                pq.add(x1);
+            }
+            x2.advance();
+            if (x2.getContainer() != null) {
+                pq.add(x2);
+            }
+        }
+        return answer;
+    }
+
+    /**
+     * Minimizes memory usage while computing the xor aggregate on a moderate number of bitmaps.
+     * <p>

+     * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     * @see #xor(RoaringBitmap...)
+     */
+    public static RoaringBitmap horizontal_xor(RoaringBitmap... bitmaps) {
+        RoaringBitmap answer = new RoaringBitmap();
+        if (bitmaps.length == 0) {
+            return answer;
+        }
+        // one pointer per input that has at least one container; the queue uses the natural
+        // ordering of ContainerPointer (no comparator is supplied)
+        PriorityQueue<ContainerPointer> pq = new PriorityQueue<ContainerPointer>(bitmaps.length);
+        for (int k = 0; k < bitmaps.length; ++k) {
+            ContainerPointer x = bitmaps[k].highLowContainer.getContainerPointer();
+            if (x.getContainer() != null) {
+                pq.add(x);
+            }
+        }
+
+        while (!pq.isEmpty()) {
+            ContainerPointer x1 = pq.poll();
+            if (pq.isEmpty() || (pq.peek().key() != x1.key())) {
+                // no other pointer is on this key: append a copy of the container
+                answer.highLowContainer.append(x1.key(), x1.getContainer().clone());
+                x1.advance();
+                if (x1.getContainer() != null) {
+                    pq.add(x1);
+                }
+                continue;
+            }
+            ContainerPointer x2 = pq.poll();
+            Container newc = x1.getContainer().xor(x2.getContainer());
+            while (!pq.isEmpty() && (pq.peek().key() == x1.key())) {
+                ContainerPointer x = pq.poll();
+                newc = newc.ixor(x.getContainer());
+                x.advance();
+                if (x.getContainer() != null) {
+                    pq.add(x);
+                } else if (pq.isEmpty()) {
+                    break;
+                }
+            }
+            // values present in an even number of inputs cancel out, so the combined container
+            // can be empty; such a container must not be stored in the result
+            if (newc.getCardinality() > 0) {
+                answer.highLowContainer.append(x1.key(), newc);
+            }
+            x1.advance();
+            if (x1.getContainer() != null) {
+                pq.add(x1);
+            }
+            x2.advance();
+            if (x2.getContainer() != null) {
+                pq.add(x2);
+            }
+        }
+        return answer;
+    }
+
+    /**
+     * Compute overall AND between bitmaps two-by-two.
+     * <p>

+     * This function runs in linear time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap naive_and(Iterator<? extends RoaringBitmap> bitmaps) {
+        if (!bitmaps.hasNext()) {
+            return new RoaringBitmap();
+        }
+        // and() is invoked on a clone, so the first input is never the receiver of the call
+        RoaringBitmap answer = bitmaps.next().clone();
+        while (bitmaps.hasNext()) {
+            answer.and(bitmaps.next());
+        }
+        return answer;
+    }
+
+    /**
+     * Compute overall AND between bitmaps two-by-two.
+     * <p>

+     * This function runs in linear time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap naive_and(RoaringBitmap... bitmaps) {
+        final int count = bitmaps.length;
+        if (count == 0) {
+            return new RoaringBitmap();
+        }
+        // and() is invoked on a clone, so the first input is never the receiver of the call
+        final RoaringBitmap result = bitmaps[0].clone();
+        int index = 1;
+        while (index < count) {
+            result.and(bitmaps[index]);
+            ++index;
+        }
+        return result;
+    }
+
+    /**
+     * Compute overall OR between bitmaps two-by-two.
+     * <p>

+     * This function runs in linear time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap naive_or(Iterator<? extends RoaringBitmap> bitmaps) {
+        RoaringBitmap answer = new RoaringBitmap();
+        while (bitmaps.hasNext()) {
+            answer.naivelazyor(bitmaps.next());
+        }
+        // repairAfterLazy() is called once, after every input has been merged
+        answer.repairAfterLazy();
+        return answer;
+    }
+
+    /**
+     * Compute overall OR between bitmaps two-by-two.
+     * <p>

+     * This function runs in linear time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap naive_or(RoaringBitmap... bitmaps) {
+        final RoaringBitmap result = new RoaringBitmap();
+        for (RoaringBitmap bitmap : bitmaps) {
+            result.naivelazyor(bitmap);
+        }
+        // repairAfterLazy() is called once, after every input has been merged
+        result.repairAfterLazy();
+        return result;
+    }
+
+    /**
+     * Compute overall XOR between bitmaps two-by-two.
+     * <p>

+     * This function runs in linear time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap naive_xor(Iterator<? extends RoaringBitmap> bitmaps) {
+        // the accumulator starts empty and every input is xor-ed into it in turn
+        RoaringBitmap answer = new RoaringBitmap();
+        while (bitmaps.hasNext()) {
+            answer.xor(bitmaps.next());
+        }
+        return answer;
+    }
+
+    /**
+     * Compute overall XOR between bitmaps two-by-two.
+     * <p>

+     * This function runs in linear time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap naive_xor(RoaringBitmap... bitmaps) {
+        // the accumulator starts empty and every input is xor-ed into it in turn
+        final RoaringBitmap result = new RoaringBitmap();
+        for (RoaringBitmap bitmap : bitmaps) {
+            result.xor(bitmap);
+        }
+        return result;
+    }
+
+    /**
+     * Compute overall OR between bitmaps. Delegates to naive_or.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap or(Iterator bitmaps) {
+        return naive_or(bitmaps);
+    }
+
+    /**
+     * Compute overall OR between bitmaps. Delegates to naive_or.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap or(RoaringBitmap... bitmaps) {
+        return naive_or(bitmaps);
+    }
+
+    /**
+     * Uses a priority queue to compute the or aggregate.
+     * <p>

+     * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     * @see #horizontal_or(RoaringBitmap...)
+     */
+    public static RoaringBitmap priorityqueue_or(Iterator<? extends RoaringBitmap> bitmaps) {
+        if (!bitmaps.hasNext()) {
+            return new RoaringBitmap();
+        }
+        // we buffer the call to getSizeInBytes(), hence the code complexity
+        ArrayList<RoaringBitmap> buffer = new ArrayList<RoaringBitmap>();
+        while (bitmaps.hasNext()) {
+            buffer.add(bitmaps.next());
+        }
+        final long[] sizes = new long[buffer.size()];
+        // istmp[k] is set once buffer slot k holds an intermediate result created here; only
+        // such slots are updated in place with lazyor
+        final boolean[] istmp = new boolean[buffer.size()];
+        for (int k = 0; k < sizes.length; ++k) {
+            sizes[k] = buffer.get(k).getLongSizeInBytes();
+        }
+        // the queue holds indexes into buffer, smallest bitmap first
+        PriorityQueue<Integer> pq = new PriorityQueue<Integer>(128, new Comparator<Integer>() {
+            @Override
+            public int compare(Integer a, Integer b) {
+                // compare the longs directly: narrowing their difference to int can
+                // truncate and flip the sign
+                return sizes[a] < sizes[b] ? -1 : (sizes[a] > sizes[b] ? 1 : 0);
+            }
+        });
+        for (int k = 0; k < sizes.length; ++k) {
+            pq.add(k);
+        }
+        while (pq.size() > 1) {
+            Integer x1 = pq.poll();
+            Integer x2 = pq.poll();
+            if (istmp[x2] && istmp[x1]) {
+                buffer.set(x1, RoaringBitmap.lazyorfromlazyinputs(buffer.get(x1), buffer.get(x2)));
+                sizes[x1] = buffer.get(x1).getLongSizeInBytes();
+                istmp[x1] = true;
+                pq.add(x1);
+            } else if (istmp[x2]) {
+                buffer.get(x2).lazyor(buffer.get(x1));
+                sizes[x2] = buffer.get(x2).getLongSizeInBytes();
+                pq.add(x2);
+            } else if (istmp[x1]) {
+                buffer.get(x1).lazyor(buffer.get(x2));
+                sizes[x1] = buffer.get(x1).getLongSizeInBytes();
+                pq.add(x1);
+            } else {
+                buffer.set(x1, RoaringBitmap.lazyor(buffer.get(x1), buffer.get(x2)));
+                sizes[x1] = buffer.get(x1).getLongSizeInBytes();
+                istmp[x1] = true;
+                pq.add(x1);
+            }
+        }
+        // with a single input the loop above never runs and the remaining slot is that input
+        RoaringBitmap answer = buffer.get(pq.poll());
+        answer.repairAfterLazy();
+        return answer;
+    }
+
+    /**
+     * Uses a priority queue to compute the or aggregate.
+     * <p>

+     * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     * @see #horizontal_or(RoaringBitmap...)
+     */
+    public static RoaringBitmap priorityqueue_or(RoaringBitmap... bitmaps) {
+        if (bitmaps.length == 0) {
+            return new RoaringBitmap();
+        }
+        // we buffer the call to getSizeInBytes(), hence the code complexity
+        final RoaringBitmap[] buffer = Arrays.copyOf(bitmaps, bitmaps.length);
+        final long[] sizes = new long[buffer.length];
+        // istmp[k] is set once buffer slot k holds an intermediate result created here; only
+        // such slots are updated in place with lazyor
+        final boolean[] istmp = new boolean[buffer.length];
+        for (int k = 0; k < sizes.length; ++k) {
+            sizes[k] = buffer[k].getLongSizeInBytes();
+        }
+        // the queue holds indexes into buffer, smallest bitmap first
+        PriorityQueue<Integer> pq = new PriorityQueue<Integer>(128, new Comparator<Integer>() {
+            @Override
+            public int compare(Integer a, Integer b) {
+                // compare the longs directly: narrowing their difference to int can
+                // truncate and flip the sign
+                return sizes[a] < sizes[b] ? -1 : (sizes[a] > sizes[b] ? 1 : 0);
+            }
+        });
+        for (int k = 0; k < sizes.length; ++k) {
+            pq.add(k);
+        }
+        while (pq.size() > 1) {
+            Integer x1 = pq.poll();
+            Integer x2 = pq.poll();
+            if (istmp[x2] && istmp[x1]) {
+                buffer[x1] = RoaringBitmap.lazyorfromlazyinputs(buffer[x1], buffer[x2]);
+                sizes[x1] = buffer[x1].getLongSizeInBytes();
+                istmp[x1] = true;
+                pq.add(x1);
+            } else if (istmp[x2]) {
+                buffer[x2].lazyor(buffer[x1]);
+                sizes[x2] = buffer[x2].getLongSizeInBytes();
+                pq.add(x2);
+            } else if (istmp[x1]) {
+                buffer[x1].lazyor(buffer[x2]);
+                sizes[x1] = buffer[x1].getLongSizeInBytes();
+                pq.add(x1);
+            } else {
+                buffer[x1] = RoaringBitmap.lazyor(buffer[x1], buffer[x2]);
+                sizes[x1] = buffer[x1].getLongSizeInBytes();
+                istmp[x1] = true;
+                pq.add(x1);
+            }
+        }
+        // with a single input the loop above never runs and the remaining slot is that input
+        RoaringBitmap answer = buffer[pq.poll()];
+        answer.repairAfterLazy();
+        return answer;
+    }
+
+    /**
+     * Uses a priority queue to compute the xor aggregate.
+     * <p>

+     * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     * @see #horizontal_xor(RoaringBitmap...)
+     */
+    public static RoaringBitmap priorityqueue_xor(RoaringBitmap... bitmaps) {
+        // TODO: This code could be faster, see priorityqueue_or
+        if (bitmaps.length == 0) {
+            return new RoaringBitmap();
+        }
+
+        // smallest bitmap first
+        PriorityQueue<RoaringBitmap> pq =
+                new PriorityQueue<RoaringBitmap>(bitmaps.length, new Comparator<RoaringBitmap>() {
+                    @Override
+                    public int compare(RoaringBitmap a, RoaringBitmap b) {
+                        // compare the longs directly: narrowing their difference to int can
+                        // truncate and flip the sign
+                        final long sizeA = a.getLongSizeInBytes();
+                        final long sizeB = b.getLongSizeInBytes();
+                        return sizeA < sizeB ? -1 : (sizeA > sizeB ? 1 : 0);
+                    }
+                });
+        Collections.addAll(pq, bitmaps);
+        // repeatedly replace the two smallest bitmaps by their xor
+        while (pq.size() > 1) {
+            RoaringBitmap x1 = pq.poll();
+            RoaringBitmap x2 = pq.poll();
+            pq.add(RoaringBitmap.xor(x1, x2));
+        }
+        // with a single input the loop above never runs and that input itself is returned
+        return pq.poll();
+    }
+
+    /**
+     * Compute overall XOR between bitmaps. Delegates to naive_xor.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap xor(Iterator bitmaps) {
+        return naive_xor(bitmaps);
+    }
+
+    /**
+     * Compute overall XOR between bitmaps. Delegates to naive_xor.
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static RoaringBitmap xor(RoaringBitmap... bitmaps) {
+        return naive_xor(bitmaps);
+    }
+
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ImmutableBitmapDataProvider.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ImmutableBitmapDataProvider.java
new file mode 100644
index 000000000..f1cdd5d4f
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ImmutableBitmapDataProvider.java
@@ -0,0 +1,157 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+
+package com.fr.third.bitmap.roaringbitmap;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * Interface representing an immutable bitmap.
+ */
+public interface ImmutableBitmapDataProvider {
+    /**
+     * Checks whether the value is included, which is equivalent to checking if the corresponding
+     * bit is set (get in BitSet class).
+     *
+     * @param x integer value
+     * @return whether the integer value is included.
+     */
+    public boolean contains(int x);
+
+    /**
+     * Returns the number of distinct integers added to the bitmap (e.g., number of bits set).
+     * Internally, this is computed as a 64-bit number.
+     *
+     * @return the cardinality
+     */
+    public int getCardinality();
+
+    /**
+     * Returns the number of distinct integers added to the bitmap (e.g., number of bits set).
+     * This returns a full 64-bit result.
+     *
+     * @return the cardinality
+     */
+    public long getLongCardinality();
+
+    /**
+     * Visit all values in the bitmap and pass them to the consumer.
+     * <p>
+     * Usage:
+     * <pre>
+     * {@code
+     *  bitmap.forEach(new IntConsumer() {
+     *
+     *    {@literal @}Override
+     *    public void accept(int value) {
+     *      // do something here
+     *
+     *    }});
+     *   }
+     * }
+     * </pre>
+     *
+     * @param ic the consumer
+     */
+    public void forEach(IntConsumer ic);
+
+    /**
+     * For better performance, consider using the {@link #forEach forEach} method.
+     *
+     * @return a custom iterator over set bits, the bits are traversed in ascending sorted order
+     */
+    public PeekableIntIterator getIntIterator();
+
+    /**
+     * @return a custom iterator over set bits, the bits are traversed in descending sorted order
+     */
+    public IntIterator getReverseIntIterator();
+
+    /**
+     * Estimate of the memory usage of this data structure.
+     * <p>
+     * Internally, this is computed as a 64-bit counter.
+     *
+     * @return estimated memory usage.
+     */
+    public int getSizeInBytes();
+
+    /**
+     * Estimate of the memory usage of this data structure. Provides
+     * full 64-bit number.
+     *
+     * @return estimated memory usage.
+     */
+    public long getLongSizeInBytes();
+
+    /**
+     * Checks whether the bitmap is empty.
+     *
+     * @return true if this bitmap contains no set bit
+     */
+    public boolean isEmpty();
+
+    /**
+     * Create a new bitmap of the same class, containing at most x integers.
+     *
+     * @param x maximal cardinality
+     * @return a new bitmap with cardinality no more than x
+     */
+    public ImmutableBitmapDataProvider limit(int x);
+
+    /**
+     * Rank returns the number of integers that are smaller than or equal to x (Rank(infinity)
+     * would be GetCardinality()).
+     * <p>
+     * The value is internally computed as a 64-bit number.
+     *
+     * @param x upper limit
+     * @return the rank
+     */
+    public int rank(int x);
+
+    /**
+     * Same as "rank" but produces a full 64-bit value.
+     *
+     * @param x upper limit
+     * @return the rank
+     */
+    public long rankLong(int x);
+
+    /**
+     * Return the jth value stored in this bitmap.
+     *
+     * @param j index of the value
+     * @return the value
+     */
+    public int select(int j);
+
+    /**
+     * Serialize this bitmap.
+     * <p>
+     * The current bitmap is not modified.
+     *
+     * @param out the DataOutput stream
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    public void serialize(DataOutput out) throws IOException;
+
+    /**
+     * Report the number of bytes required to serialize this bitmap. This is the number of bytes
+     * written out when using the serialize method. When using the writeExternal method, the count
+     * will be higher due to the overhead of Java serialization.
+     *
+     * @return the size in bytes
+     */
+    public int serializedSizeInBytes();
+
+    /**
+     * Return the set values as an array. The integer values are in sorted order.
+     *
+     * @return array representing the set values.
+     */
+    public int[] toArray();
+
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntConsumer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntConsumer.java
new file mode 100644
index 000000000..47ff1e27d
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntConsumer.java
@@ -0,0 +1,29 @@
+package com.fr.third.bitmap.roaringbitmap;
+
+/**
+ * An IntConsumer receives the int values contained in a data structure.
+ * Each value is visited once.
+ * <p>

+ * Usage:
+ * <p>
+ * <pre>
+ * {@code
+ *  bitmap.forEach(new IntConsumer() {
+ *
+ *    @Override
+ *    public void accept(int value) {
+ *      // do something here
+ *
+ *    }});
+ *   }
+ * }
+ * </pre>
+ */
+public interface IntConsumer {
+    /**
+     * Receives the integer
+     *
+     * @param value the integer value
+     */
+    void accept(int value);
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntIterator.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntIterator.java
new file mode 100644
index 000000000..ef86f6611
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntIterator.java
@@ -0,0 +1,28 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+
+package com.fr.third.bitmap.roaringbitmap;
+
+/**
+ * A simple iterator over integer values
+ */
+public interface IntIterator extends Cloneable {
+    /**
+     * Creates a copy of the iterator.
+     *
+     * @return a clone of the current iterator
+     */
+    IntIterator clone();
+
+    /**
+     * @return whether there is another value
+     */
+    boolean hasNext();
+
+    /**
+     * @return next integer value
+     */
+    int next();
+
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntIteratorFlyweight.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntIteratorFlyweight.java
new file mode 100644
index 000000000..36a91b168
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/IntIteratorFlyweight.java
@@ -0,0 +1,124 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+
+package com.fr.third.bitmap.roaringbitmap;
+
+/**
+ * Fast iterator minimizing the stress on the garbage collector. You can create one reusable
+ * instance of this class and then {@link #wrap(RoaringBitmap)}
+ * <p>

+ * For better performance, consider the {@link RoaringBitmap#forEach} method.
+ *
+ * @author Borislav Ivanov
+ **/
+public class IntIteratorFlyweight implements PeekableIntIterator {
+
+    // key of the current container, already shifted into the 16 high bits
+    private int hs;
+
+    // iterator over the current container; one of the three reusable iterators below
+    private PeekableShortIterator iter;
+
+    private ArrayContainerShortIterator arrIter = new ArrayContainerShortIterator();
+
+    private BitmapContainerShortIterator bitmapIter = new BitmapContainerShortIterator();
+
+    private RunContainerShortIterator runIter = new RunContainerShortIterator();
+
+    // index of the current container in the wrapped bitmap
+    private int pos;
+
+    private RoaringBitmap roaringBitmap = null;
+
+    /**
+     * Creates an instance that is not ready for iteration. You must first call
+     * {@link #wrap(RoaringBitmap)}.
+     */
+    public IntIteratorFlyweight() {
+
+    }
+
+    /**
+     * Creates an instance that is ready for iteration.
+     *
+     * @param r bitmap to be iterated over
+     */
+    public IntIteratorFlyweight(RoaringBitmap r) {
+        wrap(r);
+    }
+
+    /**
+     * Creates a copy of this iterator with its own copy of the current container iterator.
+     *
+     * @return a clone of the current iterator
+     */
+    @Override
+    public PeekableIntIterator clone() {
+        try {
+            // shallow copy: arrIter, bitmapIter and runIter stay shared with this instance
+            IntIteratorFlyweight x = (IntIteratorFlyweight) super.clone();
+            // iter is only assigned once a container has been wrapped, so it may still be null
+            if (this.iter != null) {
+                x.iter = this.iter.clone();
+            }
+            return x;
+        } catch (CloneNotSupportedException e) {
+            return null;// will not happen
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        return pos < this.roaringBitmap.highLowContainer.size();
+    }
+
+    @Override
+    public int next() {
+        int x = iter.nextAsInt() | hs;
+        // move on to the next container as soon as the current one is exhausted
+        if (!iter.hasNext()) {
+            ++pos;
+            nextContainer();
+        }
+        return x;
+    }
+
+    /**
+     * Points iter and hs at the container at index pos, if there is one.
+     */
+    private void nextContainer() {
+        if (pos < this.roaringBitmap.highLowContainer.size()) {
+
+            Container container = this.roaringBitmap.highLowContainer.getContainerAtIndex(pos);
+
+            if (container instanceof BitmapContainer) {
+                bitmapIter.wrap(((BitmapContainer) container).bitmap);
+                iter = bitmapIter;
+            } else if (container instanceof ArrayContainer) {
+                arrIter.wrap((ArrayContainer) container);
+                iter = arrIter;
+            } else {
+                runIter.wrap((RunContainer) container);
+                iter = runIter;
+            }
+            hs = Util.toIntUnsigned(this.roaringBitmap.highLowContainer.getKeyAtIndex(pos)) << 16;
+        }
+    }
+
+    /**
+     * Prepares a bitmap for iteration
+     *
+     * @param r bitmap to be iterated over
+     */
+    public void wrap(RoaringBitmap r) {
+        this.hs = 0;
+        this.pos = 0;
+        this.roaringBitmap = r;
+        this.nextContainer();
+    }
+
+    @Override
+    public void advanceIfNeeded(final int minval) {
+        // skip whole containers whose key (hs >>> 16) is below the key of minval
+        while (hasNext() && ((hs >>> 16) < (minval >>> 16))) {
+            ++pos;
+            nextContainer();
+        }
+        // same key: advance inside the container using the low bits of minval
+        if (hasNext() && ((hs >>> 16) == (minval >>> 16))) {
+            iter.advanceIfNeeded(Util.lowbits(minval));
+            if (!iter.hasNext()) {
+                ++pos;
+                nextContainer();
+            }
+        }
+    }
+
+    @Override
+    public int peekNext() {
+        return Util.toIntUnsigned(iter.peekNext()) | hs;
+    }
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/PeekableIntIterator.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/PeekableIntIterator.java
new file mode 100644
index 000000000..906f23b73
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/PeekableIntIterator.java
@@ -0,0 +1,33 @@
+package com.fr.third.bitmap.roaringbitmap;
+
+
+/**
+ * Simple extension to the IntIterator interface.
+ * It allows you to "skip" values using the advanceIfNeeded
+ * method, and to look at the value without advancing (peekNext).
+ */
+public interface PeekableIntIterator extends IntIterator {
+    /**
+     * If needed, advance as long as the next value is smaller than minval
+     *
+     * @param minval threshold
+     */
+    public void advanceIfNeeded(int minval);
+
+    /**
+     * Look at the next value without advancing
+     *
+     * @return next value
+     */
+    public int peekNext();
+
+    /**
+     * Creates a copy of the iterator.
+     *
+     * @return a clone of the current iterator
+     */
+    @Override
+    PeekableIntIterator clone();
+}
+
+
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/PeekableShortIterator.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/PeekableShortIterator.java
new file mode 100644
index 000000000..278f0e20a
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/PeekableShortIterator.java
@@ -0,0 +1,34 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+package com.fr.third.bitmap.roaringbitmap;
+
+
+/**
+ * Simple extension to the ShortIterator interface. It adds advanceIfNeeded to skip values and
+ * peekNext to look at the next value without advancing.
+ */
+public interface PeekableShortIterator extends ShortIterator {
+    /**
+     * If needed, advance as long as the next value is smaller than minval (as an unsigned
+     * short)
+     *
+     * @param minval threshold
+     */
+    public void advanceIfNeeded(short minval);
+
+    /**
+     * Look at the next value without advancing
+     *
+     * @return next value
+     */
+    public short peekNext();
+
+    /**
+     * Creates a copy of the iterator.
+     *
+     * @return a clone of the current iterator
+     */
+    @Override
+    PeekableShortIterator clone();
+}
+
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ReverseIntIteratorFlyweight.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ReverseIntIteratorFlyweight.java
new file mode 100644
index 000000000..0475cc78d
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ReverseIntIteratorFlyweight.java
@@ -0,0 +1,110 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+
+package com.fr.third.bitmap.roaringbitmap;
+
+/**
+ * Fast iterator minimizing the stress on the garbage collector. You can create one reusable
+ * instance of this class and then {@link #wrap(RoaringBitmap)}
+ * <p>

+ * This iterator enumerates the stored values in reverse (starting from the end).
+ *
+ * @author Borislav Ivanov
+ **/
+public class ReverseIntIteratorFlyweight implements IntIterator {
+
+    // key of the current container, already shifted into the 16 high bits
+    private int hs;
+
+    // iterator over the current container; one of the three reusable iterators below
+    private ShortIterator iter;
+
+    private ReverseArrayContainerShortIterator arrIter = new ReverseArrayContainerShortIterator();
+
+    private ReverseBitmapContainerShortIterator bitmapIter =
+            new ReverseBitmapContainerShortIterator();
+
+    private ReverseRunContainerShortIterator runIter = new ReverseRunContainerShortIterator();
+
+    // index of the current container, -1 once iteration is over. This must be an int: keys are
+    // unsigned 16-bit values, so the last index can exceed Short.MAX_VALUE and would wrap to a
+    // negative number when narrowed to short, making hasNext() report an empty iteration.
+    private int pos;
+
+    private RoaringBitmap roaringBitmap = null;
+
+
+    /**
+     * Creates an instance that is not ready for iteration. You must first call
+     * {@link #wrap(RoaringBitmap)}.
+     */
+    public ReverseIntIteratorFlyweight() {
+
+    }
+
+    /**
+     * Creates an instance that is ready for iteration.
+     *
+     * @param r bitmap to be iterated over
+     */
+    public ReverseIntIteratorFlyweight(RoaringBitmap r) {
+        wrap(r);
+    }
+
+    /**
+     * Creates a copy of this iterator with its own copy of the current container iterator.
+     *
+     * @return a clone of the current iterator
+     */
+    @Override
+    public IntIterator clone() {
+        try {
+            // shallow copy: arrIter, bitmapIter and runIter stay shared with this instance
+            ReverseIntIteratorFlyweight x = (ReverseIntIteratorFlyweight) super.clone();
+            // iter is only assigned once a container has been wrapped, so it may still be null
+            if (this.iter != null) {
+                x.iter = this.iter.clone();
+            }
+            return x;
+        } catch (CloneNotSupportedException e) {
+            return null;// will not happen
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        return pos >= 0;
+    }
+
+
+    @Override
+    public int next() {
+        final int x = iter.nextAsInt() | hs;
+        // step back to the previous container as soon as the current one is exhausted
+        if (!iter.hasNext()) {
+            --pos;
+            nextContainer();
+        }
+        return x;
+    }
+
+    /**
+     * Points iter and hs at the container at index pos, if there is one.
+     */
+    private void nextContainer() {
+        if (pos >= 0) {
+
+            Container container = this.roaringBitmap.highLowContainer.getContainerAtIndex(pos);
+            if (container instanceof BitmapContainer) {
+                bitmapIter.wrap(((BitmapContainer) container).bitmap);
+                iter = bitmapIter;
+            } else if (container instanceof ArrayContainer) {
+                arrIter.wrap((ArrayContainer) container);
+                iter = arrIter;
+            } else {
+                runIter.wrap((RunContainer) container);
+                iter = runIter;
+            }
+            hs = Util.toIntUnsigned(this.roaringBitmap.highLowContainer.getKeyAtIndex(pos)) << 16;
+        }
+    }
+
+    /**
+     * Prepares a bitmap for iteration
+     *
+     * @param r bitmap to be iterated over
+     */
+    public void wrap(RoaringBitmap r) {
+        this.roaringBitmap = r;
+        this.hs = 0;
+        // start at the last container; -1 for an empty bitmap
+        this.pos = this.roaringBitmap.highLowContainer.size() - 1;
+        this.nextContainer();
+    }
+
+}
+
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RoaringArray.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RoaringArray.java
new file mode 100644
index 000000000..eebb99d77
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RoaringArray.java
@@ -0,0 +1,582 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+
+package com.fr.third.bitmap.roaringbitmap;
+
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.Arrays;
+
+
+/**
+ * Specialized array to store the containers used by a RoaringBitmap. This is not meant to be used
+ * by end users.
+ */
+public final class RoaringArray implements Cloneable, Externalizable {
+    protected static final short SERIAL_COOKIE_NO_RUNCONTAINER = 12346;
+    protected static final short SERIAL_COOKIE = 12347;
+    protected static final int NO_OFFSET_THRESHOLD = 4;
+    static final int INITIAL_CAPACITY = 4;
+    // bumped serialVersionUID with runcontainers, so default serialization
+    // will not work...
+    private static final long serialVersionUID = 8L;
+    short[] keys = null;
+
+    Container[] values = null;
+
+    int size = 0;
+
+    protected RoaringArray() {
+        this.keys = new short[INITIAL_CAPACITY];
+        this.values = new Container[INITIAL_CAPACITY];
+    }
+
+    /**
+     * Find the smallest integer index larger than pos such that array[index].key>=x. If none can
+     * be found, return size. Based on code by O. Kaser.
+     *
+     * @param x   minimal value
+     * @param pos index to exceed
+     * @return the smallest index greater than pos such that array[index].key is at least as large as
+     * x, or size if it is not possible.
+     */
+    protected int advanceUntil(short x, int pos) {
+        int lower = pos + 1;
+
+        // special handling for a possibly common sequential case
+        if (lower >= size || Util.toIntUnsigned(keys[lower]) >= Util.toIntUnsigned(x)) {
+            return lower;
+        }
+
+        int spansize = 1; // could set larger
+        // bootstrap an upper limit: double the span until it reaches a key that is not below x
+        // or runs past the end of the array
+
+        while (lower + spansize < size
+                && Util.toIntUnsigned(keys[lower + spansize]) < Util.toIntUnsigned(x)) {
+            spansize *= 2; // hoping for compiler will reduce to shift
+        }
+        // clamp the upper bound to the last valid index
+        int upper = (lower + spansize < size) ? lower + spansize : size - 1;
+
+        // maybe we are lucky (could be common case when the seek ahead
+        // expected to be small and sequential will otherwise make us look bad)
+        if (keys[upper] == x) {
+            return upper;
+        }
+
+        if (Util.toIntUnsigned(keys[upper]) < Util.toIntUnsigned(x)) {// means array has no item key >=
+            // x
+            return size;
+        }
+
+        // we know that the next-smallest span was too small
+        lower += (spansize / 2);
+
+        // else begin binary search
+        // invariant: keys[lower] < x and keys[upper] > x (keys compared as unsigned values)
+        while (lower + 1 != upper) {
+            int mid = (lower + upper) / 2;
+            if (keys[mid] == x) {
+                return mid;
+            } else if (Util.toIntUnsigned(keys[mid]) < Util.toIntUnsigned(x)) {
+                lower = mid;
+            } else {
+                upper = mid;
+            }
+        }
+        return upper;
+    }
+
+    /**
+     * Appends a key/container pair at the end of the array, after making room with
+     * extendArray(1). The container is stored as given, no copy is made.
+     *
+     * @param key   key to append
+     * @param value container to append
+     */
+    protected void append(short key, Container value) {
+        extendArray(1);
+        this.keys[this.size] = key;
+        this.values[this.size] = value;
+        this.size++;
+    }
+
+    /**
+     * Append copies of the values AFTER a specified key (may or may not be present) to end.
+ * + * @param sa other array + * @param beforeStart given key is the largest key that we won't copy + */ + protected void appendCopiesAfter(RoaringArray sa, short beforeStart) { + int startLocation = sa.getIndex(beforeStart); + if (startLocation >= 0) { + startLocation++; + } else { + startLocation = -startLocation - 1; + } + extendArray(sa.size - startLocation); + + for (int i = startLocation; i < sa.size; ++i) { + this.keys[this.size] = sa.keys[i]; + this.values[this.size] = sa.values[i].clone(); + this.size++; + } + } + + /** + * Append copies of the values from another array, from the start + * + * @param sourceArray The array to copy from + * @param stoppingKey any equal or larger key in other array will terminate copying + */ + protected void appendCopiesUntil(RoaringArray sourceArray, short stoppingKey) { + int stopKey = Util.toIntUnsigned(stoppingKey); + for (int i = 0; i < sourceArray.size; ++i) { + if (Util.toIntUnsigned(sourceArray.keys[i]) >= stopKey) { + break; + } + extendArray(1); + this.keys[this.size] = sourceArray.keys[i]; + this.values[this.size] = sourceArray.values[i].clone(); + this.size++; + } + } + + /** + * Append copy of the one value from another array + * + * @param sa other array + * @param index index in the other array + */ + protected void appendCopy(RoaringArray sa, int index) { + extendArray(1); + this.keys[this.size] = sa.keys[index]; + this.values[this.size] = sa.values[index].clone(); + this.size++; + } + + /** + * Append copies of the values from another array + * + * @param sa other array + * @param startingIndex starting index in the other array + * @param end endingIndex (exclusive) in the other array + */ + protected void appendCopy(RoaringArray sa, int startingIndex, int end) { + extendArray(end - startingIndex); + for (int i = startingIndex; i < end; ++i) { + this.keys[this.size] = sa.keys[i]; + this.values[this.size] = sa.values[i].clone(); + this.size++; + } + } + + + /** + * Append the values from another array, no 
copy is made (use with care)
+     *
+     * @param sa            other array
+     * @param startingIndex starting index in the other array
+     * @param end           endingIndex (exclusive) in the other array
+     */
+    protected void append(RoaringArray sa, int startingIndex, int end) {
+        extendArray(end - startingIndex);
+        for (int i = startingIndex; i < end; ++i) {
+            this.keys[this.size] = sa.keys[i];
+            this.values[this.size] = sa.values[i];
+            this.size++;
+        }
+    }
+
+
+    // searches keys[begin, end) for key, delegating to Util.unsignedBinarySearch
+    private int binarySearch(int begin, int end, short key) {
+        return Util.unsignedBinarySearch(keys, begin, end, key);
+    }
+
+    /**
+     * Drops the backing arrays and resets the size to 0. The arrays are left null, so they must
+     * be re-allocated (as deserialize does) before anything is stored again.
+     */
+    protected void clear() {
+        this.keys = null;
+        this.values = null;
+        this.size = 0;
+    }
+
+    /**
+     * Creates a copy whose backing arrays are trimmed to the current size and whose containers
+     * are cloned individually.
+     *
+     * @return the copy
+     * @throws CloneNotSupportedException declared for Object.clone()
+     */
+    @Override
+    public RoaringArray clone() throws CloneNotSupportedException {
+        RoaringArray sa;
+        sa = (RoaringArray) super.clone();
+        sa.keys = Arrays.copyOf(this.keys, this.size);
+        sa.values = Arrays.copyOf(this.values, this.size);
+        for (int k = 0; k < this.size; ++k) {
+            sa.values[k] = sa.values[k].clone();
+        }
+        sa.size = this.size;
+        return sa;
+    }
+
+    /**
+     * Moves the entries in [begin, end) so that they start at newBegin. The size is not changed.
+     *
+     * @param begin    first index to move (inclusive)
+     * @param end      last index to move (exclusive)
+     * @param newBegin destination index of the first moved entry
+     */
+    protected void copyRange(int begin, int end, int newBegin) {
+        // assuming begin <= end and newBegin < begin
+        final int range = end - begin;
+        System.arraycopy(this.keys, begin, this.keys, newBegin, range);
+        System.arraycopy(this.values, begin, this.values, newBegin, range);
+    }
+
+    /**
+     * Deserialize. The previous content is discarded first.
+     * The stream is read as little endian: a cookie, the number of containers (packed into the
+     * upper 16 bits of the cookie when the low 16 bits equal SERIAL_COOKIE, otherwise a separate
+     * int), an optional bitmap flagging run containers, one (key, cardinality - 1) pair per
+     * container, optional container offsets, and finally the container payloads.
+     *
+     * @param in the DataInput stream
+     * @throws IOException if the cookie is not recognised, the container count is out of range,
+     *                     or an I/O error occurs (including a premature end of the stream)
+     */
+    public void deserialize(DataInput in) throws IOException {
+        this.clear();
+        // little endian
+        final int cookie = Integer.reverseBytes(in.readInt());
+        if ((cookie & 0xFFFF) != SERIAL_COOKIE && cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
+            throw new IOException("I failed to find one of the right cookies.");
+        }
+        final boolean hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
+        final int containerCount = hasrun ? (cookie >>> 16) + 1
+                : Integer.reverseBytes(in.readInt());
+        // Keys are 16-bit, so there can never be more than 1 << 16 containers. Reject anything
+        // else up front instead of failing with NegativeArraySizeException or allocating an
+        // arbitrarily large array from a corrupted or hostile stream.
+        if (containerCount < 0 || containerCount > (1 << 16)) {
+            throw new IOException("Invalid number of containers: " + containerCount);
+        }
+        this.size = containerCount;
+
+        if ((this.keys == null) || (this.keys.length < this.size)) {
+            this.keys = new short[this.size];
+            this.values = new Container[this.size];
+        }
+
+
+        byte[] bitmapOfRunContainers = null;
+        if (hasrun) {
+            // one bit per container, set when that container is a run container
+            bitmapOfRunContainers = new byte[(size + 7) / 8];
+            in.readFully(bitmapOfRunContainers);
+        }
+
+        final short keys[] = new short[this.size];
+        final int cardinalities[] = new int[this.size];
+        final boolean isBitmap[] = new boolean[this.size];
+        for (int k = 0; k < this.size; ++k) {
+            keys[k] = Short.reverseBytes(in.readShort());
+            // the stream stores cardinality - 1 as an unsigned 16-bit value
+            cardinalities[k] = 1 + (0xFFFF & Short.reverseBytes(in.readShort()));
+
+            isBitmap[k] = cardinalities[k] > ArrayContainer.DEFAULT_MAX_SIZE;
+            if (bitmapOfRunContainers != null && (bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
+                isBitmap[k] = false;
+            }
+        }
+        if ((!hasrun) || (this.size >= NO_OFFSET_THRESHOLD)) {
+            // The offsets (4 bytes per container) are not needed for sequential reading.
+            // DataInput.skipBytes may skip fewer bytes than requested, which would misalign every
+            // container read below, so consume them with readFully instead.
+            in.readFully(new byte[this.size * 4]);
+        }
+        // Reading the containers
+        for (int k = 0; k < this.size; ++k) {
+            Container val;
+            if (isBitmap[k]) {
+                final long[] bitmapArray = new long[BitmapContainer.MAX_CAPACITY / 64];
+                // little endian
+                for (int l = 0; l < bitmapArray.length; ++l) {
+                    bitmapArray[l] = Long.reverseBytes(in.readLong());
+                }
+                val = new BitmapContainer(bitmapArray, cardinalities[k]);
+            } else if (bitmapOfRunContainers != null
+                    && ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0)) {
+                // cf RunContainer.writeArray()
+                int nbrruns = Util.toIntUnsigned(Short.reverseBytes(in.readShort()));
+                final short lengthsAndValues[] = new short[2 * nbrruns];
+
+                for (int j = 0; j < 2 * nbrruns; ++j) {
+                    lengthsAndValues[j] = Short.reverseBytes(in.readShort());
+                }
+                val = new RunContainer(lengthsAndValues, nbrruns);
+            } else {
+                final short[] shortArray = new short[cardinalities[k]];
+                for (int l = 0; l < shortArray.length; ++l) {
+                    shortArray[l] = Short.reverseBytes(in.readShort());
+                }
+                val = new ArrayContainer(shortArray);
+            }
+            this.keys[k] = keys[k];
+            this.values[k] = val;
+        }
+    }
+
+    /**
+     * Two arrays are equal when they have the same size and hold equal keys and equal containers
+     * at every index below that size.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (o instanceof RoaringArray) {
+            RoaringArray srb = (RoaringArray) o;
+            if (srb.size != this.size) {
+                return false;
+            }
+            for (int i = 0; i < srb.size; ++i) {
+                if (this.keys[i] != srb.keys[i] || !this.values[i].equals(srb.values[i])) {
+                    return false;
+                }
+            }
+            return true;
+        }
+        return false;
+    }
+
+    // make sure there is capacity for at least k more elements
+    // (grows to 2 * (size + k) while the capacity is below 1024, to 5 * (size + k) / 4 afterwards)
+    protected void extendArray(int k) {
+        // size + 1 could overflow
+        if (this.size + k >= this.keys.length) {
+            int newCapacity;
+            if (this.keys.length < 1024) {
+                newCapacity = 2 * (this.size + k);
+            } else {
+                newCapacity = 5 * (this.size + k) / 4;
+            }
+            this.keys = Arrays.copyOf(this.keys, newCapacity);
+            this.values = Arrays.copyOf(this.values, newCapacity);
+        }
+    }
+
+    // involves a binary search; returns null when the search yields a negative index
+    protected Container getContainer(short x) {
+        int i = this.binarySearch(0, size, x);
+        if (i < 0) {
+            return null;
+        }
+        return this.values[i];
+    }
+
+    // direct access by index, no bounds check against size
+    protected Container getContainerAtIndex(int i) {
+        return this.values[i];
+    }
+
+    /**
+     * Create a ContainerPointer for this RoaringArray
+     *
+     * @return a ContainerPointer
+     */
+    public ContainerPointer getContainerPointer() {
+        return getContainerPointer(0);
+    }
+
+    /**
+     * Create a ContainerPointer for this RoaringArray
+     *
+     * @param startIndex starting index in the container list
+     * @return a ContainerPointer
+     */
+    public ContainerPointer getContainerPointer(final int startIndex) {
+        return new ContainerPointer() {
+            // index of the container this pointer currently refers to
+            int k = startIndex;
+
+            @Override
+            public void advance() {
+                ++k;
+
+            }
+
+            @Override
+            public ContainerPointer clone() {
+                try {
+                    return (ContainerPointer) super.clone();
+                } catch (CloneNotSupportedException e) {
+                    return null;// will not happen
+                }
+            }
+
+            @Override
+            public int compareTo(ContainerPointer o) {
+                if (key() != o.key()) {
+                    
return Util.toIntUnsigned(key()) - Util.toIntUnsigned(o.key()); + } + return o.getCardinality() - getCardinality(); + } + + @Override + public int getCardinality() { + return getContainer().getCardinality(); + } + + @Override + public Container getContainer() { + if (k >= RoaringArray.this.size) { + return null; + } + return RoaringArray.this.values[k]; + } + + + @Override + public boolean isBitmapContainer() { + return getContainer() instanceof BitmapContainer; + } + + @Override + public boolean isRunContainer() { + return getContainer() instanceof RunContainer; + } + + + @Override + public short key() { + return RoaringArray.this.keys[k]; + + } + }; + } + + // involves a binary search + protected int getIndex(short x) { + // before the binary search, we optimize for frequent cases + if ((size == 0) || (keys[size - 1] == x)) { + return size - 1; + } + // no luck we have to go through the list + return this.binarySearch(0, size, x); + } + + protected short getKeyAtIndex(int i) { + return this.keys[i]; + } + + @Override + public int hashCode() { + int hashvalue = 0; + for (int k = 0; k < this.size; ++k) { + hashvalue = 31 * hashvalue + keys[k] * 0xF0F0F0 + values[k].hashCode(); + } + return hashvalue; + } + + boolean hasRunContainer() { + for (int k = 0; k < size; ++k) { + Container ck = values[k]; + if (ck instanceof RunContainer) { + return true; + } + } + return false; + } + + protected int headerSize() { + if (hasRunContainer()) { + if (size < NO_OFFSET_THRESHOLD) {// for small bitmaps, we omit the offsets + return 4 + (size + 7) / 8 + 4 * size; + } + return 4 + (size + 7) / 8 + 8 * size;// - 4 because we pack the size with the cookie + } else { + return 4 + 4 + 8 * size; + } + } + + + // insert a new key, it is assumed that it does not exist + protected void insertNewKeyValueAt(int i, short key, Container value) { + extendArray(1); + System.arraycopy(keys, i, keys, i + 1, size - i); + keys[i] = key; + System.arraycopy(values, i, values, i + 1, size - i); + 
values[i] = value; + size++; + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + deserialize(in); + } + + protected void removeAtIndex(int i) { + System.arraycopy(keys, i + 1, keys, i, size - i - 1); + keys[size - 1] = 0; + System.arraycopy(values, i + 1, values, i, size - i - 1); + values[size - 1] = null; + size--; + } + + protected void removeIndexRange(int begin, int end) { + if (end <= begin) { + return; + } + final int range = end - begin; + System.arraycopy(keys, end, keys, begin, size - end); + System.arraycopy(values, end, values, begin, size - end); + for (int i = 1; i <= range; ++i) { + keys[size - i] = 0; + values[size - i] = null; + } + size -= range; + } + + protected void replaceKeyAndContainerAtIndex(int i, short key, Container c) { + this.keys[i] = key; + this.values[i] = c; + } + + protected void resize(int newLength) { + Arrays.fill(this.keys, newLength, this.size, (short) 0); + Arrays.fill(this.values, newLength, this.size, null); + this.size = newLength; + } + + + /** + * Serialize. + *

+ * The current bitmap is not modified. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void serialize(DataOutput out) throws IOException { + int startOffset = 0; + boolean hasrun = hasRunContainer(); + if (hasrun) { + out.writeInt(Integer.reverseBytes(SERIAL_COOKIE | ((size - 1) << 16))); + byte[] bitmapOfRunContainers = new byte[(size + 7) / 8]; + for (int i = 0; i < size; ++i) { + if (this.values[i] instanceof RunContainer) { + bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); + } + } + out.write(bitmapOfRunContainers); + if (this.size < NO_OFFSET_THRESHOLD) { + startOffset = 4 + 4 * this.size + bitmapOfRunContainers.length; + } else { + startOffset = 4 + 8 * this.size + bitmapOfRunContainers.length; + } + } else { // backwards compatibility + out.writeInt(Integer.reverseBytes(SERIAL_COOKIE_NO_RUNCONTAINER)); + out.writeInt(Integer.reverseBytes(size)); + startOffset = 4 + 4 + 4 * this.size + 4 * this.size; + } + for (int k = 0; k < size; ++k) { + out.writeShort(Short.reverseBytes(this.keys[k])); + out.writeShort(Short.reverseBytes((short) (this.values[k].getCardinality() - 1))); + } + if ((!hasrun) || (this.size >= NO_OFFSET_THRESHOLD)) { + // writing the containers offsets + for (int k = 0; k < this.size; k++) { + out.writeInt(Integer.reverseBytes(startOffset)); + startOffset = startOffset + this.values[k].getArraySizeInBytes(); + } + } + for (int k = 0; k < size; ++k) { + values[k].writeArray(out); + } + } + + /** + * Report the number of bytes required for serialization. 
+ * + * @return the size in bytes + */ + public int serializedSizeInBytes() { + int count = headerSize(); + for (int k = 0; k < size; ++k) { + count += values[k].getArraySizeInBytes(); + } + return count; + } + + protected void setContainerAtIndex(int i, Container c) { + this.values[i] = c; + } + + protected int size() { + return this.size; + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RoaringBitmap.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RoaringBitmap.java new file mode 100644 index 000000000..e300cb8b8 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RoaringBitmap.java @@ -0,0 +1,2205 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap; + +import com.fr.third.bitmap.roaringbitmap.buffer.ImmutableRoaringBitmap; +import com.fr.third.bitmap.roaringbitmap.buffer.MappeableContainerPointer; +import com.fr.third.bitmap.roaringbitmap.buffer.MutableRoaringBitmap; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.io.Serializable; +import java.util.Iterator; + + +/** + * RoaringBitmap, a compressed alternative to the BitSet. + *

+ *

+ * {@code
+ *      import com.fr.third.bitmap.roaringbitmap.*;
+ *
+ *      //...
+ *
+ *      RoaringBitmap rr = RoaringBitmap.bitmapOf(1,2,3,1000);
+ *      RoaringBitmap rr2 = new RoaringBitmap();
+ *      for(int k = 4000; k<4255;++k) rr2.add(k);
+ *      RoaringBitmap rror = RoaringBitmap.or(rr, rr2);
+ *
+ *      //...
+ *      DataOutputStream wheretoserialize = ...
+ *      rr.runOptimize(); // can help compression
+ *      rr.serialize(wheretoserialize);
+ * }
+ * 
+ *

+ * Integers are added in unsigned sorted order. That is, they are treated as unsigned integers (see + * Java 8's Integer.toUnsignedLong function). + *

+ * Bitmaps are limited to a maximum of Integer.MAX_VALUE entries. Trying to create larger bitmaps + * could result in undefined behaviors. + */ + + +public class RoaringBitmap implements Cloneable, Serializable, Iterable, Externalizable, + ImmutableBitmapDataProvider { + + private static final long serialVersionUID = 6L; + RoaringArray highLowContainer = null; + + /** + * Create an empty bitmap + */ + public RoaringBitmap() { + highLowContainer = new RoaringArray(); + } + + /** + * Create a RoaringBitmap from a MutableRoaringBitmap or ImmutableRoaringBitmap. The source is not + * modified. + * + * @param rb the original bitmap + */ + public RoaringBitmap(ImmutableRoaringBitmap rb) { + highLowContainer = new RoaringArray(); + MappeableContainerPointer cp = rb.getContainerPointer(); + while (cp.getContainer() != null) { + highLowContainer.append(cp.key(), cp.getContainer().toContainer()); + cp.advance(); + } + } + + private static void rangeSanityCheck(final long rangeStart, final long rangeEnd) { + if (rangeStart < 0 || rangeStart > (1L << 32) - 1) { + throw new IllegalArgumentException("rangeStart=" + rangeStart + + " should be in [0, 0xffffffff]"); + } + if (rangeEnd > (1L << 32) || rangeEnd < 0) { + throw new IllegalArgumentException("rangeEnd=" + rangeEnd + + " should be in [0, 0xffffffff + 1]"); + } + } + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) added. 
+ * + * @param rb initial bitmap (will not be modified) + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new bitmap + */ + public static RoaringBitmap add(RoaringBitmap rb, final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return rb.clone(); // empty range + } + + + final int hbStart = Util.toIntUnsigned(Util.highbits(rangeStart)); + final int lbStart = Util.toIntUnsigned(Util.lowbits(rangeStart)); + final int hbLast = Util.toIntUnsigned(Util.highbits(rangeEnd - 1)); + final int lbLast = Util.toIntUnsigned(Util.lowbits(rangeEnd - 1)); + + RoaringBitmap answer = new RoaringBitmap(); + answer.highLowContainer.appendCopiesUntil(rb.highLowContainer, (short) hbStart); + + if (hbStart == hbLast) { + final int i = rb.highLowContainer.getIndex((short) hbStart); + final Container c = + i >= 0 ? rb.highLowContainer.getContainerAtIndex(i).add(lbStart, lbLast + 1) + : Container.rangeOfOnes(lbStart, lbLast + 1); + answer.highLowContainer.append((short) hbStart, c); + answer.highLowContainer.appendCopiesAfter(rb.highLowContainer, (short) hbLast); + return answer; + } + int ifirst = rb.highLowContainer.getIndex((short) hbStart); + int ilast = rb.highLowContainer.getIndex((short) hbLast); + + { + final Container c = ifirst >= 0 + ? rb.highLowContainer.getContainerAtIndex(ifirst).add(lbStart, + Util.maxLowBitAsInteger() + 1) + : Container.rangeOfOnes(lbStart, Util.maxLowBitAsInteger() + 1); + answer.highLowContainer.append((short) hbStart, c); + } + for (int hb = hbStart + 1; hb < hbLast; ++hb) { + Container c = Container.rangeOfOnes(0, Util.maxLowBitAsInteger() + 1); + answer.highLowContainer.append((short) hb, c); + } + { + final Container c = + ilast >= 0 ? 
rb.highLowContainer.getContainerAtIndex(ilast).add(0, lbLast + 1) + : Container.rangeOfOnes(0, lbLast + 1); + answer.highLowContainer.append((short) hbLast, c); + } + answer.highLowContainer.appendCopiesAfter(rb.highLowContainer, (short) hbLast); + return answer; + } + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) added. + * + * @param rb initial bitmap (will not be modified) + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new bitmap + * @deprecated use the version where longs specify the range + */ + @Deprecated + public static RoaringBitmap add(RoaringBitmap rb, final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + return add(rb, (long) rangeStart, (long) rangeEnd); + } + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + return add(rb, rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + + /** + * Bitwise AND (intersection) operation. The provided bitmaps are *not* modified. This operation + * is thread-safe as long as the provided bitmaps remain unchanged. + *

+     * If you have more than 2 bitmaps, consider using the FastAggregation class.
+     *
+     * @param x1 first bitmap
+     * @param x2 other bitmap
+     * @return result of the operation
+     * @see FastAggregation#and(RoaringBitmap...)
+     */
+    public static RoaringBitmap and(final RoaringBitmap x1, final RoaringBitmap x2) {
+        final RoaringBitmap answer = new RoaringBitmap();
+        final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size();
+        int pos1 = 0, pos2 = 0;
+
+        // walk both container arrays in parallel
+        while (pos1 < length1 && pos2 < length2) {
+            final short s1 = x1.highLowContainer.getKeyAtIndex(pos1);
+            final short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+            if (s1 == s2) {
+                // same key on both sides: intersect the containers, keep only non-empty results
+                final Container c1 = x1.highLowContainer.getContainerAtIndex(pos1);
+                final Container c2 = x2.highLowContainer.getContainerAtIndex(pos2);
+                final Container c = c1.and(c2);
+                if (c.getCardinality() > 0) {
+                    answer.highLowContainer.append(s1, c);
+                }
+                ++pos1;
+                ++pos2;
+            } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                // key only present in x1: jump ahead in x1
+                pos1 = x1.highLowContainer.advanceUntil(s2, pos1);
+            } else { // s1 > s2
+                // key only present in x2: jump ahead in x2
+                pos2 = x2.highLowContainer.advanceUntil(s1, pos2);
+            }
+        }
+        return answer;
+    }
+
+    /**
+     * Cardinality of Bitwise AND (intersection) operation. The provided bitmaps are *not* modified.
+     * This operation is thread-safe as long as the provided bitmaps remain unchanged.
+     *
+     * @param x1 first bitmap
+     * @param x2 other bitmap
+     * @return as if you did and(x1,x2).getCardinality()
+     * @see FastAggregation#and(RoaringBitmap...)
+ */ + public static int andCardinality(final RoaringBitmap x1, final RoaringBitmap x2) { + int answer = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + int pos1 = 0, pos2 = 0; + + while (pos1 < length1 && pos2 < length2) { + final short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + if (s1 == s2) { + final Container c1 = x1.highLowContainer.getContainerAtIndex(pos1); + final Container c2 = x2.highLowContainer.getContainerAtIndex(pos2); + // TODO: could be made faster if we did not have to materialize container + answer += c1.andCardinality(c2); + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + pos1 = x1.highLowContainer.advanceUntil(s2, pos1); + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + return answer; + } + + /** + * Bitwise ANDNOT (difference) operation. The provided bitmaps are *not* modified. This operation + * is thread-safe as long as the provided bitmaps remain unchanged. 
+ * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + */ + public static RoaringBitmap andNot(final RoaringBitmap x1, final RoaringBitmap x2) { + final RoaringBitmap answer = new RoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + + while (pos1 < length1 && pos2 < length2) { + final short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + if (s1 == s2) { + final Container c1 = x1.highLowContainer.getContainerAtIndex(pos1); + final Container c2 = x2.highLowContainer.getContainerAtIndex(pos2); + final Container c = c1.andNot(c2); + if (c.getCardinality() > 0) { + answer.highLowContainer.append(s1, c); + } + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + final int nextPos1 = x1.highLowContainer.advanceUntil(s2, pos1); + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1, nextPos1); + pos1 = nextPos1; + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + if (pos2 == length2) { + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1, length1); + } + return answer; + } + + /** + * Generate a bitmap with the specified values set to true. The provided integers values don't + * have to be in sorted order, but it may be preferable to sort them from a performance point of + * view. + * + * @param dat set values + * @return a new bitmap + */ + public static RoaringBitmap bitmapOf(final int... dat) { + final RoaringBitmap ans = new RoaringBitmap(); + ans.add(dat); + return ans; + } + + /** + * Complements the bits in the given range, from rangeStart (inclusive) rangeEnd (exclusive). The + * given bitmap is unchanged. 
+ * + * @param bm bitmap being negated + * @param rangeStart inclusive beginning of range, in [0, 0xffffffff] + * @param rangeEnd exclusive ending of range, in [0, 0xffffffff + 1] + * @return a new Bitmap + */ + public static RoaringBitmap flip(RoaringBitmap bm, final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return bm.clone(); + } + RoaringBitmap answer = new RoaringBitmap(); + final int hbStart = Util.toIntUnsigned(Util.highbits(rangeStart)); + final int lbStart = Util.toIntUnsigned(Util.lowbits(rangeStart)); + final int hbLast = Util.toIntUnsigned(Util.highbits(rangeEnd - 1)); + final int lbLast = Util.toIntUnsigned(Util.lowbits(rangeEnd - 1)); + + // copy the containers before the active area + answer.highLowContainer.appendCopiesUntil(bm.highLowContainer, (short) hbStart); + + for (int hb = hbStart; hb <= hbLast; ++hb) { + final int containerStart = (hb == hbStart) ? lbStart : 0; + final int containerLast = (hb == hbLast) ? lbLast : Util.maxLowBitAsInteger(); + + final int i = bm.highLowContainer.getIndex((short) hb); + final int j = answer.highLowContainer.getIndex((short) hb); + assert j < 0; + + if (i >= 0) { + Container c = + bm.highLowContainer.getContainerAtIndex(i).not(containerStart, containerLast + 1); + if (c.getCardinality() > 0) { + answer.highLowContainer.insertNewKeyValueAt(-j - 1, (short) hb, c); + } + + } else { // *think* the range of ones must never be + // empty. + answer.highLowContainer.insertNewKeyValueAt(-j - 1, (short) hb, + Container.rangeOfOnes(containerStart, containerLast + 1)); + } + } + // copy the containers after the active area. + answer.highLowContainer.appendCopiesAfter(bm.highLowContainer, (short) hbLast); + return answer; + } + + /** + * Complements the bits in the given range, from rangeStart (inclusive) rangeEnd (exclusive). The + * given bitmap is unchanged. 
+ * + * @param rb bitmap being negated + * @param rangeStart inclusive beginning of range, in [0, 0xffffffff] + * @param rangeEnd exclusive ending of range, in [0, 0xffffffff + 1] + * @return a new Bitmap + * @deprecated use the version where longs specify the range + */ + @Deprecated + public static RoaringBitmap flip(RoaringBitmap rb, final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + return flip(rb, (long) rangeStart, (long) rangeEnd); + } + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + return flip(rb, rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + + + /** + * Checks whether the two bitmaps intersect. This can be much faster than calling "and" and + * checking the cardinality of the result. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return true if they intersect + */ + public static boolean intersects(final RoaringBitmap x1, final RoaringBitmap x2) { + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + int pos1 = 0, pos2 = 0; + + while (pos1 < length1 && pos2 < length2) { + final short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + if (s1 == s2) { + final Container c1 = x1.highLowContainer.getContainerAtIndex(pos1); + final Container c2 = x2.highLowContainer.getContainerAtIndex(pos2); + if (c1.intersects(c2)) { + return true; + } + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + pos1 = x1.highLowContainer.advanceUntil(s2, pos1); + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + return false; + } + + + // important: inputs should not have been computed lazily + protected static RoaringBitmap lazyor(final RoaringBitmap x1, final RoaringBitmap x2) { + final RoaringBitmap answer = new RoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = 
x2.highLowContainer.size(); + main: + if (pos1 < length1 && pos2 < length2) { + short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + + while (true) { + if (s1 == s2) { + answer.highLowContainer.append(s1, x1.highLowContainer.getContainerAtIndex(pos1) + .lazyOR(x2.highLowContainer.getContainerAtIndex(pos2))); + pos1++; + pos2++; + if ((pos1 == length1) || (pos2 == length2)) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1); + pos1++; + if (pos1 == length1) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + } else { // s1 > s2 + answer.highLowContainer.appendCopy(x2.highLowContainer, pos2); + pos2++; + if (pos2 == length2) { + break main; + } + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } + } + } + if (pos1 == length1) { + answer.highLowContainer.appendCopy(x2.highLowContainer, pos2, length2); + } else if (pos2 == length2) { + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1, length1); + } + return answer; + } + + // important: inputs should not be reused + protected static RoaringBitmap lazyorfromlazyinputs(final RoaringBitmap x1, + final RoaringBitmap x2) { + final RoaringBitmap answer = new RoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + main: + if (pos1 < length1 && pos2 < length2) { + short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + + while (true) { + if (s1 == s2) { + Container c1 = x1.highLowContainer.getContainerAtIndex(pos1); + Container c2 = x2.highLowContainer.getContainerAtIndex(pos2); + if ((c2 instanceof BitmapContainer) && (!(c1 instanceof BitmapContainer))) { + Container tmp = c1; + c1 = c2; + c2 = tmp; + } + 
answer.highLowContainer.append(s1, c1.lazyIOR(c2)); + pos1++; + pos2++; + if ((pos1 == length1) || (pos2 == length2)) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + Container c1 = x1.highLowContainer.getContainerAtIndex(pos1); + answer.highLowContainer.append(s1, c1); + pos1++; + if (pos1 == length1) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + } else { // s1 > s2 + Container c2 = x2.highLowContainer.getContainerAtIndex(pos2); + answer.highLowContainer.append(s2, c2); + pos2++; + if (pos2 == length2) { + break main; + } + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } + } + } + if (pos1 == length1) { + answer.highLowContainer.append(x2.highLowContainer, pos2, length2); + } else if (pos2 == length2) { + answer.highLowContainer.append(x1.highLowContainer, pos1, length1); + } + return answer; + } + + + /** + * Compute overall OR between bitmaps. + *

+ * (Effectively calls {@link FastAggregation#or}) + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static RoaringBitmap or(Iterator bitmaps) { + return FastAggregation.or(bitmaps); + } + + /** + * Compute overall OR between bitmaps. + *

+ * (Effectively calls {@link FastAggregation#or}) + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static RoaringBitmap or(RoaringBitmap... bitmaps) { + return FastAggregation.or(bitmaps); + } + + /** + * Bitwise OR (union) operation. The provided bitmaps are *not* modified. This operation is + * thread-safe as long as the provided bitmaps remain unchanged. + *

+ * If you have more than 2 bitmaps, consider using the FastAggregation class. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + * @see FastAggregation#or(RoaringBitmap...) + * @see FastAggregation#horizontal_or(RoaringBitmap...) + */ + public static RoaringBitmap or(final RoaringBitmap x1, final RoaringBitmap x2) { + final RoaringBitmap answer = new RoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + main: + if (pos1 < length1 && pos2 < length2) { + short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + + while (true) { + if (s1 == s2) { + answer.highLowContainer.append(s1, x1.highLowContainer.getContainerAtIndex(pos1) + .or(x2.highLowContainer.getContainerAtIndex(pos2))); + pos1++; + pos2++; + if ((pos1 == length1) || (pos2 == length2)) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1); + pos1++; + if (pos1 == length1) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + } else { // s1 > s2 + answer.highLowContainer.appendCopy(x2.highLowContainer, pos2); + pos2++; + if (pos2 == length2) { + break main; + } + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } + } + } + if (pos1 == length1) { + answer.highLowContainer.appendCopy(x2.highLowContainer, pos2, length2); + } else if (pos2 == length2) { + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1, length1); + } + return answer; + } + + /** + * Cardinality of the bitwise OR (union) operation. The provided bitmaps are *not* modified. This + * operation is thread-safe as long as the provided bitmaps remain unchanged. + *

+ * If you have more than 2 bitmaps, consider using the FastAggregation class. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return cardinality of the union + * @see FastAggregation#or(RoaringBitmap...) + * @see FastAggregation#horizontal_or(RoaringBitmap...) + */ + public static int orCardinality(final RoaringBitmap x1, final RoaringBitmap x2) { + // we use the fact that the cardinality of the bitmaps is known so that + // the union is just the total cardinality minus the intersection + return x1.getCardinality() + x2.getCardinality() - andCardinality(x1, x2); + } + + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) removed. + * + * @param rb initial bitmap (will not be modified) + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new bitmap + */ + public static RoaringBitmap remove(RoaringBitmap rb, final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return rb.clone(); // empty range + } + + + final int hbStart = Util.toIntUnsigned(Util.highbits(rangeStart)); + final int lbStart = Util.toIntUnsigned(Util.lowbits(rangeStart)); + final int hbLast = Util.toIntUnsigned(Util.highbits(rangeEnd - 1)); + final int lbLast = Util.toIntUnsigned(Util.lowbits(rangeEnd - 1)); + RoaringBitmap answer = new RoaringBitmap(); + answer.highLowContainer.appendCopiesUntil(rb.highLowContainer, (short) hbStart); + + if (hbStart == hbLast) { + final int i = rb.highLowContainer.getIndex((short) hbStart); + if (i >= 0) { + final Container c = rb.highLowContainer.getContainerAtIndex(i).remove(lbStart, lbLast + 1); + if (c.getCardinality() > 0) { + answer.highLowContainer.append((short) hbStart, c); + } + } + answer.highLowContainer.appendCopiesAfter(rb.highLowContainer, (short) hbLast); + return answer; + } + int ifirst = rb.highLowContainer.getIndex((short) hbStart); + int ilast = rb.highLowContainer.getIndex((short) 
hbLast); + if ((ifirst >= 0) && (lbStart != 0)) { + final Container c = rb.highLowContainer.getContainerAtIndex(ifirst).remove(lbStart, + Util.maxLowBitAsInteger() + 1); + if (c.getCardinality() > 0) { + answer.highLowContainer.append((short) hbStart, c); + } + } + if ((ilast >= 0) && (lbLast != Util.maxLowBitAsInteger())) { + final Container c = rb.highLowContainer.getContainerAtIndex(ilast).remove(0, lbLast + 1); + if (c.getCardinality() > 0) { + answer.highLowContainer.append((short) hbLast, c); + } + } + answer.highLowContainer.appendCopiesAfter(rb.highLowContainer, (short) hbLast); + return answer; + } + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) removed. + * + * @param rb initial bitmap (will not be modified) + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new bitmap + * @deprecated use the version where longs specify the range + */ + @Deprecated + public static RoaringBitmap remove(RoaringBitmap rb, final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + return remove(rb, (long) rangeStart, (long) rangeEnd); + } + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + return remove(rb, rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + + + /** + * Bitwise XOR (symmetric difference) operation. The provided bitmaps are *not* modified. This + * operation is thread-safe as long as the provided bitmaps remain unchanged. + *

+ * If you have more than 2 bitmaps, consider using the FastAggregation class. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + * @see FastAggregation#xor(RoaringBitmap...) + * @see FastAggregation#horizontal_xor(RoaringBitmap...) + */ + public static RoaringBitmap xor(final RoaringBitmap x1, final RoaringBitmap x2) { + final RoaringBitmap answer = new RoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + + main: + if (pos1 < length1 && pos2 < length2) { + short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + + while (true) { + if (s1 == s2) { + final Container c = x1.highLowContainer.getContainerAtIndex(pos1) + .xor(x2.highLowContainer.getContainerAtIndex(pos2)); + if (c.getCardinality() > 0) { + answer.highLowContainer.append(s1, c); + } + pos1++; + pos2++; + if ((pos1 == length1) || (pos2 == length2)) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1); + pos1++; + if (pos1 == length1) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + } else { // s1 > s2 + answer.highLowContainer.appendCopy(x2.highLowContainer, pos2); + pos2++; + if (pos2 == length2) { + break main; + } + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } + } + } + if (pos1 == length1) { + answer.highLowContainer.appendCopy(x2.highLowContainer, pos2, length2); + } else if (pos2 == length2) { + answer.highLowContainer.appendCopy(x1.highLowContainer, pos1, length1); + } + + return answer; + } + + /** + * Computes AND between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of 
range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + */ + public static RoaringBitmap and(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + + Iterator bitmapsIterator; + bitmapsIterator = selectRangeWithoutCopy(bitmaps, rangeStart, rangeEnd); + return FastAggregation.and(bitmapsIterator); + } + + /** + * Computes AND between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + * @deprecated use the version where longs specify the range. Negative range end are illegal. + */ + @Deprecated + public static RoaringBitmap and(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final int rangeStart, final int rangeEnd) { + return and(bitmaps, (long) rangeStart, (long) rangeEnd); + } + + /** + * Bitwise ANDNOT (difference) operation for the given range, rangeStart (inclusive) and rangeEnd + * (exclusive). The provided bitmaps are *not* modified. This operation is thread-safe as long as + * the provided bitmaps remain unchanged. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @param rangeStart starting point of the range (inclusive) + * @param rangeEnd end point of the range (exclusive) + * @return result of the operation + */ + public static RoaringBitmap andNot(final RoaringBitmap x1, final RoaringBitmap x2, + long rangeStart, long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + + RoaringBitmap rb1 = selectRangeWithoutCopy(x1, rangeStart, rangeEnd); + RoaringBitmap rb2 = selectRangeWithoutCopy(x2, rangeStart, rangeEnd); + return andNot(rb1, rb2); + } + + /** + * Bitwise ANDNOT (difference) operation for the given range, rangeStart (inclusive) and rangeEnd + * (exclusive). 
The provided bitmaps are *not* modified. This operation is thread-safe as long as + * the provided bitmaps remain unchanged. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @param rangeStart starting point of the range (inclusive) + * @param rangeEnd end point of the range (exclusive) + * @return result of the operation + * @deprecated use the version where longs specify the range. Negative values for range + * endpoints are not allowed. + */ + @Deprecated + public static RoaringBitmap andNot(final RoaringBitmap x1, final RoaringBitmap x2, + final int rangeStart, final int rangeEnd) { + return andNot(x1, x2, (long) rangeStart, (long) rangeEnd); + } + + /** + * Computes OR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + */ + public static RoaringBitmap or(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + + Iterator bitmapsIterator; + bitmapsIterator = selectRangeWithoutCopy(bitmaps, rangeStart, rangeEnd); + return or(bitmapsIterator); + } + + /** + * Computes OR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + * @deprecated use the version where longs specify the range. + * Negative range points are forbidden. 
+ */ + @Deprecated + public static RoaringBitmap or(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final int rangeStart, final int rangeEnd) { + return or(bitmaps, (long) rangeStart, (long) rangeEnd); + } + + /** + * Assume that one wants to store "cardinality" integers in [0, universe_size), this function + * returns an upper bound on the serialized size in bytes. + * + * @param cardinality maximal cardinality + * @param universe_size maximal value + * @return upper bound on the serialized size in bytes of the bitmap + */ + public static long maximumSerializedSize(long cardinality, long universe_size) { + long contnbr = (universe_size + 65535) / 65536; + if (contnbr > cardinality) { + contnbr = cardinality; + // we can't have more containers than we have values + } + final long headermax = Math.max(8, 4 + (contnbr + 7) / 8) + 8 * contnbr; + final long valsarray = 2 * cardinality; + final long valsbitmap = contnbr * 8192; + final long valsbest = Math.min(valsarray, valsbitmap); + return valsbest + headermax; + } + + /** + * Return new iterator with only values from rangeStart (inclusive) to rangeEnd (exclusive) + * + * @param bitmaps bitmaps iterator + * @param rangeStart inclusive + * @param rangeEnd exclusive + * @return new iterator of bitmaps + */ + private static Iterator selectRangeWithoutCopy(final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + Iterator bitmapsIterator; + bitmapsIterator = new Iterator() { + @Override + public boolean hasNext() { + return bitmaps.hasNext(); + } + + @Override + public RoaringBitmap next() { + RoaringBitmap next = (RoaringBitmap) bitmaps.next(); + return selectRangeWithoutCopy(next, rangeStart, rangeEnd); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Remove not supported"); + } + }; + return bitmapsIterator; + } + + /* + * In testing, original int-range code failed an assertion with some negative ranges + * so presumably nobody relies on negative ranges. 
rangeEnd=0 also failed. + */ + + /** + * Extracts the values in the specified range, rangeStart (inclusive) and rangeEnd (exclusive) + * while avoiding copies as much as possible. + * + * @param rb input bitmap + * @param rangeStart inclusive + * @param rangeEnd exclusive + * @return new bitmap + */ + + // had formerly failed if rangeEnd==0 + private static RoaringBitmap selectRangeWithoutCopy(RoaringBitmap rb, final long rangeStart, + final long rangeEnd) { + final int hbStart = Util.toIntUnsigned(Util.highbits(rangeStart)); + final int lbStart = Util.toIntUnsigned(Util.lowbits(rangeStart)); + final int hbLast = Util.toIntUnsigned(Util.highbits(rangeEnd - 1)); + final int lbLast = Util.toIntUnsigned(Util.lowbits(rangeEnd - 1)); + RoaringBitmap answer = new RoaringBitmap(); + + assert (rangeStart >= 0 && rangeEnd >= 0); + + if (rangeEnd <= rangeStart) { + return answer; + } + + if (hbStart == hbLast) { + final int i = rb.highLowContainer.getIndex((short) hbStart); + if (i >= 0) { + final Container c = rb.highLowContainer.getContainerAtIndex(i).remove(0, lbStart) + .iremove(lbLast + 1, Util.maxLowBitAsInteger() + 1); + if (c.getCardinality() > 0) { + answer.highLowContainer.append((short) hbStart, c); + } + } + return answer; + } + int ifirst = rb.highLowContainer.getIndex((short) hbStart); + int ilast = rb.highLowContainer.getIndex((short) hbLast); + if (ifirst >= 0) { + final Container c = rb.highLowContainer.getContainerAtIndex(ifirst).remove(0, lbStart); + if (c.getCardinality() > 0) { + answer.highLowContainer.append((short) hbStart, c); + } + } + + // revised to loop on ints + for (int hb = hbStart + 1; hb <= hbLast - 1; ++hb) { + final int i = rb.highLowContainer.getIndex((short) hb); + final int j = answer.highLowContainer.getIndex((short) hb); + assert j < 0; + + if (i >= 0) { + final Container c = rb.highLowContainer.getContainerAtIndex(i); + answer.highLowContainer.insertNewKeyValueAt(-j - 1, (short) hb, c); + } + } + + if (ilast >= 0) { + final Container 
c = rb.highLowContainer.getContainerAtIndex(ilast).remove(lbLast + 1, + Util.maxLowBitAsInteger() + 1); + if (c.getCardinality() > 0) { + answer.highLowContainer.append((short) hbLast, c); + } + } + return answer; + } + + /** + * Computes XOR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + */ + public static RoaringBitmap xor(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + Iterator bitmapsIterator; + bitmapsIterator = selectRangeWithoutCopy(bitmaps, rangeStart, rangeEnd); + return FastAggregation.xor(bitmapsIterator); + } + + /** + * Computes XOR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bi + * @deprecated use the version where longs specify the range. + * Negative values not allowed for rangeStart and rangeEnd + */ + @Deprecated + public static RoaringBitmap xor(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final int rangeStart, final int rangeEnd) { + return xor(bitmaps, (long) rangeStart, (long) rangeEnd); + } + + /** + * Set all the specified values to true. This can be expected to be slightly + * faster than calling "add" repeatedly. The provided integers values don't + * have to be in sorted order, but it may be preferable to sort them from a performance point of + * view. + * + * @param dat set values + */ + public void add(final int... 
dat) { + Container currentcont = null; + short currenthb = 0; + int currentcontainerindex = 0; + int j = 0; + if (j < dat.length) { + int val = dat[j]; + currenthb = Util.highbits(val); + currentcontainerindex = highLowContainer.getIndex(currenthb); + if (currentcontainerindex >= 0) { + currentcont = highLowContainer.getContainerAtIndex(currentcontainerindex); + Container newcont = currentcont.add(Util.lowbits(val)); + if (newcont != currentcont) { + highLowContainer.setContainerAtIndex(currentcontainerindex, newcont); + currentcont = newcont; + } + } else { + currentcontainerindex = -currentcontainerindex - 1; + final ArrayContainer newac = new ArrayContainer(); + currentcont = newac.add(Util.lowbits(val)); + highLowContainer.insertNewKeyValueAt(currentcontainerindex, currenthb, currentcont); + } + j++; + } + for (; j < dat.length; ++j) { + int val = dat[j]; + short newhb = Util.highbits(val); + if (currenthb == newhb) {// easy case + // this could be quite frequent + Container newcont = currentcont.add(Util.lowbits(val)); + if (newcont != currentcont) { + highLowContainer.setContainerAtIndex(currentcontainerindex, newcont); + currentcont = newcont; + } + } else { + currenthb = newhb; + currentcontainerindex = highLowContainer.getIndex(currenthb); + if (currentcontainerindex >= 0) { + currentcont = highLowContainer.getContainerAtIndex(currentcontainerindex); + Container newcont = currentcont.add(Util.lowbits(val)); + if (newcont != currentcont) { + highLowContainer.setContainerAtIndex(currentcontainerindex, newcont); + currentcont = newcont; + } + } else { + currentcontainerindex = -currentcontainerindex - 1; + final ArrayContainer newac = new ArrayContainer(); + currentcont = newac.add(Util.lowbits(val)); + highLowContainer.insertNewKeyValueAt(currentcontainerindex, currenthb, currentcont); + } + } + } + } + + /** + * Add the value to the container (set the value to "true"), whether it already appears or not. 
+ * + * @param x integer value + */ + public void add(final int x) { + final short hb = Util.highbits(x); + final int i = highLowContainer.getIndex(hb); + if (i >= 0) { + highLowContainer.setContainerAtIndex(i, + highLowContainer.getContainerAtIndex(i).add(Util.lowbits(x))); + } else { + final ArrayContainer newac = new ArrayContainer(); + highLowContainer.insertNewKeyValueAt(-i - 1, hb, newac.add(Util.lowbits(x))); + } + } + + /** + * Add to the current bitmap all integers in [rangeStart,rangeEnd). + * + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + */ + public void add(final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return; // empty range + } + + final int hbStart = Util.toIntUnsigned(Util.highbits(rangeStart)); + final int lbStart = Util.toIntUnsigned(Util.lowbits(rangeStart)); + final int hbLast = Util.toIntUnsigned(Util.highbits(rangeEnd - 1)); + final int lbLast = Util.toIntUnsigned(Util.lowbits(rangeEnd - 1)); + for (int hb = hbStart; hb <= hbLast; ++hb) { + + // first container may contain partial range + final int containerStart = (hb == hbStart) ? lbStart : 0; + // last container may contain partial range + final int containerLast = (hb == hbLast) ? lbLast : Util.maxLowBitAsInteger(); + final int i = highLowContainer.getIndex((short) hb); + + if (i >= 0) { + final Container c = + highLowContainer.getContainerAtIndex(i).iadd(containerStart, containerLast + 1); + highLowContainer.setContainerAtIndex(i, c); + } else { + highLowContainer.insertNewKeyValueAt(-i - 1, (short) hb, + Container.rangeOfOnes(containerStart, containerLast + 1)); + } + } + } + + /** + * Add to the current bitmap all integers in [rangeStart,rangeEnd). 
+ * + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @deprecated use the version where longs specify the range + */ + @Deprecated + public void add(final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + add((long) rangeStart, (long) rangeEnd); + } + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + add(rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + + /** + * In-place bitwise AND (intersection) operation. The current bitmap is modified. + * + * @param x2 other bitmap + */ + public void and(final RoaringBitmap x2) { + int pos1 = 0, pos2 = 0, intersectionSize = 0; + final int length1 = highLowContainer.size(), length2 = x2.highLowContainer.size(); + + while (pos1 < length1 && pos2 < length2) { + final short s1 = highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + if (s1 == s2) { + final Container c1 = highLowContainer.getContainerAtIndex(pos1); + final Container c2 = x2.highLowContainer.getContainerAtIndex(pos2); + final Container c = c1.iand(c2); + if (c.getCardinality() > 0) { + highLowContainer.replaceKeyAndContainerAtIndex(intersectionSize++, s1, c); + } + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + pos1 = highLowContainer.advanceUntil(s2, pos1); + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + highLowContainer.resize(intersectionSize); + } + + /** + * In-place bitwise ANDNOT (difference) operation. The current bitmap is modified. 
+ * + * @param x2 other bitmap + */ + public void andNot(final RoaringBitmap x2) { + int pos1 = 0, pos2 = 0, intersectionSize = 0; + final int length1 = highLowContainer.size(), length2 = x2.highLowContainer.size(); + + while (pos1 < length1 && pos2 < length2) { + final short s1 = highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + if (s1 == s2) { + final Container c1 = highLowContainer.getContainerAtIndex(pos1); + final Container c2 = x2.highLowContainer.getContainerAtIndex(pos2); + final Container c = c1.iandNot(c2); + if (c.getCardinality() > 0) { + highLowContainer.replaceKeyAndContainerAtIndex(intersectionSize++, s1, c); + } + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + if (pos1 != intersectionSize) { + final Container c1 = highLowContainer.getContainerAtIndex(pos1); + highLowContainer.replaceKeyAndContainerAtIndex(intersectionSize, s1, c1); + } + ++intersectionSize; + ++pos1; + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + if (pos1 < length1) { + highLowContainer.copyRange(pos1, length1, intersectionSize); + intersectionSize += length1 - pos1; + } + highLowContainer.resize(intersectionSize); + } + + /** + * Add the value to the container (set the value to "true"), whether it already appears or not. + * + * @param x integer value + * @return true if the added int wasn't already contained in the bitmap. False otherwise. 
+ */ + public boolean checkedAdd(final int x) { + final short hb = Util.highbits(x); + final int i = highLowContainer.getIndex(hb); + if (i >= 0) { + Container c = highLowContainer.getContainerAtIndex(i); + int oldCard = c.getCardinality(); + // we need to keep the newContainer if a switch between containers type + // occur, in order to get the new cardinality + Container newCont = c.add(Util.lowbits(x)); + highLowContainer.setContainerAtIndex(i, newCont); + if (newCont.getCardinality() > oldCard) { + return true; + } + } else { + final ArrayContainer newac = new ArrayContainer(); + highLowContainer.insertNewKeyValueAt(-i - 1, hb, newac.add(Util.lowbits(x))); + return true; + } + return false; + } + + /** + * If present remove the specified integer (effectively, sets its bit value to false) + * + * @param x integer value representing the index in a bitmap + * @return true if the unset bit was already in the bitmap + */ + public boolean checkedRemove(final int x) { + final short hb = Util.highbits(x); + final int i = highLowContainer.getIndex(hb); + if (i < 0) { + return false; + } + Container C = highLowContainer.getContainerAtIndex(i); + int oldcard = C.getCardinality(); + C.remove(Util.lowbits(x)); + int newcard = C.getCardinality(); + if (newcard == oldcard) { + return false; + } + if (newcard > 0) { + highLowContainer.setContainerAtIndex(i, C); + } else { + highLowContainer.removeAtIndex(i); + } + return true; + } + + /** + * reset to an empty bitmap; result occupies as much space a newly created bitmap. 
+ */ + public void clear() { + highLowContainer = new RoaringArray(); // lose references + } + + @Override + public RoaringBitmap clone() { + try { + final RoaringBitmap x = (RoaringBitmap) super.clone(); + x.highLowContainer = highLowContainer.clone(); + return x; + } catch (final CloneNotSupportedException e) { + throw new RuntimeException("shouldn't happen with clone", e); + } + } + + /** + * Checks whether the value in included, which is equivalent to checking if the corresponding bit + * is set (get in BitSet class). + * + * @param x integer value + * @return whether the integer value is included. + */ + @Override + public boolean contains(final int x) { + final short hb = Util.highbits(x); + final Container c = highLowContainer.getContainer(hb); + return c != null && c.contains(Util.lowbits(x)); + } + + /** + * Deserialize (retrieve) this bitmap. + *

+ * The current bitmap is overwritten. + * + * @param in the DataInput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void deserialize(DataInput in) throws IOException { + this.highLowContainer.deserialize(in); + } + + @Override + public boolean equals(Object o) { + if (o instanceof RoaringBitmap) { + final RoaringBitmap srb = (RoaringBitmap) o; + return srb.highLowContainer.equals(this.highLowContainer); + } + return false; + } + + /** + * Add the value if it is not already present, otherwise remove it. + * + * @param x integer value + */ + public void flip(final int x) { + final short hb = Util.highbits(x); + final int i = highLowContainer.getIndex(hb); + if (i >= 0) { + Container c = highLowContainer.getContainerAtIndex(i).flip(Util.lowbits(x)); + if (c.getCardinality() > 0) { + highLowContainer.setContainerAtIndex(i, c); + } else { + highLowContainer.removeAtIndex(i); + } + } else { + final ArrayContainer newac = new ArrayContainer(); + highLowContainer.insertNewKeyValueAt(-i - 1, hb, newac.add(Util.lowbits(x))); + } + } + + /** + * Modifies the current bitmap by complementing the bits in the given range, from rangeStart + * (inclusive) rangeEnd (exclusive). + * + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + */ + public void flip(final long rangeStart, final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return; // empty range + } + + final int hbStart = Util.toIntUnsigned(Util.highbits(rangeStart)); + final int lbStart = Util.toIntUnsigned(Util.lowbits(rangeStart)); + final int hbLast = Util.toIntUnsigned(Util.highbits(rangeEnd - 1)); + final int lbLast = Util.toIntUnsigned(Util.lowbits(rangeEnd - 1)); + + // TODO:this can be accelerated considerably + for (int hb = hbStart; hb <= hbLast; ++hb) { + // first container may contain partial range + final int containerStart = (hb == hbStart) ? 
lbStart : 0; + // last container may contain partial range + final int containerLast = (hb == hbLast) ? lbLast : Util.maxLowBitAsInteger(); + final int i = highLowContainer.getIndex((short) hb); + + if (i >= 0) { + final Container c = + highLowContainer.getContainerAtIndex(i).inot(containerStart, containerLast + 1); + if (c.getCardinality() > 0) { + highLowContainer.setContainerAtIndex(i, c); + } else { + highLowContainer.removeAtIndex(i); + } + } else { + highLowContainer.insertNewKeyValueAt(-i - 1, (short) hb, + Container.rangeOfOnes(containerStart, containerLast + 1)); + } + } + } + + /** + * Modifies the current bitmap by complementing the bits in the given range, from rangeStart + * (inclusive) rangeEnd (exclusive). + * + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @deprecated use the version where longs specify the range + */ + @Deprecated + public void flip(final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + flip((long) rangeStart, (long) rangeEnd); + } else { + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + flip(rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + } + + /** + * Returns the number of distinct integers added to the bitmap (e.g., number of bits set). + * + * @return the cardinality + */ + @Override + public long getLongCardinality() { + long size = 0; + for (int i = 0; i < this.highLowContainer.size(); i++) { + size += this.highLowContainer.getContainerAtIndex(i).getCardinality(); + } + return size; + } + + @Override + public int getCardinality() { + return (int) getLongCardinality(); + } + + @Override + public void forEach(IntConsumer ic) { + for (int i = 0; i < this.highLowContainer.size(); i++) { + this.highLowContainer.getContainerAtIndex(i).forEach(this.highLowContainer.keys[i], ic); + } + } + + /** + * Return a low-level container pointer that can be used to access the underlying data structure. 
+ * + * @return container pointer + */ + public ContainerPointer getContainerPointer() { + return this.highLowContainer.getContainerPointer(); + } + + /** + * For better performance, consider the Use the {@link #forEach forEach} method. + * + * @return a custom iterator over set bits, the bits are traversed in ascending sorted order + */ + @Override + public PeekableIntIterator getIntIterator() { + return new RoaringIntIterator(); + } + + /** + * @return a custom iterator over set bits, the bits are traversed in descending sorted order + */ + @Override + public IntIterator getReverseIntIterator() { + return new RoaringReverseIntIterator(); + } + + /** + * Estimate of the memory usage of this data structure. This can be expected to be within 1% of + * the true memory usage. + * + * @return estimated memory usage. + */ + @Override + public long getLongSizeInBytes() { + long size = 8; + for (int i = 0; i < this.highLowContainer.size(); i++) { + final Container c = this.highLowContainer.getContainerAtIndex(i); + size += 2 + c.getSizeInBytes(); + } + return size; + } + + @Override + public int getSizeInBytes() { + return (int) getLongSizeInBytes(); + } + + @Override + public int hashCode() { + return highLowContainer.hashCode(); + } + + /** + * Check whether this bitmap has had its runs compressed. + * + * @return whether this bitmap has run compression + */ + public boolean hasRunCompression() { + for (int i = 0; i < this.highLowContainer.size(); i++) { + Container c = this.highLowContainer.getContainerAtIndex(i); + if (c instanceof RunContainer) { + return true; + } + } + return false; + } + + /** + * Checks whether the bitmap is empty. + * + * @return true if this bitmap contains no set bit + */ + @Override + public boolean isEmpty() { + return highLowContainer.size() == 0; + } + + /** + * iterate over the positions of the true values. 
+     *
+     * @return the iterator
+     */
+    // NOTE(review): raw Iterator here; a type argument (presumably Integer, given next()) may have
+    // been lost when this patch was extracted -- confirm against the repository.
+    @Override
+    public Iterator iterator() {
+        return new Iterator() {
+            // key of the current container shifted into the upper 16 bits
+            private int hs = 0;
+
+            // iterator over the 16-bit values of the current container
+            private ShortIterator iter;
+
+            // index of the current container in highLowContainer
+            private int pos = 0;
+
+            // last value returned by next()
+            private int x;
+
+            @Override
+            public boolean hasNext() {
+                return pos < RoaringBitmap.this.highLowContainer.size();
+            }
+
+            // positions iter/hs on the container at index pos, if there is one
+            private Iterator init() {
+                if (pos < RoaringBitmap.this.highLowContainer.size()) {
+                    iter = RoaringBitmap.this.highLowContainer.getContainerAtIndex(pos).getShortIterator();
+                    hs = RoaringBitmap.this.highLowContainer.getKeyAtIndex(pos) << 16;
+                }
+                return this;
+            }
+
+            @Override
+            public Integer next() {
+                x = iter.nextAsInt() | hs;
+                if (!iter.hasNext()) {
+                    // current container exhausted: move on to the next one
+                    ++pos;
+                    init();
+                }
+                return x;
+            }
+
+            @Override
+            public void remove() {
+                if ((x & hs) == hs) {// still in same container
+                    iter.remove();
+                } else {
+                    RoaringBitmap.this.remove(x);
+                }
+            }
+
+        }.init();
+    }
+
+    // In-place lazy union with x2: matching keys are combined with Container.lazyIOR, keys present
+    // only in x2 are inserted as clones. Keys are walked in unsigned order (Util.compareUnsigned).
+    // don't forget to call repairAfterLazy() afterward
+    // important: x2 should not have been computed lazily
+    protected void lazyor(final RoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    this.highLowContainer.setContainerAtIndex(pos1, highLowContainer.getContainerAtIndex(pos1)
+                            .lazyIOR(x2.highLowContainer.getContainerAtIndex(pos2)));
+                    pos1++;
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    // the insertion shifts our own entries right, hence pos1++ and length1++
+                    highLowContainer.insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        // our keys are exhausted: append copies of whatever is left in x2
+        if (pos1 == length1) {
+            highLowContainer.appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+    // don't forget to call repairAfterLazy() afterward
+    // important: x2 should not have been computed lazily
+    // this method is like lazyor except that it will convert
+    // the current container to a bitset
+    public void naivelazyor(RoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    // convert our container to a bitmap container before the lazy union
+                    Container c1 = highLowContainer.getContainerAtIndex(pos1);
+                    c1 = c1.toBitmapContainer();
+                    this.highLowContainer.setContainerAtIndex(pos1,
+                            c1.lazyIOR(x2.highLowContainer.getContainerAtIndex(pos2)));
+                    pos1++;
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    highLowContainer.insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        // our keys are exhausted: append copies of whatever is left in x2
+        if (pos1 == length1) {
+            highLowContainer.appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+    /**
+     * Create a new Roaring bitmap containing at most maxcardinality integers.
+     *
+     * @param maxcardinality maximal cardinality
+     * @return a new bitmap with cardinality no more than maxcardinality
+     */
+    @Override
+    public RoaringBitmap limit(int maxcardinality) {
+        RoaringBitmap answer = new RoaringBitmap();
+        int currentcardinality = 0;
+        for (int i = 0; (currentcardinality < maxcardinality)
+                && (i < this.highLowContainer.size()); i++) {
+            Container c = this.highLowContainer.getContainerAtIndex(i);
+            if (c.getCardinality() + currentcardinality <= maxcardinality) {
+                // the whole container fits: copy it as is
+                answer.highLowContainer.appendCopy(this.highLowContainer, i);
+                currentcardinality += c.getCardinality();
+            } else {
+                // only part of this container fits: truncate it and stop
+                int leftover = maxcardinality - currentcardinality;
+                Container limited = c.limit(leftover);
+                answer.highLowContainer.append(this.highLowContainer.getKeyAtIndex(i), limited);
+                break;
+            }
+        }
+        return answer;
+    }
+
+    /**
+     * In-place bitwise OR (union) operation. The current bitmap is modified.
+     *
+     * @param x2 other bitmap
+     */
+    public void or(final RoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    this.highLowContainer.setContainerAtIndex(pos1, highLowContainer.getContainerAtIndex(pos1)
+                            .ior(x2.highLowContainer.getContainerAtIndex(pos2)));
+                    pos1++;
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    // the insertion shifts our own entries right, hence pos1++ and length1++
+                    highLowContainer.insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        // our keys are exhausted: append copies of whatever is left in x2
+        if (pos1 == length1) {
+            highLowContainer.appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+    /**
+     * Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be
+     * GetCardinality()).
+     *
+     * @param x upper limit
+     * @return the rank
+     */
+    @Override
+    public long rankLong(int x) {
+        long size = 0;
+        short xhigh = Util.highbits(x);
+
+        for (int i = 0; i < this.highLowContainer.size(); i++) {
+            short key = this.highLowContainer.getKeyAtIndex(i);
+            final int cmp = Util.compareUnsigned(key, xhigh);
+            if (cmp < 0) {
+                // every value of a container with a smaller key is below x
+                size += this.highLowContainer.getContainerAtIndex(i).getCardinality();
+            } else if (cmp == 0) {
+                return size + this.highLowContainer.getContainerAtIndex(i).rank(Util.lowbits(x));
+            } else {
+                // this container only holds values above x, so it must not contribute to the rank
+                return size;
+            }
+        }
+        return size;
+    }
+
+    @Override
+    public int rank(int x) {
+        return (int) rankLong(x);
+    }
+
+    @Override
+    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+        this.highLowContainer.readExternal(in);
+    }
+
+    /**
+     * If present remove the specified integer (effectively, sets its bit value to false)
+     *
+     * @param x integer value representing the index in a bitmap
+     */
+    public void remove(final int x) {
+        final short hb = Util.highbits(x);
+        final int i = highLowContainer.getIndex(hb);
+        if (i < 0) {
+            // no container for this key: nothing to remove
+            return;
+        }
+        highLowContainer.setContainerAtIndex(i,
+                highLowContainer.getContainerAtIndex(i).remove(Util.lowbits(x)));
+        // drop the container once it has become empty
+        if (highLowContainer.getContainerAtIndex(i).getCardinality() == 0) {
+            highLowContainer.removeAtIndex(i);
+        }
+    }
+
+    /**
+     * Remove from the current bitmap all integers in [rangeStart,rangeEnd).
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     */
+    public void remove(final long rangeStart, final long rangeEnd) {
+
+        rangeSanityCheck(rangeStart, rangeEnd);
+
+        if (rangeStart >= rangeEnd) {
+            return; // empty range
+        }
+
+
+        // split both inclusive endpoints into container key (hb*) and value within container (lb*)
+        final int hbStart = Util.toIntUnsigned(Util.highbits(rangeStart));
+        final int lbStart = Util.toIntUnsigned(Util.lowbits(rangeStart));
+        final int hbLast = Util.toIntUnsigned(Util.highbits(rangeEnd - 1));
+        final int lbLast = Util.toIntUnsigned(Util.lowbits(rangeEnd - 1));
+        if (hbStart == hbLast) {
+            // the whole range lies in a single container
+            final int i = highLowContainer.getIndex((short) hbStart);
+            if (i < 0) {
+                return;
+            }
+            final Container c = highLowContainer.getContainerAtIndex(i).iremove(lbStart, lbLast + 1);
+            if (c.getCardinality() > 0) {
+                highLowContainer.setContainerAtIndex(i, c);
+            } else {
+                highLowContainer.removeAtIndex(i);
+            }
+            return;
+        }
+        // range spans several keys: trim the first and last containers, then drop the index range
+        // [ifirst, ilast) in one call
+        int ifirst = highLowContainer.getIndex((short) hbStart);
+        int ilast = highLowContainer.getIndex((short) hbLast);
+        if (ifirst >= 0) {
+            if (lbStart != 0) {
+                final Container c = highLowContainer.getContainerAtIndex(ifirst).iremove(lbStart,
+                        Util.maxLowBitAsInteger() + 1);
+                if (c.getCardinality() > 0) {
+                    // first container keeps some values, so it is excluded from the bulk removal
+                    highLowContainer.setContainerAtIndex(ifirst, c);
+                    ifirst++;
+                }
+            }
+        } else {
+            // key absent: a negative getIndex result is decoded as -(insertion point) - 1
+            ifirst = -ifirst - 1;
+        }
+        if (ilast >= 0) {
+            if (lbLast != Util.maxLowBitAsInteger()) {
+                final Container c = highLowContainer.getContainerAtIndex(ilast).iremove(0, lbLast + 1);
+                if (c.getCardinality() > 0) {
+                    highLowContainer.setContainerAtIndex(ilast, c);
+                } else {
+                    // last container became empty: include it in the bulk removal
+                    ilast++;
+                }
+            } else {
+                // last container is fully covered by the range
+                ilast++;
+            }
+        } else {
+            ilast = -ilast - 1;
+        }
+        highLowContainer.removeIndexRange(ifirst, ilast);
+    }
+
+    /**
+     * Remove from the current bitmap all integers in [rangeStart,rangeEnd).
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     * @deprecated use the version where longs specify the range
+     */
+    @Deprecated
+    public void remove(final int rangeStart, final int rangeEnd) {
+        if (rangeStart >= 0) {
+            remove((long) rangeStart, (long) rangeEnd);
+        } else {
+            // rangeStart being -ve and rangeEnd being positive is not expected)
+            // so assume both -ve and reinterpret both bounds as unsigned 32-bit values
+            remove(rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL);
+        }
+    }
+
+    /**
+     * Remove run-length encoding even when it is more space efficient
+     *
+     * @return whether a change was applied
+     */
+    public boolean removeRunCompression() {
+        boolean answer = false;
+        for (int i = 0; i < this.highLowContainer.size(); i++) {
+            Container c = this.highLowContainer.getContainerAtIndex(i);
+            if (c instanceof RunContainer) {
+                // replace the run container by its bitmap or array equivalent
+                Container newc = ((RunContainer) c).toBitmapOrArrayContainer(c.getCardinality());
+                this.highLowContainer.setContainerAtIndex(i, newc);
+                answer = true;
+            }
+        }
+        return answer;
+    }
+
+    // to be used with lazyor: replaces every container by the result of its repairAfterLazy()
+    public void repairAfterLazy() {
+        for (int k = 0; k < highLowContainer.size(); ++k) {
+            Container c = highLowContainer.getContainerAtIndex(k);
+            highLowContainer.setContainerAtIndex(k, c.repairAfterLazy());
+        }
+    }
+
+    /**
+     * Use a run-length encoding where it is more space efficient
+     *
+     * @return whether a change was applied
+     */
+    public boolean runOptimize() {
+        boolean answer = false;
+        for (int i = 0; i < this.highLowContainer.size(); i++) {
+            Container c = this.highLowContainer.getContainerAtIndex(i).runOptimize();
+            // note: also true when the container already was a RunContainer before the call
+            if (c instanceof RunContainer) {
+                answer = true;
+            }
+            this.highLowContainer.setContainerAtIndex(i, c);
+        }
+        return answer;
+    }
+
+    /**
+     * Return the jth value stored in this bitmap.
+     *
+     * @param j index of the value
+     * @return the value
+     */
+    @Override
+    public int select(int j) {
+        int remaining = j;
+        int idx = 0;
+        while (idx < highLowContainer.size()) {
+            final Container current = highLowContainer.getContainerAtIndex(idx);
+            final int size = current.getCardinality();
+            if (remaining < size) {
+                // the requested value lives in this container
+                final int high = highLowContainer.getKeyAtIndex(idx) << 16;
+                final int low = Util.toIntUnsigned(current.select(remaining));
+                return low + high;
+            }
+            // skip this container entirely
+            remaining -= size;
+            idx++;
+        }
+        final String message = "select " + j + " when the cardinality is " + this.getCardinality();
+        throw new IllegalArgumentException(message);
+    }
+
+    /**
+     * Serialize this bitmap.
+     *

+ * Consider calling {@link #runOptimize} before serialization to improve compression. + *

+ * The current bitmap is not modified. + *

+ * Advanced example: To serialize your bitmap to a ByteBuffer, you can do the following. + *

+ *

+     * {@code
+     *   //r is your bitmap
+     *
+     *   r.runOptimize(); // might improve compression
+     *   // next we create the ByteBuffer where the data will be stored
+     *   ByteBuffer outbb = ByteBuffer.allocate(r.serializedSizeInBytes());
+     *   // then we can serialize on a custom OutputStream
+     *   r.serialize(new DataOutputStream(new OutputStream(){
+     *       ByteBuffer mBB;
+     *       OutputStream init(ByteBuffer mbb) {mBB=mbb; return this;}
+     *       public void close() {}
+     *       public void flush() {}
+     *       public void write(int b) {
+     *         mBB.put((byte) b);}
+     *       public void write(byte[] b) {mBB.put(b);}
+     *       public void write(byte[] b, int off, int l) {mBB.put(b,off,l);}
+     *   }.init(outbb)));
+     *   // outbb will now contain a serialized version of your bitmap
+     * }
+     * 
+ *

+     * Note: Java's data structures are in big endian format. Roaring serializes to a little endian
+     * format, so the bytes are flipped by the library during serialization to ensure that what is
+     * stored is in little endian---despite Java's big endianness. You can defeat this process by
+     * reflipping the bytes again in a custom DataOutput which could lead to serialized Roaring
+     * objects with an incorrect byte order.
+     *
+     * @param out the DataOutput stream
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    @Override
+    public void serialize(DataOutput out) throws IOException {
+        highLowContainer.serialize(out);
+    }
+
+    /**
+     * Report the number of bytes required to serialize this bitmap. This is the number of bytes
+     * written out when using the serialize method. When using the writeExternal method, the count
+     * will be higher due to the overhead of Java serialization.
+     *
+     * @return the size in bytes
+     */
+    @Override
+    public int serializedSizeInBytes() {
+        return highLowContainer.serializedSizeInBytes();
+    }
+
+    /**
+     * Return the set values as an array, if the cardinality is smaller than 2147483648.
+     * The integer values are in sorted order.
+     *
+     * @return array representing the set values.
+     */
+    @Override
+    public int[] toArray() {
+        final int[] values = new int[this.getCardinality()];
+        int written = 0;
+        for (int idx = 0; idx < highLowContainer.size(); idx++) {
+            // the container key supplies the upper 16 bits of every value it holds
+            final int high = highLowContainer.getKeyAtIndex(idx) << 16;
+            final Container current = highLowContainer.getContainerAtIndex(idx);
+            current.fillLeastSignificant16bits(values, written, high);
+            written += current.getCardinality();
+        }
+        return values;
+    }
+
+
+    /**
+     * Convert (copies) to a mutable roaring bitmap.
+     *
+     * @return a copy of this bitmap as a MutableRoaringBitmap
+     */
+    public MutableRoaringBitmap toMutableRoaringBitmap() {
+        return new MutableRoaringBitmap(this);
+    }
+
+    /**
+     * A string describing the bitmap.
+     *
+     * @return the string
+     */
+    @Override
+    public String toString() {
+        final StringBuilder answer = new StringBuilder();
+        final IntIterator i = this.getIntIterator();
+        answer.append("{");
+        if (i.hasNext()) {
+            // values are printed as unsigned 32-bit integers
+            answer.append(i.next() & 0xFFFFFFFFL);
+        }
+        while (i.hasNext()) {
+            answer.append(",");
+            // to avoid using too much memory, we limit the size
+            if (answer.length() > 0x80000) {
+                answer.append("...");
+                break;
+            }
+            answer.append(i.next() & 0xFFFFFFFFL);
+
+        }
+        answer.append("}");
+        return answer.toString();
+    }
+
+    /**
+     * Recover allocated but unused memory.
+     */
+    public void trim() {
+        for (int i = 0; i < this.highLowContainer.size(); i++) {
+            this.highLowContainer.getContainerAtIndex(i).trim();
+        }
+    }
+
+
+    @Override
+    public void writeExternal(ObjectOutput out) throws IOException {
+        this.highLowContainer.writeExternal(out);
+    }
+
+    /**
+     * In-place bitwise XOR (symmetric difference) operation. The current bitmap is modified.
+     *
+     * @param x2 other bitmap
+     */
+    public void xor(final RoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    final Container c = highLowContainer.getContainerAtIndex(pos1)
+                            .ixor(x2.highLowContainer.getContainerAtIndex(pos2));
+                    if (c.getCardinality() > 0) {
+                        this.highLowContainer.setContainerAtIndex(pos1, c);
+                        pos1++;
+                    } else {
+                        // the two containers cancelled out: drop ours, pos1 now points at the next one
+                        highLowContainer.removeAtIndex(pos1);
+                        --length1;
+                    }
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    highLowContainer.insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        // our keys are exhausted: append copies of whatever is left in x2
+        if (pos1 == length1) {
+            highLowContainer.appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+    /**
+     * Forward iterator over the set bits; walks the containers by increasing index.
+     */
+    private final class RoaringIntIterator implements PeekableIntIterator {
+        // key of the current container shifted into the upper 16 bits
+        private int hs = 0;
+
+        // iterator over the current container; stays null when the bitmap has no container
+        private PeekableShortIterator iter;
+
+        // index of the current container
+        private int pos = 0;
+
+        private RoaringIntIterator() {
+            nextContainer();
+        }
+
+        @Override
+        public PeekableIntIterator clone() {
+            try {
+                RoaringIntIterator x = (RoaringIntIterator) super.clone();
+                // iter is null for an empty bitmap; cloning must not fail in that case
+                if (this.iter != null) {
+                    x.iter = this.iter.clone();
+                }
+                return x;
+            } catch (CloneNotSupportedException e) {
+                return null;// will not happen
+            }
+        }
+
+        @Override
+        public boolean hasNext() {
+            return pos < RoaringBitmap.this.highLowContainer.size();
+        }
+
+        @Override
+        public int next() {
+            final int x = iter.nextAsInt() | hs;
+            if (!iter.hasNext()) {
+                ++pos;
+                nextContainer();
+            }
+            return x;
+        }
+
+        // positions iter/hs on the container at index pos, if there is one
+        private void nextContainer() {
+            if (pos < RoaringBitmap.this.highLowContainer.size()) {
+                iter = RoaringBitmap.this.highLowContainer.getContainerAtIndex(pos).getShortIterator();
+                hs = RoaringBitmap.this.highLowContainer.getKeyAtIndex(pos) << 16;
+            }
+        }
+
+        @Override
+        public void advanceIfNeeded(int minval) {
+            // skip whole containers whose key is below the key of minval (unsigned comparison)
+            while (hasNext() && ((hs >>> 16) < (minval >>> 16))) {
+                ++pos;
+                nextContainer();
+            }
+            if (hasNext() && ((hs >>> 16) == (minval >>> 16))) {
+                iter.advanceIfNeeded(Util.lowbits(minval));
+                if (!iter.hasNext()) {
+                    ++pos;
+                    nextContainer();
+                }
+            }
+        }
+
+        @Override
+        public int peekNext() {
+            return Util.toIntUnsigned(iter.peekNext()) | hs;
+        }
+
+
+    }
+
+    /**
+     * Reverse iterator over the set bits; walks the containers by decreasing index.
+     */
+    private final class RoaringReverseIntIterator implements IntIterator {
+
+        int hs = 0;
+
+        // iterator over the current container; stays null when the bitmap has no container
+        ShortIterator iter;
+
+        // Kept as an int: the container count minus one may exceed Short.MAX_VALUE, and a short
+        // would then turn negative and make the iterator look exhausted from the start.
+        int pos = RoaringBitmap.this.highLowContainer.size() - 1;
+
+        private RoaringReverseIntIterator() {
+            nextContainer();
+        }
+
+        @Override
+        public IntIterator clone() {
+            try {
+                RoaringReverseIntIterator clone = (RoaringReverseIntIterator) super.clone();
+                // iter is null for an empty bitmap; cloning must not fail in that case
+                if (this.iter != null) {
+                    clone.iter = this.iter.clone();
+                }
+                return clone;
+            } catch (CloneNotSupportedException e) {
+                return null;// will not happen
+            }
+        }
+
+        @Override
+        public boolean hasNext() {
+            return pos >= 0;
+        }
+
+        @Override
+        public int next() {
+            final int x = iter.nextAsInt() | hs;
+            if (!iter.hasNext()) {
+                --pos;
+                nextContainer();
+            }
+            return x;
+        }
+
+        // positions iter/hs on the container at index pos, if there is one
+        private void nextContainer() {
+            if (pos >= 0) {
+                iter =
+                        RoaringBitmap.this.highLowContainer.getContainerAtIndex(pos).getReverseShortIterator();
+                hs = RoaringBitmap.this.highLowContainer.getKeyAtIndex(pos) << 16;
+            }
+        }
+
+    }
+
+
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RunContainer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RunContainer.java
new file mode 100644
index 000000000..c078b52a8
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/RunContainer.java
@@ -0,0 +1,2518 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+package com.fr.third.bitmap.roaringbitmap;
+
+import com.fr.third.bitmap.roaringbitmap.buffer.MappeableContainer;
+import com.fr.third.bitmap.roaringbitmap.buffer.MappeableRunContainer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.nio.ShortBuffer;
+import java.util.Arrays;
+import java.util.Iterator;
+
+
+/**
+ * This container takes the form of runs of consecutive values (effectively, run-length encoding).
+ *

+ * Adding and removing content from this container might make it wasteful so regular calls to
+ * "runOptimize" might be warranted.
+ */
+public final class RunContainer extends Container implements Cloneable {
+    private static final int DEFAULT_INIT_SIZE = 4;
+    // compile-time switch selecting skipAhead() over one-run-at-a-time stepping in and(RunContainer)
+    private static final boolean ENABLE_GALLOPING_AND = false;
+
+    private static final long serialVersionUID = 1L;
+    int nbrruns = 0;// how many runs, this number should fit in 16 bits.
+    // values and lengths are interleaved: run i starts at valueslength[2 * i] and its length is
+    // stored in valueslength[2 * i + 1]
+    private short[] valueslength;
+
+    /**
+     * Create a container with default capacity
+     */
+    public RunContainer() {
+        this(DEFAULT_INIT_SIZE);
+    }
+
+    // convert an array container to a run container; nbrRuns sizes the backing array, so it has
+    // to be the number of runs found in arr
+    protected RunContainer(ArrayContainer arr, int nbrRuns) {
+        this.nbrruns = nbrRuns;
+        valueslength = new short[2 * nbrRuns];
+        if (nbrRuns == 0) {
+            return;
+        }
+
+        // -2 can never be the predecessor of an unsigned 16-bit value, so the first element
+        // always opens a new run
+        int prevVal = -2;
+        int runLen = 0;
+        int runCount = 0;
+
+        for (int i = 0; i < arr.cardinality; i++) {
+            int curVal = Util.toIntUnsigned(arr.content[i]);
+            if (curVal == prevVal + 1) {
+                ++runLen;
+            } else {
+                // close the previous run (if any) and open a new one at curVal
+                if (runCount > 0) {
+                    setLength(runCount - 1, (short) runLen);
+                }
+                setValue(runCount, (short) curVal);
+                runLen = 0;
+                ++runCount;
+            }
+            prevVal = curVal;
+        }
+        // close the last run
+        setLength(runCount - 1, (short) runLen);
+    }
+
+    // convert a bitmap container to a run container somewhat efficiently.
+    protected RunContainer(BitmapContainer bc, int nbrRuns) {
+        this.nbrruns = nbrRuns;
+        valueslength = new short[2 * nbrRuns];
+        if (nbrRuns == 0) {
+            return;
+        }
+
+        int longCtr = 0; // index of current long in bitmap
+        long curWord = bc.bitmap[0]; // its value
+        int runCount = 0;
+        while (true) {
+            // potentially multiword advance to first 1 bit
+            while (curWord == 0L && longCtr < bc.bitmap.length - 1) {
+                curWord = bc.bitmap[++longCtr];
+            }
+
+            if (curWord == 0L) {
+                // wrap up, no more runs
+                return;
+            }
+            int localRunStart = Long.numberOfTrailingZeros(curWord);
+            int runStart = localRunStart + 64 * longCtr;
+            // stuff 1s into number's LSBs
+            long curWordWith1s = curWord | (curWord - 1);
+
+            // find the next 0, potentially in a later word
+            int runEnd = 0;
+            while (curWordWith1s == -1L && longCtr < bc.bitmap.length - 1) {
+                curWordWith1s = bc.bitmap[++longCtr];
+            }
+
+            if (curWordWith1s == -1L) {
+                // a final unterminated run of 1s that reaches the end of the last 64-bit word
+                runEnd = 64 + longCtr * 64;
+                setValue(runCount, (short) runStart);
+                setLength(runCount, (short) (runEnd - runStart - 1));
+                return;
+            }
+            int localRunEnd = Long.numberOfTrailingZeros(~curWordWith1s);
+            runEnd = localRunEnd + longCtr * 64;
+            setValue(runCount, (short) runStart);
+            setLength(runCount, (short) (runEnd - runStart - 1));
+            runCount++;
+            // now, zero out everything right of runEnd.
+            curWord = curWordWith1s & (curWordWith1s + 1);
+            // We've lathered and rinsed, so repeat...
+        }
+    }
+    // Layout of valueslength: values and lengths are interleaved, so
+    // that if you have the values 11,12,13,14,15, you store that as 11,4 where 4 means that beyond 11
+    // itself, there are
+    // 4 contiguous values that follows.
+    // Other example: e.g., 1, 10, 20,0, 31,2 would be a concise representation of 1, 2, ..., 11, 20,
+    // 31, 32, 33
+
+    /**
+     * Create a run container with specified capacity
+     *
+     * @param capacity The capacity of the container, in runs (two shorts are allocated per run)
+     */
+    public RunContainer(final int capacity) {
+        valueslength = new short[2 * capacity];
+    }
+
+
+    // copying constructor: the given array is duplicated, not shared
+    private RunContainer(int nbrruns, short[] valueslength) {
+        this.nbrruns = nbrruns;
+        this.valueslength = Arrays.copyOf(valueslength, valueslength.length);
+    }
+
+
+    /**
+     * Creates a new non-mappeable container from a mappeable one. This copies the data.
+     *
+     * @param bc the original container
+     */
+    public RunContainer(MappeableRunContainer bc) {
+        this.nbrruns = bc.numberOfRuns();
+        this.valueslength = bc.toShortArray();
+    }
+
+    /**
+     * Construct a new RunContainer backed by the provided array. Note that if you modify the
+     * RunContainer a new array may be produced.
+     *
+     * @param array   array where the data is stored
+     * @param numRuns number of runs (each using 2 shorts in the buffer)
+     * @throws RuntimeException if the array is too short to hold numRuns runs
+     */
+    public RunContainer(final short[] array, final int numRuns) {
+        if (array.length < 2 * numRuns) {
+            throw new RuntimeException("Mismatch between buffer and numRuns");
+        }
+        this.nbrruns = numRuns;
+        this.valueslength = array;
+    }
+
+    // Binary search for k among the run start values (the even slots of the interleaved array),
+    // compared as unsigned 16-bit integers. Returns the run index, or -(insertion point) - 1.
+    private static int branchyUnsignedInterleavedBinarySearch(final short[] array, final int begin,
+                                                              final int end, final short k) {
+        int ikey = Util.toIntUnsigned(k);
+        int low = begin;
+        int high = end - 1;
+        while (low <= high) {
+            final int middleIndex = (low + high) >>> 1;
+            final int middleValue = Util.toIntUnsigned(array[2 * middleIndex]);
+            if (middleValue < ikey) {
+                low = middleIndex + 1;
+            } else if (middleValue > ikey) {
+                high = middleIndex - 1;
+            } else {
+                return middleIndex;
+            }
+        }
+        return -(low + 1);
+    }
+
+    // starts with binary search and finishes with a sequential search
+    // (same contract as branchyUnsignedInterleavedBinarySearch)
+    private static int hybridUnsignedInterleavedBinarySearch(final short[] array, final int begin,
+                                                             final int end, final short k) {
+        int ikey = Util.toIntUnsigned(k);
+        int low = begin;
+        int high = end - 1;
+        // 16 in the next line matches the size of a cache line
+        while (low + 16 <= high) {
+            final int middleIndex = (low + high) >>> 1;
+            final int middleValue = Util.toIntUnsigned(array[2 * middleIndex]);
+            if (middleValue < ikey) {
+                low = middleIndex + 1;
+            } else if (middleValue > ikey) {
+                high = middleIndex - 1;
+            } else {
+                return middleIndex;
+            }
+        }
+        // we finish the job with a sequential search
+        int x = low;
+        for (; x <= high; ++x) {
+            final int val = Util.toIntUnsigned(array[2 * x]);
+            if (val >= ikey) {
+                if (val == ikey) {
+                    return x;
+                }
+                break;
+            }
+        }
+        return -(x + 1);
+    }
+
+    protected static int serializedSizeInBytes(int numberOfRuns) {
+        return 2 + 2 * 2 * numberOfRuns; // each run requires 2 2-byte entries.
+    }
+
+    // dispatches to the hybrid or the branchy search depending on Util.USE_HYBRID_BINSEARCH
+    private static int unsignedInterleavedBinarySearch(final short[] array, final int begin,
+                                                       final int end, final short k) {
+        if (Util.USE_HYBRID_BINSEARCH) {
+            return hybridUnsignedInterleavedBinarySearch(array, begin, end, k);
+        } else {
+            return branchyUnsignedInterleavedBinarySearch(array, begin, end, k);
+        }
+
+    }
+
+    @Override
+    public Container add(int begin, int end) {
+        // non-destructive variant: work on a clone and delegate to iadd
+        RunContainer rc = (RunContainer) clone();
+        return rc.iadd(begin, end);
+    }
+
+    @Override
+    public Container add(short k) {
+        // TODO: it might be better and simpler to do return
+        // toBitmapOrArrayContainer(getCardinality()).add(k)
+        // but note that some unit tests use this method to build up test runcontainers without calling
+        // runOptimize
+        int index = unsignedInterleavedBinarySearch(valueslength, 0, nbrruns, k);
+        if (index >= 0) {
+            return this;// already there
+        }
+        index = -index - 2;// points to preceding value, possibly -1
+        if (index >= 0) {// possible match
+            int offset = Util.toIntUnsigned(k) - Util.toIntUnsigned(getValue(index));
+            int le = Util.toIntUnsigned(getLength(index));
+            if (offset <= le) {
+                return this;
+            }
+            if (offset == le + 1) {
+                // we may need to fuse
+                if (index + 1 < nbrruns) {
+                    if (Util.toIntUnsigned(getValue(index + 1)) == Util.toIntUnsigned(k) + 1) {
+                        // indeed fusion is needed
+                        setLength(index,
+                                (short) (getValue(index + 1) + getLength(index + 1) - getValue(index)));
+                        recoverRoomAtIndex(index + 1);
+                        return this;
+                    }
+                }
+                incrementLength(index);
+                return this;
+            }
+            if (index + 1 < nbrruns) {
+                // we may need to fuse
+                if (Util.toIntUnsigned(getValue(index + 1)) == Util.toIntUnsigned(k) + 1) {
+                    // indeed fusion is needed
+                    setValue(index + 1, k);
+                    setLength(index + 1, (short) (getLength(index + 1) + 1));
+                    return this;
+                }
+            }
+        }
+        if (index == -1) {
+            // we may need to extend the first run
+            if (0 < nbrruns) {
+                // compare as unsigned 16-bit values, like the fusion checks above: the signed
+                // comparison missed k = 0x7FFF next to a first run starting at 0x8000
+                if (Util.toIntUnsigned(getValue(0)) == Util.toIntUnsigned(k) + 1) {
+                    incrementLength(0);
+                    decrementValue(0);
+                    return this;
+                }
+            }
+        }
+        // no neighbouring run to extend: insert a new single-value run
+        makeRoomAtIndex(index + 1);
+        setValue(index + 1, k);
+        setLength(index + 1, (short) 0);
+        return this;
+    }
+
+    @Override
+    public Container and(ArrayContainer x) {
+        ArrayContainer ac = new ArrayContainer(x.cardinality);
+        if (this.nbrruns == 0) {
+            return ac;
+        }
+        int rlepos = 0;
+        int arraypos = 0;
+
+        int rleval = Util.toIntUnsigned(this.getValue(rlepos));
+        int rlelength = Util.toIntUnsigned(this.getLength(rlepos));
+        while (arraypos < x.cardinality) {
+            int arrayval = Util.toIntUnsigned(x.content[arraypos]);
+            while (rleval + rlelength < arrayval) {// this will frequently be false
+                ++rlepos;
+                if (rlepos == this.nbrruns) {
+                    return ac;// we are done
+                }
+                rleval = Util.toIntUnsigned(this.getValue(rlepos));
+                rlelength = Util.toIntUnsigned(this.getLength(rlepos));
+            }
+            if (rleval > arrayval) {
+                // array value lies before the current run: skip ahead in the array
+                arraypos = Util.advanceUntil(x.content, arraypos, x.cardinality, (short) rleval);
+            } else {
+                // array value falls inside the current run
+                ac.content[ac.cardinality] = (short) arrayval;
+                ac.cardinality++;
+                arraypos++;
+            }
+        }
+        return ac;
+    }
+
+
+    @Override
+    public Container and(BitmapContainer x) {
+        // could be implemented as return toBitmapOrArrayContainer().iand(x);
+        int card = this.getCardinality();
+        if (card <= ArrayContainer.DEFAULT_MAX_SIZE) {
+            // result can only be an array (assuming that we never make a RunContainer)
+            if (card > x.cardinality) {
+                card = x.cardinality;
+            }
+            ArrayContainer answer = new ArrayContainer(card);
+            answer.cardinality = 0;
+            for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) {
+                int runStart = Util.toIntUnsigned(this.getValue(rlepos));
+                int runEnd = runStart + Util.toIntUnsigned(this.getLength(rlepos));
+                for (int runValue = runStart; runValue <= runEnd; ++runValue) {
+                    if (x.contains((short) runValue)) {// it looks like contains() should be cheap enough if
+                        // accessed sequentially
+                        answer.content[answer.cardinality++] = (short) runValue;
+                    }
+                }
+            }
+            return answer;
+        }
+        // we expect the answer to be a bitmap (if we are lucky)
+        BitmapContainer answer = x.clone();
+        int start = 0;
+        // clear every gap between consecutive runs in the copy of x
+        for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) {
+            int end = Util.toIntUnsigned(this.getValue(rlepos));
+            Util.resetBitmapRange(answer.bitmap, start, end); // had been x.bitmap
+            start = end + Util.toIntUnsigned(this.getLength(rlepos)) + 1;
+        }
+        Util.resetBitmapRange(answer.bitmap, start, Util.maxLowBitAsInteger() + 1); // had been x.bitmap
+        answer.computeCardinality();
+        if (answer.getCardinality() > ArrayContainer.DEFAULT_MAX_SIZE) {
+            return answer;
+        } else {
+            return answer.toArrayContainer();
+        }
+    }
+
+    @Override
+    public Container and(RunContainer x) {
+        RunContainer answer = new RunContainer(new short[2 * (this.nbrruns + x.nbrruns)], 0);
+        int rlepos = 0;
+        int xrlepos = 0;
+        // [start, end) and [xstart, xend) are the current runs of this and of x
+        int start = Util.toIntUnsigned(this.getValue(rlepos));
+        int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1;
+        int xstart = Util.toIntUnsigned(x.getValue(xrlepos));
+        int xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1;
+        while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) {
+            if (end <= xstart) {
+                if (ENABLE_GALLOPING_AND) {
+                    rlepos = skipAhead(this, rlepos, xstart); // skip over runs until we have end > xstart (or
+                    // rlepos is advanced beyond end)
+                } else {
+                    ++rlepos;
+                }
+
+                if (rlepos < this.nbrruns) {
+                    start = Util.toIntUnsigned(this.getValue(rlepos));
+                    end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1;
+                }
+            } else if (xend <= start) {
+                // exit the second run
+                if (ENABLE_GALLOPING_AND) {
+                    xrlepos = skipAhead(x, xrlepos, start);
+                } else {
+                    ++xrlepos;
+                }
+
+                if (xrlepos < x.nbrruns) {
+                    xstart = Util.toIntUnsigned(x.getValue(xrlepos));
+                    xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1;
+                }
+            } else {// they overlap
+                final int lateststart = start > xstart ? start : xstart;
+                int earliestend;
+                if (end == xend) {// improbable
+                    earliestend = end;
+                    rlepos++;
+                    xrlepos++;
+                    if (rlepos < this.nbrruns) {
+                        start = Util.toIntUnsigned(this.getValue(rlepos));
+                        end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1;
+                    }
+                    if (xrlepos < x.nbrruns) {
+                        xstart = Util.toIntUnsigned(x.getValue(xrlepos));
+                        xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1;
+                    }
+                } else if (end < xend) {
+                    earliestend = end;
+                    rlepos++;
+                    if (rlepos < this.nbrruns) {
+                        start = Util.toIntUnsigned(this.getValue(rlepos));
+                        end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1;
+                    }
+
+                } else {// end > xend
+                    earliestend = xend;
+                    xrlepos++;
+                    if (xrlepos < x.nbrruns) {
+                        xstart = Util.toIntUnsigned(x.getValue(xrlepos));
+                        xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1;
+                    }
+                }
+                // record the overlap [lateststart, earliestend) as a run of the answer
+                answer.valueslength[2 * answer.nbrruns] = (short) lateststart;
+                answer.valueslength[2 * answer.nbrruns + 1] = (short) (earliestend - lateststart - 1);
+                answer.nbrruns++;
+            }
+        }
+        return answer.toEfficientContainer(); // subsequent trim() may be required to avoid wasted
+        // space.
+    }
+
+    @Override
+    public int andCardinality(ArrayContainer x) {
+        if (this.nbrruns == 0) {
+            // intersecting with an empty container yields nothing (and(ArrayContainer) likewise
+            // returns an empty result here)
+            return 0;
+        }
+        int rlepos = 0;
+        int arraypos = 0;
+        int andCardinality = 0;
+        int rleval = Util.toIntUnsigned(this.getValue(rlepos));
+        int rlelength = Util.toIntUnsigned(this.getLength(rlepos));
+        while (arraypos < x.cardinality) {
+            int arrayval = Util.toIntUnsigned(x.content[arraypos]);
+            while (rleval + rlelength < arrayval) {// this will frequently be false
+                ++rlepos;
+                if (rlepos == this.nbrruns) {
+                    return andCardinality;// we are done
+                }
+                rleval = Util.toIntUnsigned(this.getValue(rlepos));
+                rlelength = Util.toIntUnsigned(this.getLength(rlepos));
+            }
+            if (rleval > arrayval) {
+                // array value lies before the current run: skip ahead in the array
+                arraypos = Util.advanceUntil(x.content, arraypos, x.cardinality, this.getValue(rlepos));
+            } else {
+                andCardinality++;
+                arraypos++;
+            }
+        }
+        return andCardinality;
+    }
+
+
+    @Override
+    public int andCardinality(BitmapContainer x) {
+        // could be implemented as return toBitmapOrArrayContainer().iand(x);
+        int cardinality = 0;
+        for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) {
+            int runStart = Util.toIntUnsigned(this.getValue(rlepos));
+            int runEnd = runStart + Util.toIntUnsigned(this.getLength(rlepos));
+            for (int runValue = runStart; runValue <= runEnd; ++runValue) {
+                if (x.contains((short) runValue)) {// it looks like contains() should be cheap enough if
+                    // accessed sequentially
+                    cardinality++;
+                }
+            }
+        }
+        return cardinality;
+    }
+
+    @Override
+    public int andCardinality(RunContainer x) {
+        int cardinality = 0;
+        int rlepos = 0;
+        int xrlepos = 0;
+        int start = Util.toIntUnsigned(this.getValue(rlepos));
+        int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1;
+        int xstart = Util.toIntUnsigned(x.getValue(xrlepos));
+        int xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1;
+        while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) {
+            if (end <= xstart) {
+                if (ENABLE_GALLOPING_AND) {
+                    rlepos = skipAhead(this, rlepos, xstart); // skip
over runs until we have end > xstart (or + // rlepos is advanced beyond end) + } else { + ++rlepos; + } + + if (rlepos < this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xend <= start) { + // exit the second run + if (ENABLE_GALLOPING_AND) { + xrlepos = skipAhead(x, xrlepos, start); + } else { + ++xrlepos; + } + + if (xrlepos < x.nbrruns) { + xstart = Util.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else {// they overlap + final int lateststart = start > xstart ? start : xstart; + int earliestend; + if (end == xend) {// improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + if (xrlepos < x.nbrruns) { + xstart = Util.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + + } else {// end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < x.nbrruns) { + xstart = Util.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } + // earliestend - lateststart are all values that are true. 
+ cardinality += (short) (earliestend - lateststart); + } + } + return cardinality; + } + + @Override + public Container andNot(ArrayContainer x) { + // when x is small, we guess that the result will still be a run container + final int arbitrary_threshold = 32; // this is arbitrary + if (x.getCardinality() < arbitrary_threshold) { + return lazyandNot(x).toEfficientContainer(); + } + // otherwise we generate either an array or bitmap container + final int card = getCardinality(); + if (card <= ArrayContainer.DEFAULT_MAX_SIZE) { + // if the cardinality is small, we construct the solution in place + ArrayContainer ac = new ArrayContainer(card); + ac.cardinality = + Util.unsignedDifference(this.getShortIterator(), x.getShortIterator(), ac.content); + return ac; + } + // otherwise, we generate a bitmap + return toBitmapOrArrayContainer(card).iandNot(x); + } + + @Override + public Container andNot(BitmapContainer x) { + // could be implemented as toTemporaryBitmap().iandNot(x); + int card = this.getCardinality(); + if (card <= ArrayContainer.DEFAULT_MAX_SIZE) { + // result can only be an array (assuming that we never make a RunContainer) + ArrayContainer answer = new ArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = Util.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + Util.toIntUnsigned(this.getLength(rlepos)); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + if (!x.contains((short) runValue)) {// it looks like contains() should be cheap enough if + // accessed sequentially + answer.content[answer.cardinality++] = (short) runValue; + } + } + } + return answer; + } + // we expect the answer to be a bitmap (if we are lucky) + BitmapContainer answer = x.clone(); + int lastPos = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + 
Util.resetBitmapRange(answer.bitmap, lastPos, start); + Util.flipBitmapRange(answer.bitmap, start, end); + lastPos = end; + } + Util.resetBitmapRange(answer.bitmap, lastPos, answer.bitmap.length * 64); + answer.computeCardinality(); + if (answer.getCardinality() > ArrayContainer.DEFAULT_MAX_SIZE) { + return answer; + } else { + return answer.toArrayContainer(); + } + } + + @Override + public Container andNot(RunContainer x) { + RunContainer answer = new RunContainer(new short[2 * (this.nbrruns + x.nbrruns)], 0); + int rlepos = 0; + int xrlepos = 0; + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + int xstart = Util.toIntUnsigned(x.getValue(xrlepos)); + int xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1; + while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) { + if (end <= xstart) { + // output the first run + answer.valueslength[2 * answer.nbrruns] = (short) start; + answer.valueslength[2 * answer.nbrruns + 1] = (short) (end - start - 1); + answer.nbrruns++; + rlepos++; + if (rlepos < this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xend <= start) { + // exit the second run + xrlepos++; + if (xrlepos < x.nbrruns) { + xstart = Util.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else { + if (start < xstart) { + answer.valueslength[2 * answer.nbrruns] = (short) start; + answer.valueslength[2 * answer.nbrruns + 1] = (short) (xstart - start - 1); + answer.nbrruns++; + } + if (xend < end) { + start = xend; + } else { + rlepos++; + if (rlepos < this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } + } + } + if (rlepos < this.nbrruns) { + answer.valueslength[2 * answer.nbrruns] = (short) start; + answer.valueslength[2 * answer.nbrruns 
+ 1] = (short) (end - start - 1); + answer.nbrruns++; + rlepos++; + if (rlepos < this.nbrruns) { + System.arraycopy(this.valueslength, 2 * rlepos, answer.valueslength, 2 * answer.nbrruns, + 2 * (this.nbrruns - rlepos)); + answer.nbrruns = answer.nbrruns + this.nbrruns - rlepos; + } + } + return answer.toEfficientContainer(); + } + + // Append a value length with all values until a given value + private void appendValueLength(int value, int index) { + int previousValue = Util.toIntUnsigned(getValue(index)); + int length = Util.toIntUnsigned(getLength(index)); + int offset = value - previousValue; + if (offset > length) { + setLength(index, (short) offset); + } + } + + // To check if a value length can be prepended with a given value + private boolean canPrependValueLength(int value, int index) { + if (index < this.nbrruns) { + int nextValue = Util.toIntUnsigned(getValue(index)); + return nextValue == value + 1; + } + return false; + } + + @Override + public void clear() { + nbrruns = 0; + } + + @Override + public Container clone() { + return new RunContainer(nbrruns, valueslength); + } + + // To set the last value of a value length + private void closeValueLength(int value, int index) { + int initialValue = Util.toIntUnsigned(getValue(index)); + setLength(index, (short) (value - initialValue)); + } + + @Override + public boolean contains(short x) { + int index = unsignedInterleavedBinarySearch(valueslength, 0, nbrruns, x); + if (index >= 0) { + return true; + } + index = -index - 2; // points to preceding value, possibly -1 + if (index != -1) {// possible match + int offset = Util.toIntUnsigned(x) - Util.toIntUnsigned(getValue(index)); + int le = Util.toIntUnsigned(getLength(index)); + return offset <= le; + } + return false; + } + + + // a very cheap check... if you have more than 4096, then you should use a bitmap container. 
+ // this function avoids computing the cardinality + private Container convertToLazyBitmapIfNeeded() { + // when nbrruns exceed ArrayContainer.DEFAULT_MAX_SIZE, then we know it should be stored as a + // bitmap, always + if (this.nbrruns > ArrayContainer.DEFAULT_MAX_SIZE) { + BitmapContainer answer = new BitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + Util.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = -1; + return answer; + } + return this; + } + + + // Push all values length to the end of the array (resize array if needed) + private void copyToOffset(int offset) { + final int minCapacity = 2 * (offset + nbrruns); + if (valueslength.length < minCapacity) { + // expensive case where we need to reallocate + int newCapacity = valueslength.length; + while (newCapacity < minCapacity) { + newCapacity = (newCapacity == 0) ? DEFAULT_INIT_SIZE + : newCapacity < 64 ? newCapacity * 2 + : newCapacity < 1024 ? 
newCapacity * 3 / 2 : newCapacity * 5 / 4; + } + short[] newvalueslength = new short[newCapacity]; + copyValuesLength(this.valueslength, 0, newvalueslength, offset, nbrruns); + this.valueslength = newvalueslength; + } else { + // efficient case where we just copy + copyValuesLength(this.valueslength, 0, this.valueslength, offset, nbrruns); + } + } + + private void copyValuesLength(short[] src, int srcIndex, short[] dst, int dstIndex, int length) { + System.arraycopy(src, 2 * srcIndex, dst, 2 * dstIndex, 2 * length); + } + + private void decrementLength(int index) { + valueslength[2 * index + 1]--;// caller is responsible to ensure that value is non-zero + } + + + private void decrementValue(int index) { + valueslength[2 * index]--; + } + + @Override + public void deserialize(DataInput in) throws IOException { + nbrruns = Short.reverseBytes(in.readShort()); + if (valueslength.length < 2 * nbrruns) { + valueslength = new short[2 * nbrruns]; + } + for (int k = 0; k < 2 * nbrruns; ++k) { + this.valueslength[k] = Short.reverseBytes(in.readShort()); + } + } + + // not actually used anywhere, but potentially useful + protected void ensureCapacity(int minNbRuns) { + final int minCapacity = 2 * minNbRuns; + if (valueslength.length < minCapacity) { + int newCapacity = valueslength.length; + while (newCapacity < minCapacity) { + newCapacity = (newCapacity == 0) ? DEFAULT_INIT_SIZE + : newCapacity < 64 ? newCapacity * 2 + : newCapacity < 1024 ? 
newCapacity * 3 / 2 : newCapacity * 5 / 4; + } + short[] nv = new short[newCapacity]; + copyValuesLength(valueslength, 0, nv, 0, nbrruns); + valueslength = nv; + } + } + + @Override + public boolean equals(Object o) { + if (o instanceof RunContainer) { + RunContainer srb = (RunContainer) o; + if (srb.nbrruns != this.nbrruns) { + return false; + } + for (int i = 0; i < nbrruns; ++i) { + if (this.getValue(i) != srb.getValue(i)) { + return false; + } + if (this.getLength(i) != srb.getLength(i)) { + return false; + } + } + return true; + } else if (o instanceof Container) { + if (((Container) o).getCardinality() != this.getCardinality()) { + return false; // should be a frequent branch if they differ + } + // next bit could be optimized if needed: + ShortIterator me = this.getShortIterator(); + ShortIterator you = ((Container) o).getShortIterator(); + while (me.hasNext()) { + if (me.next() != you.next()) { + return false; + } + } + return true; + } + return false; + } + + @Override + public void fillLeastSignificant16bits(int[] x, int i, int mask) { + int pos = i; + for (int k = 0; k < this.nbrruns; ++k) { + final int limit = Util.toIntUnsigned(this.getLength(k)); + final int base = Util.toIntUnsigned(this.getValue(k)); + for (int le = 0; le <= limit; ++le) { + x[pos++] = (base + le) | mask; + } + } + } + + @Override + public Container flip(short x) { + if (this.contains(x)) { + return this.remove(x); + } else { + return this.add(x); + } + } + + @Override + protected int getArraySizeInBytes() { + return 2 + 4 * this.nbrruns; // "array" includes its size + } + + + @Override + public int getCardinality() { + int sum = nbrruns;// lengths are returned -1 + for (int k = 0; k < nbrruns; ++k) { + sum = sum + Util.toIntUnsigned(getLength(k))/* + 1 */; + } + return sum; + } + + short getLength(int index) { + return valueslength[2 * index + 1]; + } + + @Override + public ShortIterator getReverseShortIterator() { + return new ReverseRunContainerShortIterator(this); + } + + 
@Override + public PeekableShortIterator getShortIterator() { + return new RunContainerShortIterator(this); + } + + @Override + public int getSizeInBytes() { + return this.nbrruns * 4 + 4; + } + + short getValue(int index) { + return valueslength[2 * index]; + } + + @Override + public int hashCode() { + int hash = 0; + for (int k = 0; k < nbrruns * 2; ++k) { + hash += 31 * hash + valueslength[k]; + } + return hash; + } + + @Override + public Container iadd(int begin, int end) { + // TODO: it might be better and simpler to do return + // toBitmapOrArrayContainer(getCardinality()).iadd(begin,end) + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + + if (begin == end - 1) { + add((short) begin); + return this; + } + + int bIndex = unsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, (short) begin); + int eIndex = + unsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, (short) (end - 1)); + + if (bIndex >= 0 && eIndex >= 0) { + mergeValuesLength(bIndex, eIndex); + return this; + + } else if (bIndex >= 0 && eIndex < 0) { + eIndex = -eIndex - 2; + + if (canPrependValueLength(end - 1, eIndex + 1)) { + mergeValuesLength(bIndex, eIndex + 1); + return this; + } + + appendValueLength(end - 1, eIndex); + mergeValuesLength(bIndex, eIndex); + return this; + + } else if (bIndex < 0 && eIndex >= 0) { + bIndex = -bIndex - 2; + + if (bIndex >= 0) { + if (valueLengthContains(begin - 1, bIndex)) { + mergeValuesLength(bIndex, eIndex); + return this; + } + } + prependValueLength(begin, bIndex + 1); + mergeValuesLength(bIndex + 1, eIndex); + return this; + + } else { + bIndex = -bIndex - 2; + eIndex = -eIndex - 2; + + if (eIndex >= 0) { + if (bIndex >= 0) { + if (!valueLengthContains(begin - 1, bIndex)) { + if (bIndex == eIndex) { + if (canPrependValueLength(end - 1, eIndex + 1)) { + prependValueLength(begin, eIndex + 1); + return this; + 
} + makeRoomAtIndex(eIndex + 1); + setValue(eIndex + 1, (short) begin); + setLength(eIndex + 1, (short) (end - 1 - begin)); + return this; + + } else { + bIndex++; + prependValueLength(begin, bIndex); + } + } + } else { + bIndex = 0; + prependValueLength(begin, bIndex); + } + + if (canPrependValueLength(end - 1, eIndex + 1)) { + mergeValuesLength(bIndex, eIndex + 1); + return this; + } + + appendValueLength(end - 1, eIndex); + mergeValuesLength(bIndex, eIndex); + return this; + + } else { + if (canPrependValueLength(end - 1, 0)) { + prependValueLength(begin, 0); + } else { + makeRoomAtIndex(0); + setValue(0, (short) begin); + setLength(0, (short) (end - 1 - begin)); + } + return this; + } + } + } + + @Override + public Container iand(ArrayContainer x) { + return and(x); + } + + @Override + public Container iand(BitmapContainer x) { + return and(x); + } + + + @Override + public Container iand(RunContainer x) { + return and(x); + } + + + @Override + public Container iandNot(ArrayContainer x) { + return andNot(x); + } + + @Override + public Container iandNot(BitmapContainer x) { + return andNot(x); + } + + @Override + public Container iandNot(RunContainer x) { + return andNot(x); + } + + protected Container ilazyor(ArrayContainer x) { + if (isFull()) { + return this; // this can sometimes solve a lot of computation! 
+ } + return ilazyorToRun(x); + } + + private Container ilazyorToRun(ArrayContainer x) { + if (isFull()) { + return this.clone(); + } + final int nbrruns = this.nbrruns; + final int offset = Math.max(nbrruns, x.getCardinality()); + copyToOffset(offset); + int rlepos = 0; + this.nbrruns = 0; + PeekableShortIterator i = x.getShortIterator(); + while (i.hasNext() && (rlepos < nbrruns)) { + if (Util.compareUnsigned(getValue(rlepos + offset), i.peekNext()) <= 0) { + smartAppend(getValue(rlepos + offset), getLength(rlepos + offset)); + rlepos++; + } else { + smartAppend(i.next()); + } + } + if (i.hasNext()) { + /* + * if(this.nbrruns>0) { // this might be useful if the run container has just one very large + * run int lastval = Util.toIntUnsigned(getValue(nbrruns + offset - 1)) + + * Util.toIntUnsigned(getLength(nbrruns + offset - 1)) + 1; i.advanceIfNeeded((short) + * lastval); } + */ + while (i.hasNext()) { + smartAppend(i.next()); + } + } else { + while (rlepos < nbrruns) { + smartAppend(getValue(rlepos + offset), getLength(rlepos + offset)); + rlepos++; + } + } + return convertToLazyBitmapIfNeeded(); + } + + private void increaseCapacity() { + int newCapacity = (valueslength.length == 0) ? DEFAULT_INIT_SIZE + : valueslength.length < 64 ? valueslength.length * 2 + : valueslength.length < 1024 ? 
valueslength.length * 3 / 2 + : valueslength.length * 5 / 4; + short[] nv = new short[newCapacity]; + System.arraycopy(valueslength, 0, nv, 0, 2 * nbrruns); + valueslength = nv; + } + + + private void incrementLength(int index) { + valueslength[2 * index + 1]++; + } + + + private void incrementValue(int index) { + valueslength[2 * index]++; + } + + // To set the first value of a value length + private void initValueLength(int value, int index) { + int initialValue = Util.toIntUnsigned(getValue(index)); + int length = Util.toIntUnsigned(getLength(index)); + setValue(index, (short) (value)); + setLength(index, (short) (length - (value - initialValue))); + } + + @Override + public Container inot(int rangeStart, int rangeEnd) { + if (rangeEnd <= rangeStart) { + return this; + } + + // TODO: write special case code for rangeStart=0; rangeEnd=65535 + // a "sliding" effect where each range records the gap adjacent it + // can probably be quite fast. Probably have 2 cases: start with a + // 0 run vs start with a 1 run. If you both start and end with 0s, + // you will require room for expansion. + + // the +1 below is needed in case the valueslength.length is odd + if (valueslength.length <= 2 * nbrruns + 1) { + // no room for expansion + // analyze whether this is a case that will require expansion (that we cannot do) + // this is a bit costly now (4 "contains" checks) + + boolean lastValueBeforeRange = false; + boolean firstValueInRange = false; + boolean lastValueInRange = false; + boolean firstValuePastRange = false; + + // contains is based on a binary search and is hopefully fairly fast. + // however, one binary search could *usually* suffice to find both + // lastValueBeforeRange AND firstValueInRange. 
ditto for + // lastVaueInRange and firstValuePastRange + + // find the start of the range + if (rangeStart > 0) { + lastValueBeforeRange = contains((short) (rangeStart - 1)); + } + firstValueInRange = contains((short) rangeStart); + + if (lastValueBeforeRange == firstValueInRange) { + // expansion is required if also lastValueInRange==firstValuePastRange + + // tougher to optimize out, but possible. + lastValueInRange = contains((short) (rangeEnd - 1)); + if (rangeEnd != 65536) { + firstValuePastRange = contains((short) rangeEnd); + } + + // there is definitely one more run after the operation. + if (lastValueInRange == firstValuePastRange) { + return not(rangeStart, rangeEnd); // can't do in-place: true space limit + } + } + } + // either no expansion required, or we have room to handle any required expansion for it. + + // remaining code is just a minor variation on not() + int myNbrRuns = nbrruns; + + RunContainer ans = this; // copy on top of self. + int k = 0; + ans.nbrruns = 0; // losing this.nbrruns, which is stashed in myNbrRuns. + + // could try using unsignedInterleavedBinarySearch(valueslength, 0, nbrruns, rangeStart) instead + // of sequential scan + // to find the starting location + + for (; (k < myNbrRuns) && (Util.toIntUnsigned(this.getValue(k)) < rangeStart); ++k) { + // since it is atop self, there is no copying needed + // ans.valueslength[2 * k] = this.valueslength[2 * k]; + // ans.valueslength[2 * k + 1] = this.valueslength[2 * k + 1]; + ans.nbrruns++; + } + // We will work left to right, with a read pointer that always stays + // left of the write pointer. However, we need to give the read pointer a head start. + // use local variables so we are always reading 1 location ahead. 
+ + short bufferedValue = 0, bufferedLength = 0; // 65535 start and 65535 length would be illegal, + // could use as sentinel + short nextValue = 0, nextLength = 0; + if (k < myNbrRuns) { // prime the readahead variables + bufferedValue = getValue(k); + bufferedLength = getLength(k); + } + + ans.smartAppendExclusive((short) rangeStart, (short) (rangeEnd - rangeStart - 1)); + + for (; k < myNbrRuns; ++k) { + if (ans.nbrruns > k + 1) { + throw new RuntimeException( + "internal error in inot, writer has overtaken reader!! " + k + " " + ans.nbrruns); + } + if (k + 1 < myNbrRuns) { + nextValue = getValue(k + 1); // readahead for next iteration + nextLength = getLength(k + 1); + } + ans.smartAppendExclusive(bufferedValue, bufferedLength); + bufferedValue = nextValue; + bufferedLength = nextLength; + } + // the number of runs can increase by one, meaning (rarely) a bitmap will become better + // or the cardinality can decrease by a lot, making an array better + return ans.toEfficientContainer(); + } + + @Override + public boolean intersects(ArrayContainer x) { + if (this.nbrruns == 0) { + return false; + } + int rlepos = 0; + int arraypos = 0; + int rleval = Util.toIntUnsigned(this.getValue(rlepos)); + int rlelength = Util.toIntUnsigned(this.getLength(rlepos)); + while (arraypos < x.cardinality) { + int arrayval = Util.toIntUnsigned(x.content[arraypos]); + while (rleval + rlelength < arrayval) {// this will frequently be false + ++rlepos; + if (rlepos == this.nbrruns) { + return false; + } + rleval = Util.toIntUnsigned(this.getValue(rlepos)); + rlelength = Util.toIntUnsigned(this.getLength(rlepos)); + } + if (rleval > arrayval) { + arraypos = Util.advanceUntil(x.content, arraypos, x.cardinality, this.getValue(rlepos)); + } else { + return true; + } + } + return false; + } + + @Override + public boolean intersects(BitmapContainer x) { + // TODO: this is probably not optimally fast + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = 
Util.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + Util.toIntUnsigned(this.getLength(rlepos)); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + if (x.contains((short) runValue)) { + return true; + } + } + } + return false; + } + + @Override + public boolean intersects(RunContainer x) { + int rlepos = 0; + int xrlepos = 0; + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + int xstart = Util.toIntUnsigned(x.getValue(xrlepos)); + int xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1; + while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) { + if (end <= xstart) { + if (ENABLE_GALLOPING_AND) { + rlepos = skipAhead(this, rlepos, xstart); // skip over runs until we have end > xstart (or + // rlepos is advanced beyond end) + } else { + ++rlepos; + } + + if (rlepos < this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xend <= start) { + // exit the second run + if (ENABLE_GALLOPING_AND) { + xrlepos = skipAhead(x, xrlepos, start); + } else { + ++xrlepos; + } + + if (xrlepos < x.nbrruns) { + xstart = Util.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + Util.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else {// they overlap + return true; + } + } + return false; + } + + @Override + public Container ior(ArrayContainer x) { + if (isFull()) { + return this; + } + final int nbrruns = this.nbrruns; + final int offset = Math.max(nbrruns, x.getCardinality()); + copyToOffset(offset); + int rlepos = 0; + this.nbrruns = 0; + PeekableShortIterator i = x.getShortIterator(); + while (i.hasNext() && (rlepos < nbrruns)) { + if (Util.compareUnsigned(getValue(rlepos + offset), i.peekNext()) <= 0) { + smartAppend(getValue(rlepos + offset), getLength(rlepos + offset)); + rlepos++; + } else { + smartAppend(i.next()); + } + } + if (i.hasNext()) { + /* + * 
if(this.nbrruns>0) { // this might be useful if the run container has just one very large + * run int lastval = Util.toIntUnsigned(getValue(nbrruns + offset - 1)) + + * Util.toIntUnsigned(getLength(nbrruns + offset - 1)) + 1; i.advanceIfNeeded((short) + * lastval); } + */ + while (i.hasNext()) { + smartAppend(i.next()); + } + } else { + while (rlepos < nbrruns) { + smartAppend(getValue(rlepos + offset), getLength(rlepos + offset)); + rlepos++; + } + } + return toEfficientContainer(); + } + + @Override + public Container ior(BitmapContainer x) { + if (isFull()) { + return this; + } + return or(x); + } + + @Override + public Container ior(RunContainer x) { + if (isFull()) { + return this; + } + + final int nbrruns = this.nbrruns; + final int xnbrruns = x.nbrruns; + final int offset = Math.max(nbrruns, xnbrruns); + + // Push all values length to the end of the array (resize array if needed) + copyToOffset(offset); + // Aggregate and store the result at the beginning of the array + this.nbrruns = 0; + int rlepos = 0; + int xrlepos = 0; + + // Add values length (smaller first) + while ((rlepos < nbrruns) && (xrlepos < xnbrruns)) { + final short value = this.getValue(offset + rlepos); + final short xvalue = x.getValue(xrlepos); + final short length = this.getLength(offset + rlepos); + final short xlength = x.getLength(xrlepos); + + if (Util.compareUnsigned(value, xvalue) <= 0) { + this.smartAppend(value, length); + ++rlepos; + } else { + this.smartAppend(xvalue, xlength); + ++xrlepos; + } + } + + while (rlepos < nbrruns) { + this.smartAppend(this.getValue(offset + rlepos), this.getLength(offset + rlepos)); + ++rlepos; + } + + while (xrlepos < xnbrruns) { + this.smartAppend(x.getValue(xrlepos), x.getLength(xrlepos)); + ++xrlepos; + } + return this.toBitmapIfNeeded(); + } + + @Override + public Container iremove(int begin, int end) { + // TODO: it might be better and simpler to do return + // toBitmapOrArrayContainer(getCardinality()).iremove(begin,end) + if (end == begin) 
{ + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + if (begin == end - 1) { + remove((short) begin); + return this; + } + + int bIndex = unsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, (short) begin); + int eIndex = + unsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, (short) (end - 1)); + + // note, eIndex is looking for (end-1) + + if (bIndex >= 0) { // beginning marks beginning of a run + if (eIndex < 0) { + eIndex = -eIndex - 2; + } + // eIndex could be a run that begins exactly at "end" + // or it might be an earlier run + + // if the end is before the first run, we'd have eIndex==-1. But bIndex makes this impossible. + + if (valueLengthContains(end, eIndex)) { + initValueLength(end, eIndex); // there is something left in the run + recoverRoomsInRange(bIndex - 1, eIndex - 1); + } else { + recoverRoomsInRange(bIndex - 1, eIndex); // nothing left in the run + } + + } else if (bIndex < 0 && eIndex >= 0) { + // start does not coincide to a run start, but end does. + bIndex = -bIndex - 2; + + if (bIndex >= 0) { + if (valueLengthContains(begin, bIndex)) { + closeValueLength(begin - 1, bIndex); + } + } + + // last run is one shorter + if (getLength(eIndex) == 0) {// special case where we remove last run + recoverRoomsInRange(eIndex, eIndex + 1); + } else { + incrementValue(eIndex); + decrementLength(eIndex); + } + recoverRoomsInRange(bIndex, eIndex - 1); + + } else { + bIndex = -bIndex - 2; + eIndex = -eIndex - 2; + + if (eIndex >= 0) { // end-1 is not before first run. 
+ if (bIndex >= 0) { // nor is begin + if (bIndex == eIndex) { // all removal nested properly between + // one run start and the next + if (valueLengthContains(begin, bIndex)) { + if (valueLengthContains(end, eIndex)) { + // proper nesting within a run, generates 2 sub-runs + makeRoomAtIndex(bIndex); + closeValueLength(begin - 1, bIndex); + initValueLength(end, bIndex + 1); + return this; + } + // removed area extends beyond run. + closeValueLength(begin - 1, bIndex); + } + } else { // begin in one run area, end in a later one. + if (valueLengthContains(begin, bIndex)) { + closeValueLength(begin - 1, bIndex); + // this cannot leave the bIndex run empty. + } + if (valueLengthContains(end, eIndex)) { + // there is additional stuff in the eIndex run + initValueLength(end, eIndex); + eIndex--; + } else { + // run ends at or before the range being removed, can deleteById it + } + recoverRoomsInRange(bIndex, eIndex); + } + + } else { + // removed range begins before the first run + if (valueLengthContains(end, eIndex)) { // had been end-1 + initValueLength(end, eIndex); + recoverRoomsInRange(bIndex, eIndex - 1); + } else { // removed range includes all the last run + recoverRoomsInRange(bIndex, eIndex); + } + } + + } else { + // eIndex == -1: whole range is before first run, nothing to deleteById... 
+ } + + } + return this; + } + + protected boolean isFull() { + return (this.nbrruns == 1) && (this.getValue(0) == 0) && (this.getLength(0) == -1); + } + + @Override + public Iterator iterator() { + final ShortIterator i = getShortIterator(); + return new Iterator() { + + @Override + public boolean hasNext() { + return i.hasNext(); + } + + @Override + public Short next() { + return i.next(); + } + + @Override + public void remove() { + i.remove(); + } + }; + + } + + @Override + public Container ixor(ArrayContainer x) { + return xor(x); + } + + @Override + public Container ixor(BitmapContainer x) { + return xor(x); + } + + + @Override + public Container ixor(RunContainer x) { + return xor(x); + } + + private RunContainer lazyandNot(ArrayContainer x) { + if (x.getCardinality() == 0) { + return this; + } + RunContainer answer = new RunContainer(new short[2 * (this.nbrruns + x.cardinality)], 0); + int rlepos = 0; + int xrlepos = 0; + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + int xstart = Util.toIntUnsigned(x.content[xrlepos]); + while ((rlepos < this.nbrruns) && (xrlepos < x.cardinality)) { + if (end <= xstart) { + // output the first run + answer.valueslength[2 * answer.nbrruns] = (short) start; + answer.valueslength[2 * answer.nbrruns + 1] = (short) (end - start - 1); + answer.nbrruns++; + rlepos++; + if (rlepos < this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xstart + 1 <= start) { + // exit the second run + xrlepos++; + if (xrlepos < x.cardinality) { + xstart = Util.toIntUnsigned(x.content[xrlepos]); + } + } else { + if (start < xstart) { + answer.valueslength[2 * answer.nbrruns] = (short) start; + answer.valueslength[2 * answer.nbrruns + 1] = (short) (xstart - start - 1); + answer.nbrruns++; + } + if (xstart + 1 < end) { + start = xstart + 1; + } else { + rlepos++; + if (rlepos < 
this.nbrruns) { + start = Util.toIntUnsigned(this.getValue(rlepos)); + end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } + } + } + if (rlepos < this.nbrruns) { + answer.valueslength[2 * answer.nbrruns] = (short) start; + answer.valueslength[2 * answer.nbrruns + 1] = (short) (end - start - 1); + answer.nbrruns++; + rlepos++; + if (rlepos < this.nbrruns) { + System.arraycopy(this.valueslength, 2 * rlepos, answer.valueslength, 2 * answer.nbrruns, + 2 * (this.nbrruns - rlepos)); + answer.nbrruns = answer.nbrruns + this.nbrruns - rlepos; + } + } + return answer; + } + + protected Container lazyor(ArrayContainer x) { + return lazyorToRun(x); + } + + private Container lazyorToRun(ArrayContainer x) { + if (isFull()) { + return this.clone(); + } + // TODO: should optimize for the frequent case where we have a single run + RunContainer answer = new RunContainer(new short[2 * (this.nbrruns + x.getCardinality())], 0); + int rlepos = 0; + PeekableShortIterator i = x.getShortIterator(); + + while (i.hasNext() && (rlepos < this.nbrruns)) { + if (Util.compareUnsigned(getValue(rlepos), i.peekNext()) <= 0) { + answer.smartAppend(getValue(rlepos), getLength(rlepos)); + // in theory, this next code could help, in practice it doesn't. 
/**
 * Computes the symmetric difference between the runs of this container and the values of an
 * array container, writing the result into a freshly allocated RunContainer.
 *
 * <p>No copy is made in the two trivial cases: {@code this} is returned when {@code x} has
 * cardinality 0, and {@code x} itself is returned when this container has no runs. The result is
 * returned without any conversion step; callers in this file follow up with
 * {@code repairAfterLazy()}.
 *
 * @param x the array container to xor with
 * @return {@code this}, {@code x}, or a new RunContainer holding the symmetric difference
 */
private Container lazyxor(ArrayContainer x) {
  if (x.getCardinality() == 0) {
    return this;
  }
  if (this.nbrruns == 0) {
    return x;
  }
  // room for every run of this container plus one single-value run per element of x
  RunContainer answer = new RunContainer(new short[2 * (this.nbrruns + x.getCardinality())], 0);
  int rlepos = 0;
  ShortIterator i = x.getShortIterator();
  short cv = i.next(); // current value taken from x; safe because x is non-empty here

  // two-way merge: always feed the input with the smaller start to smartAppendExclusive
  while (true) {
    if (Util.compareUnsigned(getValue(rlepos), cv) < 0) {
      // the current run starts strictly before cv
      answer.smartAppendExclusive(getValue(rlepos), getLength(rlepos));
      rlepos++;
      if (rlepos == this.nbrruns) {
        // runs exhausted: flush the pending value and the rest of x
        answer.smartAppendExclusive(cv);
        while (i.hasNext()) {
          answer.smartAppendExclusive(i.next());
        }
        break;
      }
    } else {
      // cv comes first (or ties with the start of the current run)
      answer.smartAppendExclusive(cv);
      if (!i.hasNext()) {
        // x exhausted: flush the remaining runs
        while (rlepos < this.nbrruns) {
          answer.smartAppendExclusive(getValue(rlepos), getLength(rlepos));
          rlepos++;
        }
        break;
      } else {
        cv = i.next();
      }
    }
  }
  return answer;
}
} + + RunContainer rc = new RunContainer(Arrays.copyOf(valueslength, 2 * (r + 1)), r + 1); + rc.setLength(r, + (short) (Util.toIntUnsigned(rc.getLength(r)) - cardinality + maxcardinality)); + return rc; + } + + private void makeRoomAtIndex(int index) { + if (2 * (nbrruns + 1) > valueslength.length) { + increaseCapacity(); + } + copyValuesLength(valueslength, index, valueslength, index + 1, nbrruns - index); + nbrruns++; + } + + // To merge values length from begin(inclusive) to end(inclusive) + private void mergeValuesLength(int begin, int end) { + if (begin < end) { + int bValue = Util.toIntUnsigned(getValue(begin)); + int eValue = Util.toIntUnsigned(getValue(end)); + int eLength = Util.toIntUnsigned(getLength(end)); + int newLength = eValue - bValue + eLength; + setLength(begin, (short) newLength); + recoverRoomsInRange(begin, end); + } + } + + @Override + public Container not(int rangeStart, int rangeEnd) { + if (rangeEnd <= rangeStart) { + return this.clone(); + } + RunContainer ans = new RunContainer(nbrruns + 1); + int k = 0; + for (; (k < this.nbrruns) && (Util.toIntUnsigned(this.getValue(k)) < rangeStart); ++k) { + ans.valueslength[2 * k] = this.valueslength[2 * k]; + ans.valueslength[2 * k + 1] = this.valueslength[2 * k + 1]; + ans.nbrruns++; + } + ans.smartAppendExclusive((short) rangeStart, (short) (rangeEnd - rangeStart - 1)); + for (; k < this.nbrruns; ++k) { + ans.smartAppendExclusive(getValue(k), getLength(k)); + } + // the number of runs can increase by one, meaning (rarely) a bitmap will become better + // or the cardinality can decrease by a lot, making an array better + return ans.toEfficientContainer(); + } + + @Override + public int numberOfRuns() { + return nbrruns; + } + + @Override + public Container or(ArrayContainer x) { + // we guess that, often, the result will still be efficiently expressed as a run container + return lazyor(x).repairAfterLazy(); + } + + @Override + public Container or(BitmapContainer x) { + if (isFull()) { + return 
clone(); + } + // could be implemented as return toTemporaryBitmap().ior(x); + BitmapContainer answer = x.clone(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + Util.setBitmapRange(answer.bitmap, start, end); + } + answer.computeCardinality(); + return answer; + } + + @Override + public Container or(RunContainer x) { + if (isFull()) { + return clone(); + } + if (x.isFull()) { + return x.clone(); // cheap case that can save a lot of computation + } + // we really ought to optimize the rest of the code for the frequent case where there is a + // single run + RunContainer answer = new RunContainer(new short[2 * (this.nbrruns + x.nbrruns)], 0); + int rlepos = 0; + int xrlepos = 0; + + while ((xrlepos < x.nbrruns) && (rlepos < this.nbrruns)) { + if (Util.compareUnsigned(getValue(rlepos), x.getValue(xrlepos)) <= 0) { + answer.smartAppend(getValue(rlepos), getLength(rlepos)); + rlepos++; + } else { + answer.smartAppend(x.getValue(xrlepos), x.getLength(xrlepos)); + xrlepos++; + } + } + while (xrlepos < x.nbrruns) { + answer.smartAppend(x.getValue(xrlepos), x.getLength(xrlepos)); + xrlepos++; + } + while (rlepos < this.nbrruns) { + answer.smartAppend(getValue(rlepos), getLength(rlepos)); + rlepos++; + } + + return answer.toBitmapIfNeeded(); + } + + // Prepend a value length with all values starting from a given value + private void prependValueLength(int value, int index) { + int initialValue = Util.toIntUnsigned(getValue(index)); + int length = Util.toIntUnsigned(getLength(index)); + setValue(index, (short) value); + setLength(index, (short) (initialValue - value + length)); + } + + @Override + public int rank(short lowbits) { + int x = Util.toIntUnsigned(lowbits); + int answer = 0; + for (int k = 0; k < this.nbrruns; ++k) { + int value = Util.toIntUnsigned(getValue(k)); + int length = Util.toIntUnsigned(getLength(k)); + if (x < value) 
{ + return answer; + } else if (value + length + 1 >= x) { + return answer + x - value + 1; + } + answer += length + 1; + } + return answer; + } + + @Override + public void readExternal(ObjectInput in) throws IOException { + deserialize(in); + } + + private void recoverRoomAtIndex(int index) { + copyValuesLength(valueslength, index + 1, valueslength, index, nbrruns - index - 1); + nbrruns--; + } + + // To recover rooms between begin(exclusive) and end(inclusive) + private void recoverRoomsInRange(int begin, int end) { + if (end + 1 < this.nbrruns) { + copyValuesLength(this.valueslength, end + 1, this.valueslength, begin + 1, + this.nbrruns - 1 - end); + } + this.nbrruns -= end - begin; + } + + @Override + public Container remove(int begin, int end) { + RunContainer rc = (RunContainer) clone(); + return rc.iremove(begin, end); + } + + @Override + public Container remove(short x) { + int index = unsignedInterleavedBinarySearch(valueslength, 0, nbrruns, x); + if (index >= 0) { + if (getLength(index) == 0) { + recoverRoomAtIndex(index); + } else { + incrementValue(index); + decrementLength(index); + } + return this;// already there + } + index = -index - 2;// points to preceding value, possibly -1 + if (index >= 0) {// possible match + int offset = Util.toIntUnsigned(x) - Util.toIntUnsigned(getValue(index)); + int le = Util.toIntUnsigned(getLength(index)); + if (offset < le) { + // need to break in two + this.setLength(index, (short) (offset - 1)); + // need to insert + int newvalue = Util.toIntUnsigned(x) + 1; + int newlength = le - offset - 1; + makeRoomAtIndex(index + 1); + this.setValue(index + 1, (short) newvalue); + this.setLength(index + 1, (short) newlength); + return this; + + } else if (offset == le) { + decrementLength(index); + } + } + // no match + return this; + } + + @Override + public Container repairAfterLazy() { + return toEfficientContainer(); + } + + /** + * Convert to Array or Bitmap container if the serialized form would be shorter. 
Exactly the same + * functionality as toEfficientContainer. + */ + + @Override + public Container runOptimize() { + return toEfficientContainer(); + } + + @Override + public short select(int j) { + int offset = 0; + for (int k = 0; k < this.nbrruns; ++k) { + int nextOffset = offset + Util.toIntUnsigned(getLength(k)) + 1; + if (nextOffset > j) { + return (short) (getValue(k) + (j - offset)); + } + offset = nextOffset; + } + throw new IllegalArgumentException( + "Cannot select " + j + " since cardinality is " + getCardinality()); + } + + @Override + public void serialize(DataOutput out) throws IOException { + writeArray(out); + } + + @Override + public int serializedSizeInBytes() { + return serializedSizeInBytes(nbrruns); + } + + private void setLength(int index, short v) { + setLength(valueslength, index, v); + } + + + private void setLength(short[] valueslength, int index, short v) { + valueslength[2 * index + 1] = v; + } + + private void setValue(int index, short v) { + setValue(valueslength, index, v); + } + + private void setValue(short[] valueslength, int index, short v) { + valueslength[2 * index] = v; + } + + + // bootstrapping (aka "galloping") binary search. Always skips at least one. + // On our "real data" benchmarks, enabling galloping is a minor loss + // .."ifdef ENABLE_GALLOPING_AND" :) + private int skipAhead(RunContainer skippingOn, int pos, int targetToExceed) { + int left = pos; + int span = 1; + int probePos = 0; + int end; + // jump ahead to find a spot where end > targetToExceed (if it exists) + do { + probePos = left + span; + if (probePos >= skippingOn.nbrruns - 1) { + // expect it might be quite common to find the container cannot be advanced as far as + // requested. Optimize for it. 
/**
 * Appends a single value to the end of the run list, assuming values arrive in non-decreasing
 * order (NOTE(review): the ordering is not checked here - confirm against callers).
 *
 * <p>Three outcomes: a new one-value run is started when there is no run yet or when
 * {@code val} lies more than one past the last value of the final run; the final run is
 * extended by one when {@code val} is exactly one past its last value; otherwise nothing
 * changes. No capacity check is performed before writing into {@code valueslength}.
 *
 * @param val the value to append
 */
private void smartAppend(short val) {
  int oldend;
  // oldend is assigned inside the condition: last value of the final run (start + length).
  // It is only read below when nbrruns != 0, i.e. when the assignment has executed.
  if ((nbrruns == 0)
      || (Util.toIntUnsigned(val) > (oldend = Util.toIntUnsigned(valueslength[2 * (nbrruns - 1)])
          + Util.toIntUnsigned(valueslength[2 * (nbrruns - 1) + 1])) + 1)) { // we add a new one
    valueslength[2 * nbrruns] = val;
    valueslength[2 * nbrruns + 1] = 0; // length field 0 means the run holds a single value
    nbrruns++;
    return;
  }
  if (val == (short) (oldend + 1)) { // we merge
    valueslength[2 * (nbrruns - 1) + 1]++;
  }
}
{ + int oldend; + if ((nbrruns == 0) + || (Util.toIntUnsigned(val) > (oldend = Util.toIntUnsigned(getValue(nbrruns - 1)) + + Util.toIntUnsigned(getLength(nbrruns - 1)) + 1))) { // we add a new one + valueslength[2 * nbrruns] = val; + valueslength[2 * nbrruns + 1] = 0; + nbrruns++; + return; + } + if (oldend == Util.toIntUnsigned(val)) { + // we merge + valueslength[2 * (nbrruns - 1) + 1]++; + return; + } + int newend = Util.toIntUnsigned(val) + 1; + + if (Util.toIntUnsigned(val) == Util.toIntUnsigned(getValue(nbrruns - 1))) { + // we wipe out previous + if (newend != oldend) { + setValue(nbrruns - 1, (short) newend); + setLength(nbrruns - 1, (short) (oldend - newend - 1)); + return; + } else { // they cancel out + nbrruns--; + return; + } + } + setLength(nbrruns - 1, (short) (val - Util.toIntUnsigned(getValue(nbrruns - 1)) - 1)); + if (newend < oldend) { + setValue(nbrruns, (short) newend); + setLength(nbrruns, (short) (oldend - newend - 1)); + nbrruns++; + } else if (oldend < newend) { + setValue(nbrruns, (short) oldend); + setLength(nbrruns, (short) (newend - oldend - 1)); + nbrruns++; + } + + } + + private void smartAppendExclusive(short start, short length) { + int oldend; + if ((nbrruns == 0) + || (Util.toIntUnsigned(start) > (oldend = Util.toIntUnsigned(getValue(nbrruns - 1)) + + Util.toIntUnsigned(getLength(nbrruns - 1)) + 1))) { // we add a new one + valueslength[2 * nbrruns] = start; + valueslength[2 * nbrruns + 1] = length; + nbrruns++; + return; + } + if (oldend == Util.toIntUnsigned(start)) { + // we merge + valueslength[2 * (nbrruns - 1) + 1] += length + 1; + return; + } + + int newend = Util.toIntUnsigned(start) + Util.toIntUnsigned(length) + 1; + + if (Util.toIntUnsigned(start) == Util.toIntUnsigned(getValue(nbrruns - 1))) { + // we wipe out previous + if (newend < oldend) { + setValue(nbrruns - 1, (short) newend); + setLength(nbrruns - 1, (short) (oldend - newend - 1)); + return; + } else if (newend > oldend) { + setValue(nbrruns - 1, (short) 
oldend); + setLength(nbrruns - 1, (short) (newend - oldend - 1)); + return; + } else { // they cancel out + nbrruns--; + return; + } + } + setLength(nbrruns - 1, (short) (start - Util.toIntUnsigned(getValue(nbrruns - 1)) - 1)); + if (newend < oldend) { + setValue(nbrruns, (short) newend); + setLength(nbrruns, (short) (oldend - newend - 1)); + nbrruns++; + } else if (newend > oldend) { + setValue(nbrruns, (short) oldend); + setLength(nbrruns, (short) (newend - oldend - 1)); + nbrruns++; + } + } + + // convert to bitmap *if needed* (useful if you know it can't be an array) + private Container toBitmapIfNeeded() { + int sizeAsRunContainer = RunContainer.serializedSizeInBytes(this.nbrruns); + int sizeAsBitmapContainer = BitmapContainer.serializedSizeInBytes(0); + if (sizeAsBitmapContainer > sizeAsRunContainer) { + return this; + } + return toBitmapContainer(); + } + + /** + * Convert the container to either a Bitmap or an Array Container, depending on the cardinality. + * + * @param card the current cardinality + * @return new container + */ + Container toBitmapOrArrayContainer(int card) { + // int card = this.getCardinality(); + if (card <= ArrayContainer.DEFAULT_MAX_SIZE) { + ArrayContainer answer = new ArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = Util.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + Util.toIntUnsigned(this.getLength(rlepos)); + + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + answer.content[answer.cardinality++] = (short) runValue; + } + } + return answer; + } + BitmapContainer answer = new BitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + Util.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = card; + return answer; + } + + // convert to bitmap or array *if needed* + 
private Container toEfficientContainer() { + int sizeAsRunContainer = RunContainer.serializedSizeInBytes(this.nbrruns); + int sizeAsBitmapContainer = BitmapContainer.serializedSizeInBytes(0); + int card = this.getCardinality(); + int sizeAsArrayContainer = ArrayContainer.serializedSizeInBytes(card); + if (sizeAsRunContainer <= Math.min(sizeAsBitmapContainer, sizeAsArrayContainer)) { + return this; + } + if (card <= ArrayContainer.DEFAULT_MAX_SIZE) { + ArrayContainer answer = new ArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = Util.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + Util.toIntUnsigned(this.getLength(rlepos)); + + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + answer.content[answer.cardinality++] = (short) runValue; + } + } + return answer; + } + BitmapContainer answer = new BitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + Util.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = card; + return answer; + } + + @Override + public MappeableContainer toMappeableContainer() { + return new MappeableRunContainer(this); + } + + /** + * Return the content of this container as a ShortBuffer. This creates a copy and might be + * relatively slow. 
+ * + * @return the ShortBuffer + */ + public ShortBuffer toShortBuffer() { + ShortBuffer sb = ShortBuffer.allocate(this.nbrruns * 2); + sb.put(this.valueslength, 0, this.nbrruns * 2); + return sb; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + for (int k = 0; k < this.nbrruns; ++k) { + sb.append("["); + sb.append(Util.toIntUnsigned(this.getValue(k))); + sb.append(","); + sb.append(Util.toIntUnsigned(this.getValue(k)) + Util.toIntUnsigned(this.getLength(k))); + sb.append("]"); + } + return sb.toString(); + } + + + @Override + public void trim() { + if (valueslength.length == 2 * nbrruns) { + return; + } + valueslength = Arrays.copyOf(valueslength, 2 * nbrruns); + } + + + // To check if a value length contains a given value + private boolean valueLengthContains(int value, int index) { + int initialValue = Util.toIntUnsigned(getValue(index)); + int length = Util.toIntUnsigned(getLength(index)); + + return value <= initialValue + length; + } + + @Override + protected void writeArray(DataOutput out) throws IOException { + out.writeShort(Short.reverseBytes((short) this.nbrruns)); + for (int k = 0; k < 2 * this.nbrruns; ++k) { + out.writeShort(Short.reverseBytes(this.valueslength[k])); + } + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + + } + + @Override + public Container xor(ArrayContainer x) { + // if the cardinality of the array is small, guess that the output will still be a run container + final int arbitrary_threshold = 32; // 32 is arbitrary here + if (x.getCardinality() < arbitrary_threshold) { + return lazyxor(x).repairAfterLazy(); + } + // otherwise, we expect the output to be either an array or bitmap + final int card = getCardinality(); + if (card <= ArrayContainer.DEFAULT_MAX_SIZE) { + // if the cardinality is small, we construct the solution in place + return x.xor(this.getShortIterator()); + } + // otherwise, we generate a bitmap (even if runcontainer would 
be better) + return toBitmapOrArrayContainer(card).ixor(x); + } + + @Override + public Container xor(BitmapContainer x) { + // could be implemented as return toTemporaryBitmap().ixor(x); + BitmapContainer answer = x.clone(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + Util.flipBitmapRange(answer.bitmap, start, end); + } + answer.computeCardinality(); + if (answer.getCardinality() > ArrayContainer.DEFAULT_MAX_SIZE) { + return answer; + } else { + return answer.toArrayContainer(); + } + } + + @Override + public Container xor(RunContainer x) { + if (x.nbrruns == 0) { + return this.clone(); + } + if (this.nbrruns == 0) { + return x.clone(); + } + RunContainer answer = new RunContainer(new short[2 * (this.nbrruns + x.nbrruns)], 0); + int rlepos = 0; + int xrlepos = 0; + + while (true) { + if (Util.compareUnsigned(getValue(rlepos), x.getValue(xrlepos)) < 0) { + answer.smartAppendExclusive(getValue(rlepos), getLength(rlepos)); + rlepos++; + + if (rlepos == this.nbrruns) { + while (xrlepos < x.nbrruns) { + answer.smartAppendExclusive(x.getValue(xrlepos), x.getLength(xrlepos)); + xrlepos++; + } + break; + } + } else { + answer.smartAppendExclusive(x.getValue(xrlepos), x.getLength(xrlepos)); + + xrlepos++; + if (xrlepos == x.nbrruns) { + while (rlepos < this.nbrruns) { + answer.smartAppendExclusive(getValue(rlepos), getLength(rlepos)); + rlepos++; + } + break; + } + } + } + return answer.toEfficientContainer(); + } + + @Override + public void forEach(short msb, IntConsumer ic) { + int high = ((int) msb) << 16; + for (int k = 0; k < this.nbrruns; ++k) { + int base = (this.getValue(k) & 0xFFFF) | high; + int le = this.getLength(k) & 0xFFFF; + for (int l = base; l <= base + le; ++l) { + ic.accept(l); + } + } + } + + @Override + public BitmapContainer toBitmapContainer() { + int card = this.getCardinality(); + BitmapContainer answer = new 
BitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = Util.toIntUnsigned(this.getValue(rlepos)); + int end = start + Util.toIntUnsigned(this.getLength(rlepos)) + 1; + Util.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = card; + return answer; + } + +} + + +final class RunContainerShortIterator implements PeekableShortIterator { + int pos; + int le = 0; + int maxlength; + int base; + + RunContainer parent; + + RunContainerShortIterator() { + } + + RunContainerShortIterator(RunContainer p) { + wrap(p); + } + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos < parent.nbrruns; + } + + @Override + public short next() { + short ans = (short) (base + le); + le++; + if (le > maxlength) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = Util.toIntUnsigned(parent.getLength(pos)); + base = Util.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public int nextAsInt() { + int ans = base + le; + le++; + if (le > maxlength) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = Util.toIntUnsigned(parent.getLength(pos)); + base = Util.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented");// TODO + } + + void wrap(RunContainer p) { + parent = p; + pos = 0; + le = 0; + if (pos < parent.nbrruns) { + maxlength = Util.toIntUnsigned(parent.getLength(pos)); + base = Util.toIntUnsigned(parent.getValue(pos)); + } + } + + @Override + public void advanceIfNeeded(short minval) { + while (base + maxlength < Util.toIntUnsigned(minval)) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = Util.toIntUnsigned(parent.getLength(pos)); + base = Util.toIntUnsigned(parent.getValue(pos)); + } 
else { + return; + } + } + if (base > Util.toIntUnsigned(minval)) { + return; + } + le = Util.toIntUnsigned(minval) - base; + } + + + @Override + public short peekNext() { + return (short) (base + le); + } + +} + + +final class ReverseRunContainerShortIterator implements ShortIterator { + int pos; + int le; + RunContainer parent; + int maxlength; + int base; + + + ReverseRunContainerShortIterator() { + } + + ReverseRunContainerShortIterator(RunContainer p) { + wrap(p); + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos >= 0; + } + + @Override + public short next() { + short ans = (short) (base + maxlength - le); + le++; + if (le > maxlength) { + pos--; + le = 0; + if (pos >= 0) { + maxlength = Util.toIntUnsigned(parent.getLength(pos)); + base = Util.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public int nextAsInt() { + int ans = base + maxlength - le; + le++; + if (le > maxlength) { + pos--; + le = 0; + if (pos >= 0) { + maxlength = Util.toIntUnsigned(parent.getLength(pos)); + base = Util.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented");// TODO + } + + void wrap(RunContainer p) { + parent = p; + pos = parent.nbrruns - 1; + le = 0; + if (pos >= 0) { + maxlength = Util.toIntUnsigned(parent.getLength(pos)); + base = Util.toIntUnsigned(parent.getValue(pos)); + } + } + +} + diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ShortIterator.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ShortIterator.java new file mode 100644 index 000000000..2e1591858 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/ShortIterator.java @@ -0,0 +1,39 @@ +/* + * (c) the authors Licensed under the Apache 
/**
 * Iterator over short values.
 *
 * <p>Extends {@link Cloneable} and narrows {@code clone()} to return a {@code ShortIterator},
 * so an iterator can be copied through this interface.
 */
public interface ShortIterator extends Cloneable {
  /**
   * Creates a copy of the iterator.
   *
   * @return a clone of the current iterator
   */
  ShortIterator clone();

  /**
   * Tells whether a further value is available.
   *
   * @return whether there is another value
   */
  boolean hasNext();


  /**
   * Returns the next value and advances the iterator.
   *
   * @return next short value
   */
  short next();

  /**
   * Returns the next value widened to an int and advances the iterator.
   *
   * @return next short value as int value (using the least significant 16 bits)
   */
  int nextAsInt();

  /**
   * If possible, remove the current value. Implementations are not required to support this;
   * the run-container iterators in this code base throw a RuntimeException instead.
   */
  void remove();

}
/**
 * Finds the smallest index x greater than {@code pos} such that {@code array[x] >= min}, with
 * elements compared after widening through {@code toIntUnsigned}. If no such index exists below
 * {@code length}, {@code length} is returned. Based on code by O. Kaser.
 *
 * <p>The search first doubles a span from {@code pos + 1} until it overshoots {@code min} or the
 * end of the array, then finishes with a binary search inside the last span.
 * NOTE(review): this presumably requires {@code array[0..length)} to be sorted in unsigned
 * order - nothing in this method checks it.
 *
 * @param array array to search within
 * @param pos starting position of the search (exclusive)
 * @param length number of leading elements of {@code array} to consider
 * @param min minimum value
 * @return the smallest x greater than {@code pos} with {@code array[x]} at least as large as
 *         {@code min}, or {@code length} if there is none
 */
public static int advanceUntil(short[] array, int pos, int length, short min) {
  int lower = pos + 1;

  // special handling for a possibly common sequential case
  if (lower >= length || toIntUnsigned(array[lower]) >= toIntUnsigned(min)) {
    return lower;
  }

  int spansize = 1; // could set larger
  // bootstrap an upper limit

  while (lower + spansize < length
      && toIntUnsigned(array[lower + spansize]) < toIntUnsigned(min)) {
    spansize *= 2; // hoping the compiler reduces this to a shift
  }
  // clamp the probe to the last valid index when the span ran past the end
  int upper = (lower + spansize < length) ? lower + spansize : length - 1;

  // maybe we are lucky (could be a common case when the seek ahead is expected
  // to be small and sequential; it would otherwise make us look bad)
  if (array[upper] == min) {
    return upper;
  }

  if (toIntUnsigned(array[upper]) < toIntUnsigned(min)) {
    // even the last probed element is below min: the array has no item >= min
    return length;
  }

  // we know that the next-smallest span was too small
  lower += (spansize / 2);

  // else begin binary search
  // invariant: array[lower] < min && array[upper] > min
  while (lower + 1 != upper) {
    int mid = (lower + upper) / 2;
    short arraymid = array[mid];
    if (arraymid == min) {
      return mid;
    } else if (toIntUnsigned(arraymid) < toIntUnsigned(min)) {
      lower = mid;
    } else {
      upper = mid;
    }
  }
  return upper;

}
{ + return middleIndex; + } + } + return -(low + 1); + } + + /** + * Compares the two specified {@code short} values, treating them as unsigned values between + * {@code 0} and {@code 2^16 - 1} inclusive. + * + * @param a the first unsigned {@code short} to compare + * @param b the second unsigned {@code short} to compare + * @return a negative value if {@code a} is less than {@code b}; a positive value if {@code a} is + * greater than {@code b}; or zero if they are equal + */ + public static int compareUnsigned(short a, short b) { + return toIntUnsigned(a) - toIntUnsigned(b); + } + + /** + * Compute the bitwise AND between two long arrays and write the set bits in the container. + * + * @param container where we write + * @param bitmap1 first bitmap + * @param bitmap2 second bitmap + */ + public static void fillArrayAND(final short[] container, final long[] bitmap1, + final long[] bitmap2) { + int pos = 0; + if (bitmap1.length != bitmap2.length) { + throw new IllegalArgumentException("not supported"); + } + for (int k = 0; k < bitmap1.length; ++k) { + long bitset = bitmap1[k] & bitmap2[k]; + while (bitset != 0) { + long t = bitset & -bitset; + container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } + + /** + * Compute the bitwise ANDNOT between two long arrays and write the set bits in the container. 
+ * + * @param container where we write + * @param bitmap1 first bitmap + * @param bitmap2 second bitmap + */ + public static void fillArrayANDNOT(final short[] container, final long[] bitmap1, + final long[] bitmap2) { + int pos = 0; + if (bitmap1.length != bitmap2.length) { + throw new IllegalArgumentException("not supported"); + } + for (int k = 0; k < bitmap1.length; ++k) { + long bitset = bitmap1[k] & (~bitmap2[k]); + while (bitset != 0) { + long t = bitset & -bitset; + container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } + + /** + * Compute the bitwise XOR between two long arrays and write the set bits in the container. + * + * @param container where we write + * @param bitmap1 first bitmap + * @param bitmap2 second bitmap + */ + public static void fillArrayXOR(final short[] container, final long[] bitmap1, + final long[] bitmap2) { + int pos = 0; + if (bitmap1.length != bitmap2.length) { + throw new IllegalArgumentException("not supported"); + } + for (int k = 0; k < bitmap1.length; ++k) { + long bitset = bitmap1[k] ^ bitmap2[k]; + while (bitset != 0) { + long t = bitset & -bitset; + container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } + + /** + * flip bits at start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + */ + public static void flipBitmapRange(long[] bitmap, int start, int end) { + if (start == end) { + return; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + bitmap[firstword] ^= ~(~0L << start); + for (int i = firstword; i < endword; i++) { + bitmap[i] = ~bitmap[i]; + } + bitmap[endword] ^= ~0L >>> -end; + } + + /** + * Hamming weight of the 64-bit words involved in the range + * start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last 
index to be modified (exclusive) + * @return the hamming weight + */ + public static int cardinalityInBitmapWordRange(long[] bitmap, int start, int end) { + if (start == end) { + return 0; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + int answer = 0; + for (int i = firstword; i <= endword; i++) { + answer += Long.bitCount(bitmap[i]); + } + return answer; + } + + protected static short highbits(int x) { + return (short) (x >>> 16); + } + + protected static short highbits(long x) { + return (short) (x >>> 16); + } + + // starts with binary search and finishes with a sequential search + protected static int hybridUnsignedBinarySearch(final short[] array, final int begin, + final int end, final short k) { + int ikey = toIntUnsigned(k); + // next line accelerates the possibly common case where the value would + // be inserted at the end + if ((end > 0) && (toIntUnsigned(array[end - 1]) < ikey)) { + return -end - 1; + } + int low = begin; + int high = end - 1; + // 32 in the next line matches the size of a cache line + while (low + 32 <= high) { + final int middleIndex = (low + high) >>> 1; + final int middleValue = toIntUnsigned(array[middleIndex]); + + if (middleValue < ikey) { + low = middleIndex + 1; + } else if (middleValue > ikey) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + // we finish the job with a sequential search + int x = low; + for (; x <= high; ++x) { + final int val = toIntUnsigned(array[x]); + if (val >= ikey) { + if (val == ikey) { + return x; + } + break; + } + } + return -(x + 1); + } + + protected static short lowbits(int x) { + return (short) (x & 0xFFFF); + } + + protected static short lowbits(long x) { + return (short) (x & 0xFFFF); + } + + protected static short maxLowBit() { + return (short) 0xFFFF; + } + + protected static int maxLowBitAsInteger() { + return 0xFFFF; + } + + /** + * clear bits at start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first 
index to be modified (inclusive) + * @param end last index to be modified (exclusive) + */ + public static void resetBitmapRange(long[] bitmap, int start, int end) { + if (start == end) { + return; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + + if (firstword == endword) { + bitmap[firstword] &= ~((~0L << start) & (~0L >>> -end)); + return; + } + bitmap[firstword] &= ~(~0L << start); + for (int i = firstword + 1; i < endword; i++) { + bitmap[i] = 0; + } + bitmap[endword] &= ~(~0L >>> -end); + + } + + /** + * Given a word w, return the position of the jth true bit. + * + * @param w word + * @param j index + * @return position of jth true bit in w + */ + public static int select(long w, int j) { + int seen = 0; + // Divide 64bit + int part = (int) (w & 0xFFFFFFFF); + int n = Integer.bitCount(part); + if (n <= j) { + part = (int) (w >>> 32); + seen += 32; + j -= n; + } + int ww = part; + + // Divide 32bit + part = ww & 0xFFFF; + + n = Integer.bitCount(part); + if (n <= j) { + + part = ww >>> 16; + seen += 16; + j -= n; + } + ww = part; + + // Divide 16bit + part = ww & 0xFF; + n = Integer.bitCount(part); + if (n <= j) { + part = ww >>> 8; + seen += 8; + j -= n; + } + ww = part; + + // Lookup in final byte + int counter; + for (counter = 0; counter < 8; counter++) { + j -= (ww >>> counter) & 1; + if (j < 0) { + break; + } + } + return seen + counter; + } + + /** + * set bits at start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + */ + public static void setBitmapRange(long[] bitmap, int start, int end) { + if (start == end) { + return; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + if (firstword == endword) { + bitmap[firstword] |= (~0L << start) & (~0L >>> -end); + return; + } + bitmap[firstword] |= ~0L << start; + for (int i = firstword + 1; i < endword; i++) { + bitmap[i] = ~0L; + } + 
bitmap[endword] |= ~0L >>> -end; + } + + /** + * set bits at start, start+1,..., end-1 and report the + * cardinality change + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + * @return cardinality change + */ + public static int setBitmapRangeAndCardinalityChange(long[] bitmap, int start, int end) { + int cardbefore = cardinalityInBitmapWordRange(bitmap, start, end); + setBitmapRange(bitmap, start, end); + int cardafter = cardinalityInBitmapWordRange(bitmap, start, end); + return cardafter - cardbefore; + } + + /** + * flip bits at start, start+1,..., end-1 and report the + * cardinality change + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + * @return cardinality change + */ + public static int flipBitmapRangeAndCardinalityChange(long[] bitmap, int start, int end) { + int cardbefore = cardinalityInBitmapWordRange(bitmap, start, end); + flipBitmapRange(bitmap, start, end); + int cardafter = cardinalityInBitmapWordRange(bitmap, start, end); + return cardafter - cardbefore; + } + + /** + * reset bits at start, start+1,..., end-1 and report the + * cardinality change + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + * @return cardinality change + */ + public static int resetBitmapRangeAndCardinalityChange(long[] bitmap, int start, int end) { + int cardbefore = cardinalityInBitmapWordRange(bitmap, start, end); + resetBitmapRange(bitmap, start, end); + int cardafter = cardinalityInBitmapWordRange(bitmap, start, end); + return cardafter - cardbefore; + } + + protected static int toIntUnsigned(short x) { + return x & 0xFFFF; + } + + /** + * Look for value k in array in the range [begin,end). If the value is found, return its index. 
If + * not, return -(i+1) where i is the index where the value would be inserted. The array is assumed + * to contain sorted values where shorts are interpreted as unsigned integers. + * + * @param array array where we search + * @param begin first index (inclusive) + * @param end last index (exclusive) + * @param k value we search for + * @return count + */ + public static int unsignedBinarySearch(final short[] array, final int begin, final int end, + final short k) { + if (USE_HYBRID_BINSEARCH) { + return hybridUnsignedBinarySearch(array, begin, end, k); + } else { + return branchyUnsignedBinarySearch(array, begin, end, k); + } + } + + /** + * Compute the difference between two sorted lists and write the result to the provided output + * array + * + * @param set1 first array + * @param length1 length of first array + * @param set2 second array + * @param length2 length of second array + * @param buffer output array + * @return cardinality of the difference + */ + public static int unsignedDifference(final short[] set1, final int length1, final short[] set2, + final int length2, final short[] buffer) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == length2) { + System.arraycopy(set1, 0, buffer, 0, length1); + return length1; + } + if (0 == length1) { + return 0; + } + short s1 = set1[k1]; + short s2 = set2[k2]; + while (true) { + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + buffer[pos++] = s1; + ++k1; + if (k1 >= length1) { + break; + } + s1 = set1[k1]; + } else if (toIntUnsigned(s1) == toIntUnsigned(s2)) { + ++k1; + ++k2; + if (k1 >= length1) { + break; + } + if (k2 >= length2) { + System.arraycopy(set1, k1, buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s1 = set1[k1]; + s2 = set2[k2]; + } else {// if (val1>val2) + ++k2; + if (k2 >= length2) { + System.arraycopy(set1, k1, buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s2 = set2[k2]; + } + } + return pos; + } + + /** + * Compute the difference between two sorted lists and write 
the result to the provided output + * array + * + * @param set1 first array + * @param set2 second array + * @param buffer output array + * @return cardinality of the difference + */ + public static int unsignedDifference(ShortIterator set1, ShortIterator set2, + final short[] buffer) { + int pos = 0; + if (!set2.hasNext()) { + while (set1.hasNext()) { + buffer[pos++] = set1.next(); + } + return pos; + } + if (!set1.hasNext()) { + return 0; + } + short v1 = set1.next(); + short v2 = set2.next(); + while (true) { + if (toIntUnsigned(v1) < toIntUnsigned(v2)) { + buffer[pos++] = v1; + if (!set1.hasNext()) { + return pos; + } + v1 = set1.next(); + } else if (v1 == v2) { + if (!set1.hasNext()) { + break; + } + if (!set2.hasNext()) { + while (set1.hasNext()) { + buffer[pos++] = set1.next(); + } + return pos; + } + v1 = set1.next(); + v2 = set2.next(); + } else {// if (val1>val2) + if (!set2.hasNext()) { + buffer[pos++] = v1; + while (set1.hasNext()) { + buffer[pos++] = set1.next(); + } + return pos; + } + v2 = set2.next(); + } + } + return pos; + } + + /** + * Compute the exclusive union of two sorted lists and write the result to the provided output + * array + * + * @param set1 first array + * @param length1 length of first array + * @param set2 second array + * @param length2 length of second array + * @param buffer output array + * @return cardinality of the exclusive union + */ + public static int unsignedExclusiveUnion2by2(final short[] set1, final int length1, + final short[] set2, final int length2, final short[] buffer) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == length2) { + System.arraycopy(set1, 0, buffer, 0, length1); + return length1; + } + if (0 == length1) { + System.arraycopy(set2, 0, buffer, 0, length2); + return length2; + } + short s1 = set1[k1]; + short s2 = set2[k2]; + while (true) { + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + buffer[pos++] = s1; + ++k1; + if (k1 >= length1) { + System.arraycopy(set2, k2, buffer, pos, length2 - k2); + 
return pos + length2 - k2; + } + s1 = set1[k1]; + } else if (toIntUnsigned(s1) == toIntUnsigned(s2)) { + ++k1; + ++k2; + if (k1 >= length1) { + System.arraycopy(set2, k2, buffer, pos, length2 - k2); + return pos + length2 - k2; + } + if (k2 >= length2) { + System.arraycopy(set1, k1, buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s1 = set1[k1]; + s2 = set2[k2]; + } else {// if (val1>val2) + buffer[pos++] = s2; + ++k2; + if (k2 >= length2) { + System.arraycopy(set1, k1, buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s2 = set2[k2]; + } + } + // return pos; + } + + /** + * Intersect two sorted lists and write the result to the provided output array + * + * @param set1 first array + * @param length1 length of first array + * @param set2 second array + * @param length2 length of second array + * @param buffer output array + * @return cardinality of the intersection + */ + public static int unsignedIntersect2by2(final short[] set1, final int length1, final short[] set2, + final int length2, final short[] buffer) { + if (set1.length * 64 < set2.length) { + return unsignedOneSidedGallopingIntersect2by2(set1, length1, set2, length2, buffer); + } else if (set2.length * 64 < set1.length) { + return unsignedOneSidedGallopingIntersect2by2(set2, length2, set1, length1, buffer); + } else { + return unsignedLocalIntersect2by2(set1, length1, set2, length2, buffer); + } + } + + /** + * Checks if two arrays intersect + * + * @param set1 first array + * @param length1 length of first array + * @param set2 second array + * @param length2 length of second array + * @return true if they intersect + */ + public static boolean unsignedIntersects(short[] set1, int length1, short[] set2, int length2) { + // galloping might be faster, but we do not expect this function to be slow + if ((0 == length1) || (0 == length2)) { + return false; + } + int k1 = 0; + int k2 = 0; + short s1 = set1[k1]; + short s2 = set2[k2]; + mainwhile: + while (true) { + if 
(toIntUnsigned(s2) < toIntUnsigned(s1)) { + do { + ++k2; + if (k2 == length2) { + break mainwhile; + } + s2 = set2[k2]; + } while (toIntUnsigned(s2) < toIntUnsigned(s1)); + } + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + do { + ++k1; + if (k1 == length1) { + break mainwhile; + } + s1 = set1[k1]; + } while (toIntUnsigned(s1) < toIntUnsigned(s2)); + } else { + return true; + } + } + return false; + } + + protected static int unsignedLocalIntersect2by2(final short[] set1, final int length1, + final short[] set2, final int length2, final short[] buffer) { + if ((0 == length1) || (0 == length2)) { + return 0; + } + int k1 = 0; + int k2 = 0; + int pos = 0; + short s1 = set1[k1]; + short s2 = set2[k2]; + + mainwhile: + while (true) { + int v1 = toIntUnsigned(s1); + int v2 = toIntUnsigned(s2); + if (v2 < v1) { + do { + ++k2; + if (k2 == length2) { + break mainwhile; + } + s2 = set2[k2]; + v2 = toIntUnsigned(s2); + } while (v2 < v1); + } + if (v1 < v2) { + do { + ++k1; + if (k1 == length1) { + break mainwhile; + } + s1 = set1[k1]; + v1 = toIntUnsigned(s1); + } while (v1 < v2); + } else { + // (set2[k2] == set1[k1]) + buffer[pos++] = s1; + ++k1; + if (k1 == length1) { + break; + } + ++k2; + if (k2 == length2) { + break; + } + s1 = set1[k1]; + s2 = set2[k2]; + } + } + return pos; + } + + /** + * Compute the cardinality of the intersection + * + * @param set1 first set + * @param length1 how many values to consider in the first set + * @param set2 second set + * @param length2 how many values to consider in the second set + * @return cardinality of the intersection + */ + public static int unsignedLocalIntersect2by2Cardinality(final short[] set1, final int length1, + final short[] set2, final int length2) { + if ((0 == length1) || (0 == length2)) { + return 0; + } + int k1 = 0; + int k2 = 0; + int pos = 0; + short s1 = set1[k1]; + short s2 = set2[k2]; + + mainwhile: + while (true) { + int v1 = toIntUnsigned(s1); + int v2 = toIntUnsigned(s2); + if (v2 < v1) { + do { + ++k2; 
+ if (k2 == length2) { + break mainwhile; + } + s2 = set2[k2]; + v2 = toIntUnsigned(s2); + } while (v2 < v1); + } + if (v1 < v2) { + do { + ++k1; + if (k1 == length1) { + break mainwhile; + } + s1 = set1[k1]; + v1 = toIntUnsigned(s1); + } while (v1 < v2); + } else { + // (set2[k2] == set1[k1]) + pos++; + ++k1; + if (k1 == length1) { + break; + } + ++k2; + if (k2 == length2) { + break; + } + s1 = set1[k1]; + s2 = set2[k2]; + } + } + return pos; + } + + protected static int unsignedOneSidedGallopingIntersect2by2(final short[] smallSet, + final int smallLength, final short[] largeSet, final int largeLength, final short[] buffer) { + if (0 == smallLength) { + return 0; + } + int k1 = 0; + int k2 = 0; + int pos = 0; + short s1 = largeSet[k1]; + short s2 = smallSet[k2]; + while (true) { + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + k1 = advanceUntil(largeSet, k1, largeLength, s2); + if (k1 == largeLength) { + break; + } + s1 = largeSet[k1]; + } + if (toIntUnsigned(s2) < toIntUnsigned(s1)) { + ++k2; + if (k2 == smallLength) { + break; + } + s2 = smallSet[k2]; + } else { + // (set2[k2] == set1[k1]) + buffer[pos++] = s2; + ++k2; + if (k2 == smallLength) { + break; + } + s2 = smallSet[k2]; + k1 = advanceUntil(largeSet, k1, largeLength, s2); + if (k1 == largeLength) { + break; + } + s1 = largeSet[k1]; + } + + } + return pos; + + } + + /** + * Unite two sorted lists and write the result to the provided output array + * + * @param set1 first array + * @param length1 length of first array + * @param set2 second array + * @param length2 length of second array + * @param buffer output array + * @return cardinality of the union + */ + public static int unsignedUnion2by2(final short[] set1, final int length1, final short[] set2, + final int length2, final short[] buffer) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == length2) { + System.arraycopy(set1, 0, buffer, 0, length1); + return length1; + } + if (0 == length1) { + System.arraycopy(set2, 0, buffer, 0, length2); + return 
length2; + } + short s1 = set1[k1]; + short s2 = set2[k2]; + while (true) { + int v1 = toIntUnsigned(s1); + int v2 = toIntUnsigned(s2); + if (v1 < v2) { + buffer[pos++] = s1; + ++k1; + if (k1 >= length1) { + System.arraycopy(set2, k2, buffer, pos, length2 - k2); + return pos + length2 - k2; + } + s1 = set1[k1]; + } else if (v1 == v2) { + buffer[pos++] = s1; + ++k1; + ++k2; + if (k1 >= length1) { + System.arraycopy(set2, k2, buffer, pos, length2 - k2); + return pos + length2 - k2; + } + if (k2 >= length2) { + System.arraycopy(set1, k1, buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s1 = set1[k1]; + s2 = set2[k2]; + } else {// if (set1[k1]>set2[k2]) + buffer[pos++] = s2; + ++k2; + if (k2 >= length2) { + System.arraycopy(set1, k1, buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s2 = set2[k2]; + } + } + // return pos; + } + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferBitSetUtil.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferBitSetUtil.java new file mode 100644 index 000000000..3012c9a65 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferBitSetUtil.java @@ -0,0 +1,113 @@ +package com.fr.third.bitmap.roaringbitmap.buffer; + + +import com.fr.third.bitmap.roaringbitmap.IntIterator; + +import java.nio.LongBuffer; +import java.nio.ShortBuffer; +import java.util.Arrays; +import java.util.BitSet; + + +/*** + * + * This class provides convenience functions to manipulate BitSet and MutableRoaringBitmap objects. 
+ * + */ +public class BufferBitSetUtil { + // todo: add a method to convert an ImmutableRoaringBitmap to a BitSet using BitSet.valueOf + + // a block consists has a maximum of 1024 words, each representing 64 bits, + // thus representing at maximum 65536 bits + static final private int BLOCK_LENGTH = MappeableBitmapContainer.MAX_CAPACITY / Long.SIZE; // + // 64-bit + // word + + private static MappeableArrayContainer arrayContainerOf(final int from, final int to, + final int cardinality, final long[] words) { + // precondition: cardinality is max 4096 + final short[] content = new short[cardinality]; + int index = 0; + + for (int i = from, socket = 0; i < to; ++i, socket += Long.SIZE) { + long word = words[i]; + while (word != 0) { + long t = word & -word; + content[index++] = (short) (socket + Long.bitCount(t - 1)); + word ^= t; + } + } + return new MappeableArrayContainer(ShortBuffer.wrap(content), cardinality); + } + + /** + * Generate a MutableRoaringBitmap out of a long[], each long using little-endian representation + * of its bits + * + * @param words array of longs (will not be modified) + * @return roaring bitmap + * @see BitSet#toLongArray() for an equivalent + */ + public static MutableRoaringBitmap bitmapOf(final long[] words) { + // split long[] into blocks. 
+ // each block becomes a single container, if any bit is set + final MutableRoaringBitmap ans = new MutableRoaringBitmap(); + int containerIndex = 0; + for (int from = 0; from < words.length; from += BLOCK_LENGTH) { + final int to = Math.min(from + BLOCK_LENGTH, words.length); + final int blockCardinality = cardinality(from, to, words); + if (blockCardinality > 0) { + ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex++, + BufferUtil.highbits(from * Long.SIZE), + BufferBitSetUtil.containerOf(from, to, blockCardinality, words)); + } + } + return ans; + } + + private static int cardinality(final int from, final int to, final long[] words) { + int sum = 0; + for (int i = from; i < to; i++) { + sum += Long.bitCount(words[i]); + } + return sum; + } + + + private static MappeableContainer containerOf(final int from, final int to, + final int blockCardinality, final long[] words) { + // find the best container available + if (blockCardinality <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + // containers with DEFAULT_MAX_SIZE or less integers should be + // ArrayContainers + return arrayContainerOf(from, to, blockCardinality, words); + } else { + // otherwise use bitmap container + return new MappeableBitmapContainer( + LongBuffer.wrap(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH)), blockCardinality); + } + } + + + /** + * Compares a RoaringBitmap and a BitSet. They are equal if and only if they contain the same set + * of integers. 
+ * + * @param bitset first object to be compared + * @param bitmap second object to be compared + * @return whether they are equal + */ + public static boolean equals(final BitSet bitset, final ImmutableRoaringBitmap bitmap) { + if (bitset.cardinality() != bitmap.getCardinality()) { + return false; + } + final IntIterator it = bitmap.getIntIterator(); + while (it.hasNext()) { + int val = it.next(); + if (!bitset.get(val)) { + return false; + } + } + return true; + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferFastAggregation.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferFastAggregation.java new file mode 100644 index 000000000..78d609675 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferFastAggregation.java @@ -0,0 +1,631 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.PriorityQueue; + + +/** + * Fast algorithms to aggregate many bitmaps. + * + * @author Daniel Lemire + */ +public final class BufferFastAggregation { + + + /** + * Private constructor to prevent instantiation of utility class + */ + private BufferFastAggregation() { + } + + /** + * Compute the AND aggregate. + *

+ * In practice, calls {#link naive_and} + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap and(ImmutableRoaringBitmap... bitmaps) { + return naive_and(bitmaps); + } + + /** + * Compute the AND aggregate. + *

+ * In practice, calls {#link naive_and} + * + * @param bitmaps input bitmaps (ImmutableRoaringBitmap or MutableRoaringBitmap) + * @return aggregated bitmap + */ + public static MutableRoaringBitmap and(@SuppressWarnings("rawtypes") Iterator bitmaps) { + return naive_and(bitmaps); + } + + /** + * Compute the AND aggregate. + *

+ * In practice, calls {#link naive_and} + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap and(MutableRoaringBitmap... bitmaps) { + return and(convertToImmutable(bitmaps)); + } + + /** + * Convenience method converting one type of iterator into another, to avoid unnecessary warnings. + * + * @param i input bitmaps + * @return an iterator over the provided iterator, with a different type + */ + public static Iterator convertToImmutable( + final Iterator i) { + return new Iterator() { + + @Override + public boolean hasNext() { + return i.hasNext(); + } + + @Override + public ImmutableRoaringBitmap next() { + return i.next(); + } + + @Override + public void remove() { + } + + ; + + }; + + } + + private static ImmutableRoaringBitmap[] convertToImmutable(MutableRoaringBitmap[] array) { + ImmutableRoaringBitmap[] answer = new ImmutableRoaringBitmap[array.length]; + for (int k = 0; k < answer.length; ++k) { + answer[k] = array[k]; + } + return answer; + } + + /** + * Minimizes memory usage while computing the or aggregate on a moderate number of bitmaps. + *

+ * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + * @see #or(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap horizontal_or(ImmutableRoaringBitmap... bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + if (bitmaps.length == 0) { + return answer; + } + PriorityQueue pq = new PriorityQueue(bitmaps.length); + for (int k = 0; k < bitmaps.length; ++k) { + MappeableContainerPointer x = bitmaps[k].highLowContainer.getContainerPointer(); + if (x.getContainer() != null) { + pq.add(x); + } + } + + while (!pq.isEmpty()) { + MappeableContainerPointer x1 = pq.poll(); + if (pq.isEmpty() || (pq.peek().key() != x1.key())) { + answer.getMappeableRoaringArray().append(x1.key(), x1.getContainer().clone()); + x1.advance(); + if (x1.getContainer() != null) { + pq.add(x1); + } + continue; + } + MappeableContainerPointer x2 = pq.poll(); + MappeableContainer newc = x1.getContainer().lazyOR(x2.getContainer()); + while (!pq.isEmpty() && (pq.peek().key() == x1.key())) { + + MappeableContainerPointer x = pq.poll(); + newc = newc.lazyIOR(x.getContainer()); + x.advance(); + if (x.getContainer() != null) { + pq.add(x); + } else if (pq.isEmpty()) { + break; + } + } + newc = newc.repairAfterLazy(); + answer.getMappeableRoaringArray().append(x1.key(), newc); + x1.advance(); + if (x1.getContainer() != null) { + pq.add(x1); + } + x2.advance(); + if (x2.getContainer() != null) { + pq.add(x2); + } + } + return answer; + } + + /** + * Calls naive_or. + * + * @param bitmaps input bitmaps (ImmutableRoaringBitmap or MutableRoaringBitmap) + * @return aggregated bitmap + */ + @Deprecated + public static MutableRoaringBitmap horizontal_or(@SuppressWarnings("rawtypes") Iterator bitmaps) { + return naive_or(bitmaps); + } + + /** + * Minimizes memory usage while computing the or aggregate on a moderate number of bitmaps. + *

+ * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + * @see #or(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap horizontal_or(MutableRoaringBitmap... bitmaps) { + return horizontal_or(convertToImmutable(bitmaps)); + } + + /** + * Minimizes memory usage while computing the xor aggregate on a moderate number of bitmaps. + *

+ * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + * @see #xor(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap horizontal_xor(ImmutableRoaringBitmap... bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + if (bitmaps.length == 0) { + return answer; + } + PriorityQueue pq = new PriorityQueue(bitmaps.length); + for (int k = 0; k < bitmaps.length; ++k) { + MappeableContainerPointer x = bitmaps[k].highLowContainer.getContainerPointer(); + if (x.getContainer() != null) { + pq.add(x); + } + } + + while (!pq.isEmpty()) { + MappeableContainerPointer x1 = pq.poll(); + if (pq.isEmpty() || (pq.peek().key() != x1.key())) { + answer.getMappeableRoaringArray().append(x1.key(), x1.getContainer().clone()); + x1.advance(); + if (x1.getContainer() != null) { + pq.add(x1); + } + continue; + } + MappeableContainerPointer x2 = pq.poll(); + MappeableContainer newc = x1.getContainer().xor(x2.getContainer()); + while (!pq.isEmpty() && (pq.peek().key() == x1.key())) { + + MappeableContainerPointer x = pq.poll(); + newc = newc.ixor(x.getContainer()); + x.advance(); + if (x.getContainer() != null) { + pq.add(x); + } else if (pq.isEmpty()) { + break; + } + } + answer.getMappeableRoaringArray().append(x1.key(), newc); + x1.advance(); + if (x1.getContainer() != null) { + pq.add(x1); + } + x2.advance(); + if (x2.getContainer() != null) { + pq.add(x2); + } + } + return answer; + } + + /** + * Minimizes memory usage while computing the xor aggregate on a moderate number of bitmaps. + *

+ * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + * @see #xor(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap horizontal_xor(MutableRoaringBitmap... bitmaps) { + return horizontal_xor(convertToImmutable(bitmaps)); + } + + /** + * Compute overall AND between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_and(ImmutableRoaringBitmap... bitmaps) { + MutableRoaringBitmap answer; + + if (bitmaps.length > 0) { + answer = (bitmaps[0]).toMutableRoaringBitmap(); + for (int k = 1; k < bitmaps.length; ++k) { + answer = ImmutableRoaringBitmap.and(answer, bitmaps[k]); + } + } else { + answer = new MutableRoaringBitmap(); + } + + return answer; + } + + /** + * Compute overall AND between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps (ImmutableRoaringBitmap or MutableRoaringBitmap) + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_and(@SuppressWarnings("rawtypes") Iterator bitmaps) { + if (!bitmaps.hasNext()) { + return new MutableRoaringBitmap(); + } + MutableRoaringBitmap answer = + ((ImmutableRoaringBitmap) bitmaps.next()).toMutableRoaringBitmap(); + while (bitmaps.hasNext()) { + answer.and((ImmutableRoaringBitmap) bitmaps.next()); + } + return answer; + } + + /** + * Compute overall AND between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_and(MutableRoaringBitmap... bitmaps) { + if (bitmaps.length == 0) { + return new MutableRoaringBitmap(); + } + MutableRoaringBitmap answer = bitmaps[0].clone(); + for (int k = 1; k < bitmaps.length; ++k) { + answer.and(bitmaps[k]); + } + return answer; + } + + /** + * Compute overall OR between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_or(ImmutableRoaringBitmap... bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + for (int k = 0; k < bitmaps.length; ++k) { + answer.naivelazyor(bitmaps[k]); + } + answer.repairAfterLazy(); + return answer; + } + + /** + * Compute overall OR between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps (ImmutableRoaringBitmap or MutableRoaringBitmap) + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_or(@SuppressWarnings("rawtypes") Iterator bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + while (bitmaps.hasNext()) { + answer.naivelazyor((ImmutableRoaringBitmap) bitmaps.next()); + } + answer.repairAfterLazy(); + return answer; + } + + /** + * Compute overall OR between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_or(MutableRoaringBitmap... bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + for (int k = 0; k < bitmaps.length; ++k) { + answer.lazyor(bitmaps[k]); + } + answer.repairAfterLazy(); + return answer; + } + + /** + * Compute overall XOR between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_xor(ImmutableRoaringBitmap... bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + for (int k = 0; k < bitmaps.length; ++k) { + answer.xor(bitmaps[k]); + } + return answer; + } + + /** + * Compute overall XOR between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps (ImmutableRoaringBitmap or MutableRoaringBitmap) + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_xor(@SuppressWarnings("rawtypes") Iterator bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + while (bitmaps.hasNext()) { + answer.xor((ImmutableRoaringBitmap) bitmaps.next()); + } + return answer; + } + + /** + * Compute overall XOR between bitmaps two-by-two. + *

+ * This function runs in linear time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap naive_xor(MutableRoaringBitmap... bitmaps) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + for (int k = 0; k < bitmaps.length; ++k) { + answer.xor(bitmaps[k]); + } + return answer; + } + + /** + * Compute overall OR between bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap or(ImmutableRoaringBitmap... bitmaps) { + return naive_or(bitmaps); + } + + /** + * Compute overall OR between bitmaps. + * + * @param bitmaps input bitmaps (ImmutableRoaringBitmap or MutableRoaringBitmap) + * @return aggregated bitmap + */ + public static MutableRoaringBitmap or(@SuppressWarnings("rawtypes") Iterator bitmaps) { + return naive_or(bitmaps); + } + + /** + * Compute overall OR between bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap or(MutableRoaringBitmap... bitmaps) { + return naive_or(bitmaps); + } + + /** + * Uses a priority queue to compute the or aggregate. + *

+ * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + * @see #horizontal_or(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap priorityqueue_or(ImmutableRoaringBitmap... bitmaps) { + if (bitmaps.length == 0) { + return new MutableRoaringBitmap(); + } else if (bitmaps.length == 1) { + return bitmaps[0].toMutableRoaringBitmap(); + } + // we buffer the call to getLongSizeInBytes(), hence the code complexity + final ImmutableRoaringBitmap[] buffer = Arrays.copyOf(bitmaps, bitmaps.length); + final int[] sizes = new int[buffer.length]; + final boolean[] istmp = new boolean[buffer.length]; + for (int k = 0; k < sizes.length; ++k) { + sizes[k] = buffer[k].serializedSizeInBytes(); + } + PriorityQueue pq = new PriorityQueue(128, new Comparator() { + @Override + public int compare(Integer a, Integer b) { + return sizes[a] - sizes[b]; + } + }); + for (int k = 0; k < sizes.length; ++k) { + pq.add(k); + } + while (pq.size() > 1) { + Integer x1 = pq.poll(); + Integer x2 = pq.poll(); + if (istmp[x1] && istmp[x2]) { + buffer[x1] = MutableRoaringBitmap.lazyorfromlazyinputs((MutableRoaringBitmap) buffer[x1], + (MutableRoaringBitmap) buffer[x2]); + sizes[x1] = buffer[x1].serializedSizeInBytes(); + pq.add(x1); + } else if (istmp[x2]) { + ((MutableRoaringBitmap) buffer[x2]).lazyor(buffer[x1]); + sizes[x2] = buffer[x2].serializedSizeInBytes(); + pq.add(x2); + } else if (istmp[x1]) { + ((MutableRoaringBitmap) buffer[x1]).lazyor(buffer[x2]); + sizes[x1] = buffer[x1].serializedSizeInBytes(); + pq.add(x1); + } else { + buffer[x1] = ImmutableRoaringBitmap.lazyor(buffer[x1], buffer[x2]); + sizes[x1] = buffer[x1].serializedSizeInBytes(); + istmp[x1] = true; + pq.add(x1); + } + } + MutableRoaringBitmap answer = (MutableRoaringBitmap) buffer[pq.poll()]; + answer.repairAfterLazy(); + return answer; + } + + /** + * Uses a priority queue to compute the or aggregate. + *

+ * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + * @see #horizontal_or(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap priorityqueue_or( + @SuppressWarnings("rawtypes") Iterator bitmaps) { + if (!bitmaps.hasNext()) { + return new MutableRoaringBitmap(); + } + // we buffer the call to getLongSizeInBytes(), hence the code complexity + ArrayList buffer = new ArrayList(); + while (bitmaps.hasNext()) { + buffer.add((ImmutableRoaringBitmap) bitmaps.next()); + } + final long[] sizes = new long[buffer.size()]; + final boolean[] istmp = new boolean[buffer.size()]; + for (int k = 0; k < sizes.length; ++k) { + sizes[k] = buffer.get(k).getLongSizeInBytes(); + } + PriorityQueue pq = new PriorityQueue(128, new Comparator() { + @Override + public int compare(Integer a, Integer b) { + return (int) (sizes[a] - sizes[b]); + } + }); + for (int k = 0; k < sizes.length; ++k) { + pq.add(k); + } + if (pq.size() == 1) { + return buffer.get(pq.poll()).toMutableRoaringBitmap(); + } + while (pq.size() > 1) { + Integer x1 = pq.poll(); + Integer x2 = pq.poll(); + if (istmp[x1] && istmp[x2]) { + buffer.set(x1, MutableRoaringBitmap.lazyorfromlazyinputs( + (MutableRoaringBitmap) buffer.get(x1), (MutableRoaringBitmap) buffer.get(x2))); + sizes[x1] = buffer.get(x1).getLongSizeInBytes(); + pq.add(x1); + } else if (istmp[x2]) { + ((MutableRoaringBitmap) buffer.get(x2)).lazyor(buffer.get(x1)); + sizes[x2] = buffer.get(x2).getLongSizeInBytes(); + pq.add(x2); + } else if (istmp[x1]) { + ((MutableRoaringBitmap) buffer.get(x1)).lazyor(buffer.get(x2)); + sizes[x1] = buffer.get(x1).getLongSizeInBytes(); + pq.add(x1); + } else { + buffer.set(x1, ImmutableRoaringBitmap.lazyor(buffer.get(x1), buffer.get(x2))); + sizes[x1] = buffer.get(x1).getLongSizeInBytes(); + istmp[x1] = true; + pq.add(x1); + } + } + MutableRoaringBitmap answer = (MutableRoaringBitmap) 
buffer.get(pq.poll()); + answer.repairAfterLazy(); + return answer; + } + + /** + * Uses a priority queue to compute the xor aggregate. + *

+ * This function runs in linearithmic (O(n log n)) time with respect to the number of bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + * @see #horizontal_xor(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap priorityqueue_xor(ImmutableRoaringBitmap... bitmaps) { + // code could be faster, see priorityqueue_or + if (bitmaps.length < 2) { + throw new IllegalArgumentException("Expecting at least 2 bitmaps"); + } + final PriorityQueue pq = + new PriorityQueue(bitmaps.length, new Comparator() { + @Override + public int compare(ImmutableRoaringBitmap a, ImmutableRoaringBitmap b) { + return (int) (a.getLongSizeInBytes() - b.getLongSizeInBytes()); + } + }); + Collections.addAll(pq, bitmaps); + while (pq.size() > 1) { + final ImmutableRoaringBitmap x1 = pq.poll(); + final ImmutableRoaringBitmap x2 = pq.poll(); + pq.add(ImmutableRoaringBitmap.xor(x1, x2)); + } + return (MutableRoaringBitmap) pq.poll(); + } + + /** + * Compute overall XOR between bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap xor(ImmutableRoaringBitmap... bitmaps) { + return naive_xor(bitmaps); + } + + /** + * Compute overall XOR between bitmaps. + * + * @param bitmaps input bitmaps (ImmutableRoaringBitmap or MutableRoaringBitmap) + * @return aggregated bitmap + */ + public static MutableRoaringBitmap xor(@SuppressWarnings("rawtypes") Iterator bitmaps) { + return naive_xor(bitmaps); + } + + /** + * Compute overall XOR between bitmaps. + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap xor(MutableRoaringBitmap... 
bitmaps) { + return naive_xor(bitmaps); + } + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferIntIteratorFlyweight.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferIntIteratorFlyweight.java new file mode 100644 index 000000000..c193a27d4 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferIntIteratorFlyweight.java @@ -0,0 +1,132 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.PeekableIntIterator; +import com.fr.third.bitmap.roaringbitmap.PeekableShortIterator; + +/** + * Fast iterator minimizing the stress on the garbage collector. You can create one reusable + * instance of this class and then {@link #wrap(ImmutableRoaringBitmap)} + *

+ * For better performance, consider the {@link ImmutableRoaringBitmap#forEach} method. + * + * @author Borislav Ivanov + **/ +public class BufferIntIteratorFlyweight implements PeekableIntIterator { + + private int hs; + + private PeekableShortIterator iter; + + private MappeableArrayContainerShortIterator arrIter = new MappeableArrayContainerShortIterator(); + + private MappeableBitmapContainerShortIterator bitmapIter = + new MappeableBitmapContainerShortIterator(); + + private MappeableRunContainerShortIterator runIter = new MappeableRunContainerShortIterator(); + + + private int pos; + + private ImmutableRoaringBitmap roaringBitmap = null; + + /** + * Creates an instance that is not ready for iteration. You must first call + * {@link #wrap(ImmutableRoaringBitmap)}. + */ + public BufferIntIteratorFlyweight() { + + } + + /** + * Creates an instance that is ready for iteration. + * + * @param r bitmap to be iterated over + */ + public BufferIntIteratorFlyweight(ImmutableRoaringBitmap r) { + wrap(r); + } + + @Override + public PeekableIntIterator clone() { + try { + BufferIntIteratorFlyweight x = (BufferIntIteratorFlyweight) super.clone(); + x.iter = this.iter.clone(); + return x; + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos < this.roaringBitmap.highLowContainer.size(); + } + + @Override + public int next() { + int x = iter.nextAsInt() | hs; + if (!iter.hasNext()) { + ++pos; + nextContainer(); + } + return x; + } + + private void nextContainer() { + if (pos < this.roaringBitmap.highLowContainer.size()) { + + MappeableContainer container = this.roaringBitmap.highLowContainer.getContainerAtIndex(pos); + + if (container instanceof MappeableBitmapContainer) { + bitmapIter.wrap((MappeableBitmapContainer) container); + iter = bitmapIter; + } else if (container instanceof MappeableRunContainer) { + runIter.wrap((MappeableRunContainer) container); + iter = runIter; + } else { + 
arrIter.wrap((MappeableArrayContainer) container); + iter = arrIter; + } + + hs = BufferUtil.toIntUnsigned(this.roaringBitmap.highLowContainer.getKeyAtIndex(pos)) << 16; + } + } + + /** + * Prepares a bitmap for iteration + * + * @param r bitmap to be iterated over + */ + public void wrap(ImmutableRoaringBitmap r) { + this.hs = 0; + this.pos = 0; + this.roaringBitmap = r; + this.nextContainer(); + } + + @Override + public void advanceIfNeeded(int minval) { + while (hasNext() && ((hs >>> 16) < (minval >>> 16))) { + ++pos; + nextContainer(); + } + if (hasNext() && ((hs >>> 16) == (minval >>> 16))) { + iter.advanceIfNeeded(BufferUtil.lowbits(minval)); + if (!iter.hasNext()) { + ++pos; + nextContainer(); + } + } + } + + @Override + public int peekNext() { + return BufferUtil.toIntUnsigned(iter.peekNext()) | hs; + } + + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferReverseIntIteratorFlyweight.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferReverseIntIteratorFlyweight.java new file mode 100644 index 000000000..677aba388 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferReverseIntIteratorFlyweight.java @@ -0,0 +1,116 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.IntIterator; +import com.fr.third.bitmap.roaringbitmap.ShortIterator; + +/** + * Fast iterator minimizing the stress on the garbage collector. You can create one reusable + * instance of this class and then {@link #wrap(ImmutableRoaringBitmap)} + *

+ * This iterator enumerates the stored values in reverse (starting from the end). + * + * @author Borislav Ivanov + **/ +public class BufferReverseIntIteratorFlyweight implements IntIterator { + + private int hs; + + private ShortIterator iter; + + private ReverseMappeableArrayContainerShortIterator arrIter = + new ReverseMappeableArrayContainerShortIterator(); + + private ReverseMappeableBitmapContainerShortIterator bitmapIter = + new ReverseMappeableBitmapContainerShortIterator(); + + private ReverseMappeableRunContainerShortIterator runIter = + new ReverseMappeableRunContainerShortIterator(); + + private short pos; + + private ImmutableRoaringBitmap roaringBitmap = null; + + + /** + * Creates an instance that is not ready for iteration. You must first call + * {@link #wrap(ImmutableRoaringBitmap)}. + */ + public BufferReverseIntIteratorFlyweight() { + + } + + /** + * Creates an instance that is ready for iteration. + * + * @param r bitmap to be iterated over + */ + public BufferReverseIntIteratorFlyweight(ImmutableRoaringBitmap r) { + wrap(r); + } + + @Override + public IntIterator clone() { + try { + BufferReverseIntIteratorFlyweight x = (BufferReverseIntIteratorFlyweight) super.clone(); + x.iter = this.iter.clone(); + return x; + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos >= 0; + } + + + @Override + public int next() { + final int x = iter.nextAsInt() | hs; + if (!iter.hasNext()) { + --pos; + nextContainer(); + } + return x; + } + + private void nextContainer() { + + if (pos >= 0) { + + MappeableContainer container = this.roaringBitmap.highLowContainer.getContainerAtIndex(pos); + + if (container instanceof MappeableBitmapContainer) { + bitmapIter.wrap((MappeableBitmapContainer) container); + iter = bitmapIter; + } else if (container instanceof MappeableRunContainer) { + runIter.wrap((MappeableRunContainer) container); + iter = runIter; + } else { + 
arrIter.wrap((MappeableArrayContainer) container); + iter = arrIter; + } + + hs = BufferUtil.toIntUnsigned(this.roaringBitmap.highLowContainer.getKeyAtIndex(pos)) << 16; + } + } + + /** + * Prepares a bitmap for iteration + * + * @param r bitmap to be iterated over + */ + public void wrap(ImmutableRoaringBitmap r) { + this.roaringBitmap = r; + this.hs = 0; + this.pos = (short) (this.roaringBitmap.highLowContainer.size() - 1); + this.nextContainer(); + } + +} + diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferUtil.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferUtil.java new file mode 100644 index 000000000..7fd96590a --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/BufferUtil.java @@ -0,0 +1,825 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.Util; + +import java.nio.Buffer; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; + +/** + * Various useful methods for roaring bitmaps. + *

+ * This class is similar to Util but meant to be used with memory mapping. + */ +public final class BufferUtil { + + + /** + * Private constructor to prevent instantiation of utility class + */ + private BufferUtil() { + } + + /** + * Find the smallest integer larger than pos such that array[pos]>= min. If none can be found, + * return length. Based on code by O. Kaser. + * + * @param array container where we search + * @param pos initial position + * @param min minimal threshold + * @param length how big should the array consider to be + * @return x greater than pos such that array[pos] is at least as large as min, pos is is equal to + * length if it is not possible. + */ + protected static int advanceUntil(ShortBuffer array, int pos, int length, short min) { + int lower = pos + 1; + + // special handling for a possibly common sequential case + if (lower >= length || toIntUnsigned(array.get(lower)) >= toIntUnsigned(min)) { + return lower; + } + + int spansize = 1; // could set larger + // bootstrap an upper limit + + while (lower + spansize < length + && toIntUnsigned(array.get(lower + spansize)) < toIntUnsigned(min)) { + spansize *= 2; // hoping for compiler will reduce to + } + // shift + int upper = (lower + spansize < length) ? 
lower + spansize : length - 1; + + // maybe we are lucky (could be common case when the seek ahead + // expected + // to be small and sequential will otherwise make us look bad) + if (array.get(upper) == min) { + return upper; + } + + if (toIntUnsigned(array.get(upper)) < toIntUnsigned(min)) {// means + // array + // has no + // item + // >= min + // pos = array.length; + return length; + } + + // we know that the next-smallest span was too small + lower += (spansize / 2); + + // else begin binary search + // invariant: array[lower]min + while (lower + 1 != upper) { + int mid = (lower + upper) / 2; + short arraymid = array.get(mid); + if (arraymid == min) { + return mid; + } else if (toIntUnsigned(arraymid) < toIntUnsigned(min)) { + lower = mid; + } else { + upper = mid; + } + } + return upper; + + } + + protected static void arraycopy(ShortBuffer src, int srcPos, ShortBuffer dest, int destPos, + int length) { + if (BufferUtil.isBackedBySimpleArray(src) && BufferUtil.isBackedBySimpleArray(dest)) { + System.arraycopy(src.array(), srcPos, dest.array(), destPos, length); + } else { + if (srcPos < destPos) { + for (int k = length - 1; k >= 0; --k) { + dest.put(destPos + k, src.get(k + srcPos)); + } + } else { + for (int k = 0; k < length; ++k) { + dest.put(destPos + k, src.get(k + srcPos)); + } + } + } + } + + protected static int branchyUnsignedBinarySearch(final ShortBuffer array, final int begin, + final int end, final short k) { + final int ikey = toIntUnsigned(k); + // next line accelerates the possibly common case where the value would be inserted at the end + if ((end > 0) && (toIntUnsigned(array.get(end - 1)) < ikey)) { + return -end - 1; + } + int low = begin; + int high = end - 1; + while (low <= high) { + final int middleIndex = (low + high) >>> 1; + final int middleValue = toIntUnsigned(array.get(middleIndex)); + + if (middleValue < ikey) { + low = middleIndex + 1; + } else if (middleValue > ikey) { + high = middleIndex - 1; + } else { + return middleIndex; 
+ } + } + return -(low + 1); + } + + /** + * Compares the two specified {@code short} values, treating them as unsigned values between + * {@code 0} and {@code 2^16 - 1} inclusive. + * + * @param a the first unsigned {@code short} to compare + * @param b the second unsigned {@code short} to compare + * @return a negative value if {@code a} is less than {@code b}; a positive value if {@code a} is + * greater than {@code b}; or zero if they are equal + */ + public static int compareUnsigned(short a, short b) { + return toIntUnsigned(a) - toIntUnsigned(b); + } + + protected static void fillArrayAND(short[] container, LongBuffer bitmap1, LongBuffer bitmap2) { + int pos = 0; + if (bitmap1.limit() != bitmap2.limit()) { + throw new IllegalArgumentException("not supported"); + } + if (BufferUtil.isBackedBySimpleArray(bitmap1) && BufferUtil.isBackedBySimpleArray(bitmap2)) { + int len = bitmap1.limit(); + long[] b1 = bitmap1.array(); + long[] b2 = bitmap2.array(); + for (int k = 0; k < len; ++k) { + long bitset = b1[k] & b2[k]; + while (bitset != 0) { + final long t = bitset & -bitset; + container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } else { + int len = bitmap1.limit(); + for (int k = 0; k < len; ++k) { + long bitset = bitmap1.get(k) & bitmap2.get(k); + while (bitset != 0) { + final long t = bitset & -bitset; + container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } + } + + protected static void fillArrayANDNOT(short[] container, LongBuffer bitmap1, LongBuffer bitmap2) { + int pos = 0; + if (bitmap1.limit() != bitmap2.limit()) { + throw new IllegalArgumentException("not supported"); + } + if (BufferUtil.isBackedBySimpleArray(bitmap1) && BufferUtil.isBackedBySimpleArray(bitmap2)) { + int len = bitmap1.limit(); + long[] b1 = bitmap1.array(); + long[] b2 = bitmap2.array(); + for (int k = 0; k < len; ++k) { + long bitset = b1[k] & (~b2[k]); + while (bitset != 0) { + final long t = bitset & -bitset; + 
container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } else { + int len = bitmap1.limit(); + for (int k = 0; k < len; ++k) { + long bitset = bitmap1.get(k) & (~bitmap2.get(k)); + while (bitset != 0) { + final long t = bitset & -bitset; + container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } + } + + protected static void fillArrayXOR(short[] container, LongBuffer bitmap1, LongBuffer bitmap2) { + int pos = 0; + if (bitmap1.limit() != bitmap2.limit()) { + throw new IllegalArgumentException("not supported"); + } + if (BufferUtil.isBackedBySimpleArray(bitmap1) && BufferUtil.isBackedBySimpleArray(bitmap2)) { + Util.fillArrayXOR(container, bitmap1.array(), bitmap2.array()); + } else { + int len = bitmap1.limit(); + for (int k = 0; k < len; ++k) { + long bitset = bitmap1.get(k) ^ bitmap2.get(k); + while (bitset != 0) { + final long t = bitset & -bitset; + container[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + bitset ^= t; + } + } + } + } + + /** + * flip bits at start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + */ + public static void flipBitmapRange(LongBuffer bitmap, int start, int end) { + if (isBackedBySimpleArray(bitmap)) { + Util.flipBitmapRange(bitmap.array(), start, end); + return; + } + if (start == end) { + return; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + bitmap.put(firstword, bitmap.get(firstword) ^ ~(~0L << start)); + for (int i = firstword; i < endword; i++) { + bitmap.put(i, ~bitmap.get(i)); + } + bitmap.put(endword, bitmap.get(endword) ^ (~0L >>> -end)); + } + + /** + * Hamming weight of the 64-bit words involved in the range start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + */ + private 
static int cardinalityInBitmapWordRange(LongBuffer bitmap, int start, int end) { + if (isBackedBySimpleArray(bitmap)) { + return Util.cardinalityInBitmapWordRange(bitmap.array(), start, end); + } + if (start == end) { + return 0; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + int answer = 0; + for (int i = firstword; i <= endword; i++) { + answer += Long.bitCount(bitmap.get(i)); + } + return answer; + } + + /** + * set bits at start, start+1,..., end-1 and report the cardinality change + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + * @return cardinality change + */ + public static int setBitmapRangeAndCardinalityChange(LongBuffer bitmap, int start, int end) { + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + return Util.setBitmapRangeAndCardinalityChange(bitmap.array(), start, end); + } + int cardbefore = cardinalityInBitmapWordRange(bitmap, start, end); + setBitmapRange(bitmap, start, end); + int cardafter = cardinalityInBitmapWordRange(bitmap, start, end); + return cardafter - cardbefore; + } + + /** + * flip bits at start, start+1,..., end-1 and report the cardinality change + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + * @return cardinality change + */ + public static int flipBitmapRangeAndCardinalityChange(LongBuffer bitmap, int start, int end) { + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + return Util.flipBitmapRangeAndCardinalityChange(bitmap.array(), start, end); + } + int cardbefore = cardinalityInBitmapWordRange(bitmap, start, end); + flipBitmapRange(bitmap, start, end); + int cardafter = cardinalityInBitmapWordRange(bitmap, start, end); + return cardafter - cardbefore; + } + + /** + * reset bits at start, start+1,..., end-1 and report the cardinality change + * + * @param bitmap array of words to 
be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + * @return cardinality change + */ + public static int resetBitmapRangeAndCardinalityChange(LongBuffer bitmap, int start, int end) { + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + return Util.resetBitmapRangeAndCardinalityChange(bitmap.array(), start, end); + } + int cardbefore = cardinalityInBitmapWordRange(bitmap, start, end); + resetBitmapRange(bitmap, start, end); + int cardafter = cardinalityInBitmapWordRange(bitmap, start, end); + return cardafter - cardbefore; + } + + /** + * From the cardinality of a container, compute the corresponding size in bytes of the container. + * Additional information is required if the container is run encoded. + * + * @param card the cardinality if this is not run encoded, otherwise ignored + * @param numRuns number of runs if run encoded, othewise ignored + * @param isRunEncoded boolean + * @return the size in bytes + */ + protected static int getSizeInBytesFromCardinalityEtc(int card, int numRuns, + boolean isRunEncoded) { + if (isRunEncoded) { + return 2 + numRuns * 2 * 2; // each run uses 2 shorts, plus the initial short giving num runs + } + boolean isBitmap = card > MappeableArrayContainer.DEFAULT_MAX_SIZE; + if (isBitmap) { + return MappeableBitmapContainer.MAX_CAPACITY / 8; + } else { + return card * 2; + } + + } + + protected static short highbits(int x) { + return (short) (x >>> 16); + } + + protected static short highbits(long x) { + return (short) (x >>> 16); + } + + /** + * Checks whether the Buffer is backed by a simple array. In java, a Buffer is an abstraction that + * can represent various data, from data on disk all the way to native Java arrays. Like all + * abstractions, a Buffer might carry a performance penalty. Thus, we sometimes check whether the + * Buffer is simply a wrapper around a Java array. 
In these instances, it might be best, from a + * performance point of view, to access the underlying array (using the array()) method. + * + * @param b the provided Buffer + * @return whether the Buffer is backed by a simple array + */ + protected static boolean isBackedBySimpleArray(Buffer b) { + return b.hasArray() && (b.arrayOffset() == 0); + } + + protected static short lowbits(int x) { + return (short) (x & 0xFFFF); + } + + protected static short lowbits(long x) { + return (short) (x & 0xFFFF); + } + + protected static short maxLowBit() { + return (short) 0xFFFF; + } + + protected static int maxLowBitAsInteger() { + return 0xFFFF; + } + + /** + * clear bits at start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + */ + public static void resetBitmapRange(LongBuffer bitmap, int start, int end) { + if (isBackedBySimpleArray(bitmap)) { + Util.resetBitmapRange(bitmap.array(), start, end); + return; + } + if (start == end) { + return; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + if (firstword == endword) { + bitmap.put(firstword, bitmap.get(firstword) & ~((~0L << start) & (~0L >>> -end))); + return; + } + bitmap.put(firstword, bitmap.get(firstword) & (~(~0L << start))); + for (int i = firstword + 1; i < endword; i++) { + bitmap.put(i, 0L); + } + bitmap.put(endword, bitmap.get(endword) & (~(~0L >>> -end))); + } + + /** + * set bits at start, start+1,..., end-1 + * + * @param bitmap array of words to be modified + * @param start first index to be modified (inclusive) + * @param end last index to be modified (exclusive) + */ + public static void setBitmapRange(LongBuffer bitmap, int start, int end) { + if (isBackedBySimpleArray(bitmap)) { + Util.setBitmapRange(bitmap.array(), start, end); + return; + } + if (start == end) { + return; + } + int firstword = start / 64; + int endword = (end - 1) / 64; + if (firstword 
== endword) { + bitmap.put(firstword, bitmap.get(firstword) | ((~0L << start) & (~0L >>> -end))); + + return; + } + bitmap.put(firstword, bitmap.get(firstword) | (~0L << start)); + for (int i = firstword + 1; i < endword; i++) { + bitmap.put(i, ~0L); + } + bitmap.put(endword, bitmap.get(endword) | (~0L >>> -end)); + } + + protected static int toIntUnsigned(short x) { + return x & 0xFFFF; + } + + /** + * Look for value k in buffer in the range [begin,end). If the value is found, return its index. + * If not, return -(i+1) where i is the index where the value would be inserted. The buffer is + * assumed to contain sorted values where shorts are interpreted as unsigned integers. + * + * @param array buffer where we search + * @param begin first index (inclusive) + * @param end last index (exclusive) + * @param k value we search for + * @return count + */ + public static int unsignedBinarySearch(final ShortBuffer array, final int begin, final int end, + final short k) { + return branchyUnsignedBinarySearch(array, begin, end, k); + } + + protected static int unsignedDifference(final ShortBuffer set1, final int length1, + final ShortBuffer set2, final int length2, final short[] buffer) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == length2) { + set1.get(buffer, 0, length1); + return length1; + } + if (0 == length1) { + return 0; + } + short s1 = set1.get(k1); + short s2 = set2.get(k2); + while (true) { + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + buffer[pos++] = s1; + ++k1; + if (k1 >= length1) { + break; + } + s1 = set1.get(k1); + } else if (toIntUnsigned(s1) == toIntUnsigned(s2)) { + ++k1; + ++k2; + if (k1 >= length1) { + break; + } + if (k2 >= length2) { + set1.position(k1); + set1.get(buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s1 = set1.get(k1); + s2 = set2.get(k2); + } else {// if (val1>val2) + ++k2; + if (k2 >= length2) { + set1.position(k1); + set1.get(buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s2 = set2.get(k2); + } 
+ } + return pos; + } + + protected static int unsignedExclusiveUnion2by2(final ShortBuffer set1, final int length1, + final ShortBuffer set2, final int length2, final short[] buffer) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == length2) { + set1.get(buffer, 0, length1); + return length1; + } + if (0 == length1) { + set2.get(buffer, 0, length2); + return length2; + } + short s1 = set1.get(k1); + short s2 = set2.get(k2); + while (true) { + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + buffer[pos++] = s1; + ++k1; + if (k1 >= length1) { + set2.position(k2); + set2.get(buffer, pos, length2 - k2); + return pos + length2 - k2; + } + s1 = set1.get(k1); + } else if (toIntUnsigned(s1) == toIntUnsigned(s2)) { + ++k1; + ++k2; + if (k1 >= length1) { + set2.position(k2); + set2.get(buffer, pos, length2 - k2); + return pos + length2 - k2; + } + if (k2 >= length2) { + set1.position(k1); + set1.get(buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s1 = set1.get(k1); + s2 = set2.get(k2); + } else {// if (val1>val2) + buffer[pos++] = s2; + ++k2; + if (k2 >= length2) { + set1.position(k1); + set1.get(buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s2 = set2.get(k2); + } + } + // return pos; + } + + protected static int unsignedIntersect2by2(final ShortBuffer set1, final int length1, + final ShortBuffer set2, final int length2, final short[] buffer) { + if (length1 * 64 < length2) { + return unsignedOneSidedGallopingIntersect2by2(set1, length1, set2, length2, buffer); + } else if (length2 * 64 < length1) { + return unsignedOneSidedGallopingIntersect2by2(set2, length2, set1, length1, buffer); + } else { + return unsignedLocalIntersect2by2(set1, length1, set2, length2, buffer); + } + } + + /** + * Checks if two arrays intersect + * + * @param set1 first array + * @param length1 length of first array + * @param set2 second array + * @param length2 length of second array + * @return true if they intersect + */ + public static boolean 
unsignedIntersects(ShortBuffer set1, int length1, ShortBuffer set2, + int length2) { + if ((0 == length1) || (0 == length2)) { + return false; + } + int k1 = 0; + int k2 = 0; + + // could be more efficient with galloping + short s1 = set1.get(k1); + short s2 = set2.get(k2); + + mainwhile: + while (true) { + if (toIntUnsigned(s2) < toIntUnsigned(s1)) { + do { + ++k2; + if (k2 == length2) { + break mainwhile; + } + s2 = set2.get(k2); + } while (toIntUnsigned(s2) < toIntUnsigned(s1)); + } + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + do { + ++k1; + if (k1 == length1) { + break mainwhile; + } + s1 = set1.get(k1); + } while (toIntUnsigned(s1) < toIntUnsigned(s2)); + } else { + return true; + } + } + return false; + } + + protected static int unsignedLocalIntersect2by2(final ShortBuffer set1, final int length1, + final ShortBuffer set2, final int length2, final short[] buffer) { + if ((0 == length1) || (0 == length2)) { + return 0; + } + int k1 = 0; + int k2 = 0; + int pos = 0; + short s1 = set1.get(k1); + short s2 = set2.get(k2); + + mainwhile: + while (true) { + if (toIntUnsigned(s2) < toIntUnsigned(s1)) { + do { + ++k2; + if (k2 == length2) { + break mainwhile; + } + s2 = set2.get(k2); + + } while (toIntUnsigned(s2) < toIntUnsigned(s1)); + } + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + do { + ++k1; + if (k1 == length1) { + break mainwhile; + } + s1 = set1.get(k1); + + } while (toIntUnsigned(s1) < toIntUnsigned(s2)); + } else { + // (set2.get(k2) == set1.get(k1)) + buffer[pos++] = s1; + ++k1; + if (k1 == length1) { + break; + } + s1 = set1.get(k1); + ++k2; + if (k2 == length2) { + break; + } + s2 = set2.get(k2); + + } + } + return pos; + } + + protected static int unsignedLocalIntersect2by2Cardinality(final ShortBuffer set1, + final int length1, final ShortBuffer set2, final int length2) { + if ((0 == length1) || (0 == length2)) { + return 0; + } + int k1 = 0; + int k2 = 0; + int pos = 0; + short s1 = set1.get(k1); + short s2 = set2.get(k2); + + mainwhile: + 
while (true) { + if (toIntUnsigned(s2) < toIntUnsigned(s1)) { + do { + ++k2; + if (k2 == length2) { + break mainwhile; + } + s2 = set2.get(k2); + + } while (toIntUnsigned(s2) < toIntUnsigned(s1)); + } + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + do { + ++k1; + if (k1 == length1) { + break mainwhile; + } + s1 = set1.get(k1); + + } while (toIntUnsigned(s1) < toIntUnsigned(s2)); + } else { + ++pos; + ++k1; + if (k1 == length1) { + break; + } + s1 = set1.get(k1); + ++k2; + if (k2 == length2) { + break; + } + s2 = set2.get(k2); + + } + } + return pos; + } + + protected static int unsignedOneSidedGallopingIntersect2by2(final ShortBuffer smallSet, + final int smallLength, final ShortBuffer largeSet, final int largeLength, + final short[] buffer) { + if (0 == smallLength) { + return 0; + } + int k1 = 0; + int k2 = 0; + int pos = 0; + + short s1 = largeSet.get(k1); + short s2 = smallSet.get(k2); + while (true) { + if (toIntUnsigned(s1) < toIntUnsigned(s2)) { + k1 = advanceUntil(largeSet, k1, largeLength, s2); + if (k1 == largeLength) { + break; + } + s1 = largeSet.get(k1); + } + if (toIntUnsigned(s2) < toIntUnsigned(s1)) { + ++k2; + if (k2 == smallLength) { + break; + } + s2 = smallSet.get(k2); + } else { + // (set2.get(k2) == set1.get(k1)) + buffer[pos++] = s2; + ++k2; + if (k2 == smallLength) { + break; + } + s2 = smallSet.get(k2); + k1 = advanceUntil(largeSet, k1, largeLength, s2); + if (k1 == largeLength) { + break; + } + s1 = largeSet.get(k1); + } + + } + return pos; + + } + + protected static int unsignedUnion2by2(final ShortBuffer set1, final int length1, + final ShortBuffer set2, final int length2, final short[] buffer) { + int pos = 0; + int k1 = 0, k2 = 0; + if (0 == length2) { + set1.get(buffer, 0, length1); + return length1; + } + if (0 == length1) { + set2.get(buffer, 0, length2); + return length2; + } + short s1 = set1.get(k1); + short s2 = set2.get(k2); + while (true) { + int v1 = toIntUnsigned(s1); + int v2 = toIntUnsigned(s2); + if (v1 < v2) { + 
buffer[pos++] = s1; + ++k1; + if (k1 >= length1) { + set2.position(k2); + set2.get(buffer, pos, length2 - k2); + return pos + length2 - k2; + } + s1 = set1.get(k1); + } else if (v1 == v2) { + buffer[pos++] = s1; + ++k1; + ++k2; + if (k1 >= length1) { + set2.position(k2); + set2.get(buffer, pos, length2 - k2); + return pos + length2 - k2; + } + if (k2 >= length2) { + set1.position(k1); + set1.get(buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s1 = set1.get(k1); + s2 = set2.get(k2); + } else {// if (set1.get(k1)>set2.get(k2)) + buffer[pos++] = s2; + ++k2; + if (k2 >= length2) { + set1.position(k1); + set1.get(buffer, pos, length1 - k1); + return pos + length1 - k1; + } + s2 = set2.get(k2); + } + } + // return pos; + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/ImmutableRoaringArray.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/ImmutableRoaringArray.java new file mode 100644 index 000000000..6cf43dee3 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/ImmutableRoaringArray.java @@ -0,0 +1,463 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + + +/** + * This is the underlying data structure for an ImmutableRoaringBitmap. This class is not meant for + * end-users. 
+ */ +public final class ImmutableRoaringArray implements PointableRoaringArray { + + protected static final short SERIAL_COOKIE = MutableRoaringArray.SERIAL_COOKIE; + protected static final short SERIAL_COOKIE_NO_RUNCONTAINER = + MutableRoaringArray.SERIAL_COOKIE_NO_RUNCONTAINER; + private final static int startofrunbitmap = 4; // if there is a runcontainer bitmap + + ByteBuffer buffer; + int size; + + /** + * Create an array based on a previously serialized ByteBuffer. The input ByteBuffer is + * effectively copied (with the slice operation) so you should expect the provided ByteBuffer to + * remain unchanged. + * + * @param bbf The source ByteBuffer + */ + protected ImmutableRoaringArray(ByteBuffer bbf) { + buffer = bbf.slice(); + buffer.order(ByteOrder.LITTLE_ENDIAN); + final int cookie = buffer.getInt(0); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + throw new RuntimeException("I failed to find one of the right cookies. " + cookie); + } + boolean hasRunContainers = (cookie & 0xFFFF) == SERIAL_COOKIE; + this.size = hasRunContainers ? (cookie >>> 16) + 1 : buffer.getInt(4); + int theLimit = size > 0 ? computeSerializedSizeInBytes() : headerSize(hasRunContainers); + buffer.limit(theLimit); + } + + @Override + public int advanceUntil(short x, int pos) { + int lower = pos + 1; + + // special handling for a possibly common sequential case + if (lower >= size || getKey(lower) >= BufferUtil.toIntUnsigned(x)) { + return lower; + } + + int spansize = 1; // could set larger + // bootstrap an upper limit + + while (lower + spansize < size && getKey(lower + spansize) < BufferUtil.toIntUnsigned(x)) { + spansize *= 2; // hoping for compiler will reduce to shift + } + int upper = (lower + spansize < size) ? 
lower + spansize : size - 1; + + // maybe we are lucky (could be common case when the seek ahead + // expected to be small and sequential will otherwise make us look bad) + if (getKey(upper) == BufferUtil.toIntUnsigned(x)) { + return upper; + } + + if (getKey(upper) < BufferUtil.toIntUnsigned(x)) {// means array has no item key >= x + return size; + } + + // we know that the next-smallest span was too small + lower += (spansize / 2); + + // else begin binary search + // invariant: array[lower]x + while (lower + 1 != upper) { + int mid = (lower + upper) / 2; + if (getKey(mid) == BufferUtil.toIntUnsigned(x)) { + return mid; + } else if (getKey(mid) < BufferUtil.toIntUnsigned(x)) { + lower = mid; + } else { + upper = mid; + } + } + return upper; + } + + private int branchyUnsignedBinarySearch(final short k) { + int low = 0; + int high = this.size - 1; + final int ikey = BufferUtil.toIntUnsigned(k); + while (low <= high) { + final int middleIndex = (low + high) >>> 1; + final int middleValue = getKey(middleIndex); + if (middleValue < ikey) { + low = middleIndex + 1; + } else if (middleValue > ikey) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + return -(low + 1); + } + + + @Override + public ImmutableRoaringArray clone() { + ImmutableRoaringArray sa; + try { + sa = (ImmutableRoaringArray) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// should never happen + } + return sa; + } + + + private int computeSerializedSizeInBytes() { + if (this.size == 0) { + return headerSize(hasRunCompression()); + } + int CardinalityOfLastContainer = getCardinality(this.size - 1); + int PositionOfLastContainer = getOffsetContainer(this.size - 1); + int SizeOfLastContainer; + boolean hasrun = hasRunCompression(); + if (isRunContainer(this.size - 1, hasrun)) { + int nbrruns = BufferUtil.toIntUnsigned(buffer.getShort(PositionOfLastContainer)); + SizeOfLastContainer = BufferUtil.getSizeInBytesFromCardinalityEtc(0, nbrruns, true); + } else { + 
SizeOfLastContainer = + BufferUtil.getSizeInBytesFromCardinalityEtc(CardinalityOfLastContainer, 0, false); + } + return SizeOfLastContainer + PositionOfLastContainer; + } + + @Override + public int getCardinality(int k) { + if ((k < 0) || (k >= this.size)) { + throw new IllegalArgumentException( + "out of range container index: " + k + " (report as a bug)"); + } + return BufferUtil.toIntUnsigned(buffer.getShort(this.getStartOfKeys() + 4 * k + 2)) + 1; + } + + + // involves a binary search + @Override + public MappeableContainer getContainer(short x) { + final int i = unsignedBinarySearch(x); + if (i < 0) { + return null; + } + return getContainerAtIndex(i); + } + + @Override + public MappeableContainer getContainerAtIndex(int i) { + int cardinality = getCardinality(i); + final boolean isBitmap = cardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE; // if not a + // runcontainer + ByteBuffer tmp = buffer.duplicate();// sad but ByteBuffer is not thread-safe so it is either a + // duplicate or a lock + // note that tmp will indeed be garbage-collected some time after the end of this function + tmp.order(buffer.order()); + tmp.position(getOffsetContainer(i)); + boolean hasrun = hasRunCompression(); + if (isRunContainer(i, hasrun)) { + // first, we have a short giving the number of runs + int nbrruns = BufferUtil.toIntUnsigned(tmp.getShort()); + final ShortBuffer shortArray = tmp.asShortBuffer(); + shortArray.limit(2 * nbrruns); + return new MappeableRunContainer(shortArray, nbrruns); + } + if (isBitmap) { + final LongBuffer bitmapArray = tmp.asLongBuffer(); + bitmapArray.limit(MappeableBitmapContainer.MAX_CAPACITY / 64); + return new MappeableBitmapContainer(bitmapArray, cardinality); + } else { + final ShortBuffer shortArray = tmp.asShortBuffer(); + shortArray.limit(cardinality); + return new MappeableArrayContainer(shortArray, cardinality); + } + } + + + @Override + public MappeableContainerPointer getContainerPointer() { + return getContainerPointer(0); + } + + 
@Override + public MappeableContainerPointer getContainerPointer(final int startIndex) { + final boolean hasrun = isEmpty() ? false : hasRunCompression(); + return new MappeableContainerPointer() { + int k = startIndex; + + @Override + public void advance() { + ++k; + } + + + @Override + public MappeableContainerPointer clone() { + try { + return (MappeableContainerPointer) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public int compareTo(MappeableContainerPointer o) { + if (key() != o.key()) { + return BufferUtil.toIntUnsigned(key()) - BufferUtil.toIntUnsigned(o.key()); + } + return o.getCardinality() - this.getCardinality(); + } + + @Override + public int getCardinality() { + return ImmutableRoaringArray.this.getCardinality(k); + } + + @Override + public MappeableContainer getContainer() { + if (k >= ImmutableRoaringArray.this.size) { + return null; + } + return ImmutableRoaringArray.this.getContainerAtIndex(k); + } + + + @Override + public int getSizeInBytes() { + // might be a tad expensive + if (ImmutableRoaringArray.this.isRunContainer(k, hasrun)) { + int pos = getOffsetContainer(k); + int nbrruns = BufferUtil.toIntUnsigned(buffer.getShort(pos)); + return BufferUtil.getSizeInBytesFromCardinalityEtc(0, nbrruns, true); + } else { + int CardinalityOfLastContainer = getCardinality(); + return BufferUtil.getSizeInBytesFromCardinalityEtc(CardinalityOfLastContainer, 0, false); + } + } + + @Override + public boolean hasContainer() { + return 0 <= k & k < ImmutableRoaringArray.this.size; + } + + @Override + public boolean isBitmapContainer() { + if (ImmutableRoaringArray.this.isRunContainer(k, hasrun)) { + return false; + } + return getCardinality() > MappeableArrayContainer.DEFAULT_MAX_SIZE; + } + + @Override + public boolean isRunContainer() { + return ImmutableRoaringArray.this.isRunContainer(k, hasrun); + } + + @Override + public short key() { + return 
ImmutableRoaringArray.this.getKeyAtIndex(k); + + } + + + @Override + public void previous() { + --k; + } + }; + } + + // involves a binary search + @Override + public int getIndex(short x) { + return unsignedBinarySearch(x); + } + + private int getKey(int k) { + return BufferUtil.toIntUnsigned(buffer.getShort(getStartOfKeys() + 4 * k)); + } + + @Override + public short getKeyAtIndex(int i) { + return buffer.getShort(4 * i + getStartOfKeys()); + } + + private int getOffsetContainer(int k) { + if ((k < 0) || (k >= this.size)) { + throw new IllegalArgumentException( + "out of range container index: " + k + " (report as a bug)"); + } + if (hasRunCompression()) { // account for size of runcontainer bitmap + if (this.size < MutableRoaringArray.NO_OFFSET_THRESHOLD) { + // we do it the hard way + return getOffsetContainerSlow(k); + } + return buffer.getInt(4 + 4 * this.size + ((this.size + 7) / 8) + 4 * k); + } else { + return buffer.getInt(4 + 4 + 4 * this.size + 4 * k); + } + } + + + private int getOffsetContainerSlow(int k) { + boolean hasrun = hasRunCompression(); + int pos = this.headerSize(hasrun); + for (int z = 0; z < k; ++z) { + if (isRunContainer(z, hasrun)) { + int nbrruns = BufferUtil.toIntUnsigned(buffer.getShort(pos)); + int SizeOfLastContainer = BufferUtil.getSizeInBytesFromCardinalityEtc(0, nbrruns, true); + pos += SizeOfLastContainer; + } else { + int CardinalityOfLastContainer = this.getCardinality(z); + int SizeOfLastContainer = + BufferUtil.getSizeInBytesFromCardinalityEtc(CardinalityOfLastContainer, 0, false); + pos += SizeOfLastContainer; + } + } + return pos; + } + + private int getStartOfKeys() { + if (hasRunCompression()) { // info is in the buffer + return 4 + ((this.size + 7) / 8); + } else { + return 8; + } + } + + @Override + public int hashCode() { + MappeableContainerPointer cp = this.getContainerPointer(); + int hashvalue = 0; + while (cp.hasContainer()) { + int th = cp.key() * 0xF0F0F0 + cp.getContainer().hashCode(); + hashvalue = 31 * 
hashvalue + th; + cp.advance(); + } + return hashvalue; + } + + @Override + public boolean hasRunCompression() { + return (buffer.getInt(0) & 0xFFFF) == SERIAL_COOKIE; + } + + // hasrun should be equal to hasRunCompression() + protected int headerSize(boolean hasrun) { + if (hasrun) { + if (size < MutableRoaringArray.NO_OFFSET_THRESHOLD) {// for small bitmaps, we omit the offsets + return 4 + (size + 7) / 8 + 4 * size; + } + return 4 + (size + 7) / 8 + 8 * size;// - 4 because we pack the size with the cookie + } else { + return 4 + 4 + 8 * size; + } + } + + // starts with binary search and finishes with a sequential search + /*private int hybridUnsignedBinarySearch(final short k) { + int low = 0; + int high = this.size - 1; + final int ikey = BufferUtil.toIntUnsigned(k); + // 32 in the next line matches the size of a cache line + while (low + 16 <= high) { + final int middleIndex = (low + high) >>> 1; + final int middleValue = getKey(middleIndex); + if (middleValue < ikey) { + low = middleIndex + 1; + } else if (middleValue > ikey) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + // we finish the job with a sequential search + int x = low; + for (; x <= high; ++x) { + final int val = getKey(x); + if (val >= ikey) { + if (val == ikey) { + return x; + } + break; + } + } + return -(x + 1); + }*/ + + /** + * Returns true if this bitmap is empty. + * + * @return true if empty + */ + public boolean isEmpty() { + return this.size == 0; + } + + // hasrun should be initialized with hasRunCompression() + private boolean isRunContainer(int i, boolean hasrun) { + if (hasrun) { // info is in the buffer + int j = buffer.get(startofrunbitmap + i / 8); + int mask = 1 << (i % 8); + return (j & mask) != 0; + } else { + return false; + } + } + + /** + * Serialize. + *

+ * The current bitmap is not modified. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + @Override + public void serialize(DataOutput out) throws IOException { + if (buffer.hasArray()) { + out.write(buffer.array(), buffer.arrayOffset(), buffer.limit()); + } else { + ByteBuffer tmp = buffer.duplicate(); + tmp.position(0); + WritableByteChannel channel = Channels.newChannel((OutputStream) out); + channel.write(tmp); + } + } + + /** + * @return the size that the data structure occupies on disk + */ + @Override + public int serializedSizeInBytes() { + return buffer.limit(); + } + + @Override + public int size() { + return this.size; + } + + private int unsignedBinarySearch(short k) { + return branchyUnsignedBinarySearch(k); + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/ImmutableRoaringBitmap.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/ImmutableRoaringBitmap.java new file mode 100644 index 000000000..d631a142d --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/ImmutableRoaringBitmap.java @@ -0,0 +1,1320 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.ImmutableBitmapDataProvider; +import com.fr.third.bitmap.roaringbitmap.IntConsumer; +import com.fr.third.bitmap.roaringbitmap.IntIterator; +import com.fr.third.bitmap.roaringbitmap.PeekableIntIterator; +import com.fr.third.bitmap.roaringbitmap.PeekableShortIterator; +import com.fr.third.bitmap.roaringbitmap.RoaringBitmap; +import com.fr.third.bitmap.roaringbitmap.ShortIterator; +import com.fr.third.bitmap.roaringbitmap.Util; + +import java.io.DataOutput; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Iterator; + +/** + * ImmutableRoaringBitmap provides a compressed immutable (cannot be modified) bitmap. 
It is meant + * to be used with MutableRoaringBitmap, a derived class that adds methods + * to modify the bitmap. + *

+ *

+ * {@code
+ *       import com.fr.third.bitmap.roaringbitmap.buffer.*;
+ *
+ *       //...
+ *
+ *       MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(1, 2, 3, 1000);
+ *       MutableRoaringBitmap rr2 = MutableRoaringBitmap.bitmapOf( 2, 3, 1010);
+ *       ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ *       DataOutputStream dos = new DataOutputStream(bos);
+ *       // could call "rr1.runOptimize()" and "rr2.runOptimize()" if
+ *       // there were runs to compress
+ *       rr1.serialize(dos);
+ *       rr2.serialize(dos);
+ *       dos.close();
+ *       ByteBuffer bb = ByteBuffer.wrap(bos.toByteArray());
+ *       ImmutableRoaringBitmap rrback1 = new ImmutableRoaringBitmap(bb);
+ *       bb.position(bb.position() + rrback1.serializedSizeInBytes());
+ *       ImmutableRoaringBitmap rrback2 = new ImmutableRoaringBitmap(bb);
+ * }
+ * 
+ *

+ * It can also be constructed from a ByteBuffer (useful for memory mapping). + *

+ * Objects of this class may reside almost entirely in memory-map files. + * + * @see MutableRoaringBitmap + */ +public class ImmutableRoaringBitmap + implements Iterable, Cloneable, ImmutableBitmapDataProvider { + + PointableRoaringArray highLowContainer = null; + + + protected ImmutableRoaringBitmap() { + + } + + /** + * Constructs a new ImmutableRoaringBitmap starting at this ByteBuffer's position(). Only + * meta-data is loaded to RAM. The rest is mapped to the ByteBuffer. + *

+ * It is not necessary that limit() on the input ByteBuffer indicates the end of the serialized + * data. + *

+ * After creating this ImmutableRoaringBitmap, you can advance to the rest of the data (if there + * is more) by setting b.position(b.position() + bitmap.serializedSizeInBytes()); + *

+ * Note that the input ByteBuffer is effectively copied (with the slice operation) so you should + * expect the provided ByteBuffer to remain unchanged. + * + * @param b data source + */ + public ImmutableRoaringBitmap(final ByteBuffer b) { + highLowContainer = new ImmutableRoaringArray(b); + } + + /** + * Computes AND between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + */ + public static MutableRoaringBitmap and(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + MutableRoaringBitmap.rangeSanityCheck(rangeStart, rangeEnd); + Iterator bitmapsIterator; + bitmapsIterator = selectRangeWithoutCopy(bitmaps, rangeStart, rangeEnd); + return BufferFastAggregation.and(bitmapsIterator); + } + + /** + * Computes AND between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + * @deprecated use the version where longs specify the range. Negative range end are illegal. + */ + @Deprecated + public static MutableRoaringBitmap and(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final int rangeStart, final int rangeEnd) { + return and(bitmaps, (long) rangeStart, (long) rangeEnd); + } + + /** + * Bitwise AND (intersection) operation. The provided bitmaps are *not* modified. This operation + * is thread-safe as long as the provided bitmaps remain unchanged. + *

+ * If you have more than 2 bitmaps, consider using the FastAggregation class. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + * @see BufferFastAggregation#and(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap and(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2) { + final MutableRoaringBitmap answer = new MutableRoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + + while (pos1 < length1 && pos2 < length2) { + final short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + + if (s1 == s2) { + final MappeableContainer c1 = x1.highLowContainer.getContainerAtIndex(pos1); + final MappeableContainer c2 = x2.highLowContainer.getContainerAtIndex(pos2); + final MappeableContainer c = c1.and(c2); + if (c.getCardinality() > 0) { + answer.getMappeableRoaringArray().append(s1, c); + } + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + pos1 = x1.highLowContainer.advanceUntil(s2, pos1); + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + return answer; + } + + /** + * Cardinality of Bitwise AND (intersection) operation. The provided bitmaps are *not* modified. + * This operation is thread-safe as long as the provided bitmaps remain unchanged. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return as if you did and(x2,x2).getCardinality() + * @see BufferFastAggregation#and(ImmutableRoaringBitmap...) 
+ */ + public static int andCardinality(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2) { + int answer = 0; + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + + while (pos1 < length1 && pos2 < length2) { + final short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + + if (s1 == s2) { + final MappeableContainer c1 = x1.highLowContainer.getContainerAtIndex(pos1); + final MappeableContainer c2 = x2.highLowContainer.getContainerAtIndex(pos2); + answer += c1.andCardinality(c2); + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + pos1 = x1.highLowContainer.advanceUntil(s2, pos1); + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + return answer; + } + + /** + * Bitwise ANDNOT (difference) operation for the given range, rangeStart (inclusive) and rangeEnd + * (exclusive). The provided bitmaps are *not* modified. This operation is thread-safe as long as + * the provided bitmaps remain unchanged. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @param rangeStart beginning of the range (inclusive) + * @param rangeEnd end of range (exclusive) + * @return result of the operation + */ + public static MutableRoaringBitmap andNot(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2, long rangeStart, long rangeEnd) { + MutableRoaringBitmap.rangeSanityCheck(rangeStart, rangeEnd); + MutableRoaringBitmap rb1 = selectRangeWithoutCopy(x1, rangeStart, rangeEnd); + MutableRoaringBitmap rb2 = selectRangeWithoutCopy(x2, rangeStart, rangeEnd); + return andNot(rb1, rb2); + } + + /** + * Bitwise ANDNOT (difference) operation for the given range, rangeStart (inclusive) and rangeEnd + * (exclusive). The provided bitmaps are *not* modified. This operation is thread-safe as long as + * the provided bitmaps remain unchanged. 
+ * + * @param x1 first bitmap + * @param x2 other bitmap + * @param rangeStart beginning of the range (inclusive) + * @param rangeEnd end of range (exclusive) + * @return result of the operation + * @deprecated use the version where longs specify the range. Negative values for range + * endpoints are not allowed. + */ + @Deprecated + public static MutableRoaringBitmap andNot(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2, + final int rangeStart, final int rangeEnd) { + return andNot(x1, x2, (long) rangeStart, (long) rangeEnd); + } + + /** + * Bitwise ANDNOT (difference) operation. The provided bitmaps are *not* modified. This operation + * is thread-safe as long as the provided bitmaps remain unchanged. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + */ + public static MutableRoaringBitmap andNot(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2) { + final MutableRoaringBitmap answer = new MutableRoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + + while (pos1 < length1 && pos2 < length2) { + final short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + if (s1 == s2) { + final MappeableContainer c1 = x1.highLowContainer.getContainerAtIndex(pos1); + final MappeableContainer c2 = x2.highLowContainer.getContainerAtIndex(pos2); + final MappeableContainer c = c1.andNot(c2); + if (c.getCardinality() > 0) { + answer.getMappeableRoaringArray().append(s1, c); + } + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + final int nextPos1 = x1.highLowContainer.advanceUntil(s2, pos1); + answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer, pos1, nextPos1); + pos1 = nextPos1; + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + if (pos2 == length2) { + 
answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer, pos1, length1); + } + return answer; + } + + /** + * Generate a bitmap with the specified values set to true. The provided integers values don't + * have to be in sorted order, but it may be preferable to sort them from a performance point of + * view. + *

+ * This function is equivalent to: + * 

+ *

+     * {@code
+     *       (ImmutableRoaringBitmap) MutableRoaringBitmap.bitmapOf(data)
+     * }
+     * 
+ * + * @param data set values + * @return a new bitmap + */ + public static ImmutableRoaringBitmap bitmapOf(final int... data) { + return MutableRoaringBitmap.bitmapOf(data); + } + + /** + * Complements the bits in the given range, from rangeStart (inclusive) rangeEnd (exclusive). The + * given bitmap is unchanged. + * + * @param bm bitmap being negated + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return a new Bitmap + */ + public static MutableRoaringBitmap flip(ImmutableRoaringBitmap bm, final long rangeStart, + final long rangeEnd) { + MutableRoaringBitmap.rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + throw new RuntimeException("Invalid range " + rangeStart + " -- " + rangeEnd); + } + + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + final short hbStart = BufferUtil.highbits(rangeStart); + final short lbStart = BufferUtil.lowbits(rangeStart); + final short hbLast = BufferUtil.highbits(rangeEnd - 1); + final short lbLast = BufferUtil.lowbits(rangeEnd - 1); + + // copy the containers before the active area + answer.getMappeableRoaringArray().appendCopiesUntil(bm.highLowContainer, hbStart); + + final int max = BufferUtil.toIntUnsigned(BufferUtil.maxLowBit()); + for (short hb = hbStart; hb <= hbLast; ++hb) { + final int containerStart = (hb == hbStart) ? BufferUtil.toIntUnsigned(lbStart) : 0; + final int containerLast = (hb == hbLast) ? BufferUtil.toIntUnsigned(lbLast) : max; + + final int i = bm.highLowContainer.getIndex(hb); + final int j = answer.getMappeableRoaringArray().getIndex(hb); + assert j < 0; + + if (i >= 0) { + final MappeableContainer c = + bm.highLowContainer.getContainerAtIndex(i).not(containerStart, containerLast + 1); + if (c.getCardinality() > 0) { + answer.getMappeableRoaringArray().insertNewKeyValueAt(-j - 1, hb, c); + } + + } else { // *think* the range of ones must never be + // empty. 
+ answer.getMappeableRoaringArray().insertNewKeyValueAt(-j - 1, hb, + MappeableContainer.rangeOfOnes(containerStart, containerLast + 1)); + } + } + // copy the containers after the active area. + answer.getMappeableRoaringArray().appendCopiesAfter(bm.highLowContainer, hbLast); + + return answer; + } + + /** + * Complements the bits in the given range, from rangeStart (inclusive) rangeEnd (exclusive). The + * given bitmap is unchanged. + * + * @param bm bitmap being negated + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return a new Bitmap + * @deprecated use the version where longs specify the range + */ + @Deprecated + public static MutableRoaringBitmap flip(ImmutableRoaringBitmap bm, + final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + return flip(bm, (long) rangeStart, (long) rangeEnd); + } + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + return flip(bm, rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + + /** + * Return new iterator with only values from rangeStart (inclusive) to rangeEnd (exclusive) + * + * @param input bitmaps iterator + * @param rangeStart inclusive + * @param rangeEnd exclusive + * @return new iterator of bitmaps + */ + private static Iterator selectRangeWithoutCopy(final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + Iterator bitmapsIterator; + bitmapsIterator = new Iterator() { + @Override + public boolean hasNext() { + return bitmaps.hasNext(); + } + + @Override + public ImmutableRoaringBitmap next() { + ImmutableRoaringBitmap next = (ImmutableRoaringBitmap) bitmaps.next(); + return selectRangeWithoutCopy(next, rangeStart, rangeEnd); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Remove not supported"); + } + }; + return bitmapsIterator; + } + + /** + * Extracts the values in the specified range, rangeStart (inclusive) and rangeEnd (exclusive) + * 
while avoiding copies as much as possible.
+     * Containers whose key lies strictly between the first and the last key of the range are
+     * inserted into the result without being cloned.
+     *
+     * @param rb         input bitmap
+     * @param rangeStart inclusive
+     * @param rangeEnd   exclusive
+     * @return new bitmap
+     */
+
+    private static MutableRoaringBitmap selectRangeWithoutCopy(ImmutableRoaringBitmap rb,
+                                                               final long rangeStart, final long rangeEnd) {
+        // Split both bounds into a container key (high bits) and a position inside it (low bits).
+        // rangeEnd is exclusive, so the last selected value is rangeEnd - 1.
+        final int hbStart = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeStart));
+        final int lbStart = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeStart));
+        final int hbLast = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeEnd - 1));
+        final int lbLast = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeEnd - 1));
+        MutableRoaringBitmap answer = new MutableRoaringBitmap();
+
+        // Empty (or inverted) range: nothing to select.
+        if (rangeEnd <= rangeStart) {
+            return answer;
+        }
+
+        // The whole range falls into a single container: trim it on both sides.
+        if (hbStart == hbLast) {
+            final int i = rb.highLowContainer.getIndex((short) hbStart);
+            if (i >= 0) {
+                // NOTE(review): remove(...) presumably returns a new container, which is what makes
+                // the following in-place iremove(...) safe for rb -- confirm in MappeableContainer.
+                final MappeableContainer c = rb.highLowContainer.getContainerAtIndex(i).remove(0, lbStart)
+                        .iremove(lbLast + 1, BufferUtil.maxLowBitAsInteger() + 1);
+                if (c.getCardinality() > 0) {
+                    ((MutableRoaringArray) answer.highLowContainer).append((short) hbStart, c);
+                }
+            }
+            return answer;
+        }
+        int ifirst = rb.highLowContainer.getIndex((short) hbStart);
+        int ilast = rb.highLowContainer.getIndex((short) hbLast);
+        // First container of the range: drop the values below lbStart.
+        if (ifirst >= 0) {
+            final MappeableContainer c =
+                    rb.highLowContainer.getContainerAtIndex(ifirst).remove(0, lbStart);
+            if (c.getCardinality() > 0) {
+                ((MutableRoaringArray) answer.highLowContainer).append((short) hbStart, c);
+            }
+        }
+
+        // Containers strictly inside the range are taken as they are (no trimming, no clone).
+        for (int hb = hbStart + 1; hb <= hbLast - 1; ++hb) {
+            final int i = rb.highLowContainer.getIndex((short) hb);
+            final int j = answer.getMappeableRoaringArray().getIndex((short) hb);
+            // The key must not be present in the answer yet; -j - 1 is then used as insertion index.
+            assert j < 0;
+
+            if (i >= 0) {
+                final MappeableContainer c = rb.highLowContainer.getContainerAtIndex(i);
+                answer.getMappeableRoaringArray().insertNewKeyValueAt(-j - 1, (short) hb, c);
+            }
+        }
+
+        // Last container of the range: drop the values above lbLast.
+        if (ilast >= 0) {
+            final MappeableContainer c = rb.highLowContainer.getContainerAtIndex(ilast).remove(lbLast + 1,
+                    BufferUtil.maxLowBitAsInteger() + 1);
+            if (c.getCardinality() > 0) {
+                ((MutableRoaringArray) answer.highLowContainer).append((short) hbLast, c);
+            }
+        }
+        return answer;
+
+    }
+
+    /**
+     * Checks whether the two bitmaps intersect. This can be much faster than calling "and" and
+     * checking the cardinality of the result.
+     *
+     * @param x1 first bitmap
+     * @param x2 other bitmap
+     * @return true if they intersect
+     */
+    public static boolean intersects(final ImmutableRoaringBitmap x1,
+                                     final ImmutableRoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size();
+
+        // Walk both container arrays in parallel; only containers with the same key are compared.
+        while (pos1 < length1 && pos2 < length2) {
+            final short s1 = x1.highLowContainer.getKeyAtIndex(pos1);
+            final short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            if (s1 == s2) {
+                final MappeableContainer c1 = x1.highLowContainer.getContainerAtIndex(pos1);
+                final MappeableContainer c2 = x2.highLowContainer.getContainerAtIndex(pos2);
+                // Stop at the first pair of containers that share a value.
+                if (c1.intersects(c2)) {
+                    return true;
+                }
+                ++pos1;
+                ++pos2;
+            } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                pos1 = x1.highLowContainer.advanceUntil(s2, pos1);
+            } else { // s1 > s2
+                pos2 = x2.highLowContainer.advanceUntil(s1, pos2);
+            }
+        }
+        return false;
+    }
+
+    // Same merge as or(x1, x2), except that containers sharing a key are combined with lazyOR.
+    // important: inputs should not be reused
+    protected static MutableRoaringBitmap lazyor(final ImmutableRoaringBitmap x1,
+                                                 final ImmutableRoaringBitmap x2) {
+        final MutableRoaringBitmap answer = new MutableRoaringBitmap();
+        MappeableContainerPointer i1 = x1.highLowContainer.getContainerPointer();
+        MappeableContainerPointer i2 = x2.highLowContainer.getContainerPointer();
+        // "break main" leaves the merge as soon as one of the two pointers is exhausted.
+        main:
+        if (i1.hasContainer() && i2.hasContainer()) {
+            while (true) {
+                if (i1.key() == i2.key()) {
+                    answer.getMappeableRoaringArray().append(i1.key(),
+                            i1.getContainer().lazyOR(i2.getContainer()));
+                    i1.advance();
+                    i2.advance();
+                    if (!i1.hasContainer() || !i2.hasContainer()) {
+                        break main;
+                    }
+                } else if (Util.compareUnsigned(i1.key(), i2.key()) < 0) { // i1.key() < i2.key()
+                    answer.getMappeableRoaringArray().appendCopy(i1.key(), i1.getContainer());
+                    i1.advance();
+                    if (!i1.hasContainer()) {
+                        break main;
+                    }
+                } else { // i1.key() > i2.key()
+                    answer.getMappeableRoaringArray().appendCopy(i2.key(), i2.getContainer());
+                    i2.advance();
+                    if (!i2.hasContainer()) {
+                        break main;
+                    }
+                }
+            }
+        }
+        // Append whatever is left on the side that still has containers.
+        if (!i1.hasContainer()) {
+            while (i2.hasContainer()) {
+                answer.getMappeableRoaringArray().appendCopy(i2.key(), i2.getContainer());
+                i2.advance();
+            }
+        } else if (!i2.hasContainer()) {
+            while (i1.hasContainer()) {
+                answer.getMappeableRoaringArray().appendCopy(i1.key(), i1.getContainer());
+                i1.advance();
+            }
+        }
+        return answer;
+    }
+
+    /**
+     * Compute overall OR between bitmaps.
+     *

+     * (Effectively calls {@link BufferFastAggregation#or})
+     *
+     * @param bitmaps input bitmaps
+     * @return aggregated bitmap
+     */
+    public static MutableRoaringBitmap or(ImmutableRoaringBitmap... bitmaps) {
+        // The n-way union is entirely delegated to the aggregation helper.
+        final MutableRoaringBitmap union = BufferFastAggregation.or(bitmaps);
+        return union;
+    }
+
+    /**
+     * Bitwise OR (union) operation. The provided bitmaps are *not* modified. This operation is
+     * thread-safe as long as the provided bitmaps remain unchanged.
+     *

+ * If you have more than 2 bitmaps, consider using the FastAggregation class. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + * @see BufferFastAggregation#or(ImmutableRoaringBitmap...) + * @see BufferFastAggregation#horizontal_or(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap or(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2) { + final MutableRoaringBitmap answer = new MutableRoaringBitmap(); + MappeableContainerPointer i1 = x1.highLowContainer.getContainerPointer(); + MappeableContainerPointer i2 = x2.highLowContainer.getContainerPointer(); + main: + if (i1.hasContainer() && i2.hasContainer()) { + while (true) { + if (i1.key() == i2.key()) { + answer.getMappeableRoaringArray().append(i1.key(), + i1.getContainer().or(i2.getContainer())); + i1.advance(); + i2.advance(); + if (!i1.hasContainer() || !i2.hasContainer()) { + break main; + } + } else if (Util.compareUnsigned(i1.key(), i2.key()) < 0) { // i1.key() < i2.key() + answer.getMappeableRoaringArray().appendCopy(i1.key(), i1.getContainer()); + i1.advance(); + if (!i1.hasContainer()) { + break main; + } + } else { // i1.key() > i2.key() + answer.getMappeableRoaringArray().appendCopy(i2.key(), i2.getContainer()); + i2.advance(); + if (!i2.hasContainer()) { + break main; + } + } + } + } + if (!i1.hasContainer()) { + while (i2.hasContainer()) { + answer.getMappeableRoaringArray().appendCopy(i2.key(), i2.getContainer()); + i2.advance(); + } + } else if (!i2.hasContainer()) { + while (i1.hasContainer()) { + answer.getMappeableRoaringArray().appendCopy(i1.key(), i1.getContainer()); + i1.advance(); + } + } + return answer; + } + + /** + * Compute overall OR between bitmaps. + *

+ * (Effectively calls {@link BufferFastAggregation#or}) + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap or(@SuppressWarnings("rawtypes") Iterator bitmaps) { + return BufferFastAggregation.or(bitmaps); + } + + /** + * Computes OR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + */ + public static MutableRoaringBitmap or(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + MutableRoaringBitmap.rangeSanityCheck(rangeStart, rangeEnd); + Iterator bitmapsIterator; + bitmapsIterator = selectRangeWithoutCopy(bitmaps, rangeStart, rangeEnd); + return or(bitmapsIterator); + } + + /** + * Computes OR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + * @deprecated use the version where longs specify the range. + * Negative range points are forbidden. + */ + @Deprecated + public static MutableRoaringBitmap or(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final int rangeStart, final int rangeEnd) { + return or(bitmaps, (long) rangeStart, (long) rangeEnd); + } + + /** + * Cardinality of the bitwise OR (union) operation. The provided bitmaps are *not* modified. This + * operation is thread-safe as long as the provided bitmaps remain unchanged. + *

+ * If you have more than 2 bitmaps, consider using the FastAggregation class. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return cardinality of the union + * @see BufferFastAggregation#or(ImmutableRoaringBitmap...) + * @see BufferFastAggregation#horizontal_or(ImmutableRoaringBitmap...) + */ + public static int orCardinality(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2) { + // we use the fact that the cardinality of the bitmaps is known so that + // the union is just the total cardinality minus the intersection + return x1.getCardinality() + x2.getCardinality() - andCardinality(x1, x2); + } + + /** + * Computes XOR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + */ + public static MutableRoaringBitmap xor(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final long rangeStart, final long rangeEnd) { + Iterator bitmapsIterator; + bitmapsIterator = selectRangeWithoutCopy(bitmaps, rangeStart, rangeEnd); + return BufferFastAggregation.xor(bitmapsIterator); + } + + /** + * Computes XOR between input bitmaps in the given range, from rangeStart (inclusive) to rangeEnd + * (exclusive) + * + * @param bitmaps input bitmaps, these are not modified + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new result bitmap + * @deprecated use the version where longs specify the range. + * Negative values not allowed for rangeStart and rangeEnd + */ + @Deprecated + public static MutableRoaringBitmap xor(@SuppressWarnings("rawtypes") final Iterator bitmaps, + final int rangeStart, final int rangeEnd) { + return xor(bitmaps, (long) rangeStart, (long) rangeEnd); + } + + /** + * Bitwise XOR (symmetric difference) operation. 
The provided bitmaps are *not* modified. This + * operation is thread-safe as long as the provided bitmaps remain unchanged. + *

+ * If you have more than 2 bitmaps, consider using the FastAggregation class. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + * @see BufferFastAggregation#xor(ImmutableRoaringBitmap...) + * @see BufferFastAggregation#horizontal_xor(ImmutableRoaringBitmap...) + */ + public static MutableRoaringBitmap xor(final ImmutableRoaringBitmap x1, + final ImmutableRoaringBitmap x2) { + final MutableRoaringBitmap answer = new MutableRoaringBitmap(); + MappeableContainerPointer i1 = x1.highLowContainer.getContainerPointer(); + MappeableContainerPointer i2 = x2.highLowContainer.getContainerPointer(); + main: + if (i1.hasContainer() && i2.hasContainer()) { + while (true) { + if (i1.key() == i2.key()) { + final MappeableContainer c = i1.getContainer().xor(i2.getContainer()); + if (c.getCardinality() > 0) { + answer.getMappeableRoaringArray().append(i1.key(), c); + } + i1.advance(); + i2.advance(); + if (!i1.hasContainer() || !i2.hasContainer()) { + break main; + } + } else if (Util.compareUnsigned(i1.key(), i2.key()) < 0) { // i1.key() < i2.key() + answer.getMappeableRoaringArray().appendCopy(i1.key(), i1.getContainer()); + i1.advance(); + if (!i1.hasContainer()) { + break main; + } + } else { // i1.key() < i2.key() + answer.getMappeableRoaringArray().appendCopy(i2.key(), i2.getContainer()); + i2.advance(); + if (!i2.hasContainer()) { + break main; + } + } + } + } + if (!i1.hasContainer()) { + while (i2.hasContainer()) { + answer.getMappeableRoaringArray().appendCopy(i2.key(), i2.getContainer()); + i2.advance(); + } + } else if (!i2.hasContainer()) { + while (i1.hasContainer()) { + answer.getMappeableRoaringArray().appendCopy(i1.key(), i1.getContainer()); + i1.advance(); + } + } + + return answer; + } + + @Override + public ImmutableRoaringBitmap clone() { + try { + final ImmutableRoaringBitmap x = (ImmutableRoaringBitmap) super.clone(); + x.highLowContainer = highLowContainer.clone(); + return x; + } catch (final 
CloneNotSupportedException e) { + throw new RuntimeException("shouldn't happen with clone", e); + } + } + + /** + * Checks whether the value in included, which is equivalent to checking if the corresponding bit + * is set (get in BitSet class). + * + * @param x integer value + * @return whether the integer value is included. + */ + @Override + public boolean contains(final int x) { + final short hb = BufferUtil.highbits(x); + final MappeableContainer c = highLowContainer.getContainer(hb); + return c != null && c.contains(BufferUtil.lowbits(x)); + } + + @Override + public boolean equals(Object o) { + if (o instanceof ImmutableRoaringBitmap) { + if (this.highLowContainer.size() != ((ImmutableRoaringBitmap) o).highLowContainer.size()) { + return false; + } + MappeableContainerPointer mp1 = this.highLowContainer.getContainerPointer(); + MappeableContainerPointer mp2 = + ((ImmutableRoaringBitmap) o).highLowContainer.getContainerPointer(); + while (mp1.hasContainer()) { + if (mp1.key() != mp2.key()) { + return false; + } + if (mp1.getCardinality() != mp2.getCardinality()) { + return false; + } + if (!mp1.getContainer().equals(mp2.getContainer())) { + return false; + } + mp1.advance(); + mp2.advance(); + } + return true; + } + return false; + } + + /** + * Returns the number of distinct integers added to the bitmap (e.g., number of bits set). + * + * @return the cardinality + */ + @Override + public long getLongCardinality() { + long size = 0; + for (int i = 0; i < this.highLowContainer.size(); ++i) { + size += this.highLowContainer.getCardinality(i); + } + return size; + } + + @Override + public int getCardinality() { + return (int) getLongCardinality(); + } + + @Override + public void forEach(IntConsumer ic) { + for (int i = 0; i < this.highLowContainer.size(); i++) { + highLowContainer.getContainerAtIndex(i).forEach(highLowContainer.getKeyAtIndex(i), ic); + } + } + + /** + * Return a low-level container pointer that can be used to access the underlying data structure. 
+ * + * @return container pointer + */ + public MappeableContainerPointer getContainerPointer() { + return this.highLowContainer.getContainerPointer(); + } + + /** + * For better performance, consider the Use the {@link #forEach forEach} method. + * + * @return a custom iterator over set bits, the bits are traversed in ascending sorted order + */ + @Override + public PeekableIntIterator getIntIterator() { + return new ImmutableRoaringIntIterator(); + } + + /** + * @return a custom iterator over set bits, the bits are traversed in descending sorted order + */ + @Override + public IntIterator getReverseIntIterator() { + return new ImmutableRoaringReverseIntIterator(); + } + + /** + * Estimate of the memory usage of this data structure. This can be expected to be within 1% of + * the true memory usage. If exact measures are needed, we recommend using dedicated libraries + * such as SizeOf. + *

+ * When the bitmap is constructed from a ByteBuffer from a memory-mapped file, this estimate is + * invalid: we can expect the actual memory usage to be significantly (e.g., 10x) less. + * + * @return estimated memory usage. + */ + @Override + public long getLongSizeInBytes() { + long size = 4; + for (int i = 0; i < this.highLowContainer.size(); ++i) { + if (this.highLowContainer.getContainerAtIndex(i) instanceof MappeableRunContainer) { + MappeableRunContainer thisRunContainer = + (MappeableRunContainer) this.highLowContainer.getContainerAtIndex(i); + size += 4 + BufferUtil.getSizeInBytesFromCardinalityEtc(0, thisRunContainer.nbrruns, true); + } else { + size += 4 + BufferUtil + .getSizeInBytesFromCardinalityEtc(this.highLowContainer.getCardinality(i), 0, false); + } + } + return size; + } + + @Override + public int getSizeInBytes() { + return (int) getLongSizeInBytes(); + } + + @Override + public int hashCode() { + return highLowContainer.hashCode(); + } + + /** + * Check whether this bitmap has had its runs compressed. + * + * @return whether this bitmap has run compression + */ + public boolean hasRunCompression() { + return this.highLowContainer.hasRunCompression(); + } + + /** + * Checks whether the bitmap is empty. + * + * @return true if this bitmap contains no set bit + */ + @Override + public boolean isEmpty() { + return highLowContainer.size() == 0; + } + + /** + * iterate over the positions of the true values. 
+ * + * @return the iterator + */ + @Override + public Iterator iterator() { + return new Iterator() { + int hs = 0; + + ShortIterator iter; + + short pos = 0; + + int x; + + @Override + public boolean hasNext() { + return pos < ImmutableRoaringBitmap.this.highLowContainer.size(); + } + + public Iterator init() { + if (pos < ImmutableRoaringBitmap.this.highLowContainer.size()) { + iter = ImmutableRoaringBitmap.this.highLowContainer.getContainerAtIndex(pos) + .getShortIterator(); + hs = BufferUtil + .toIntUnsigned(ImmutableRoaringBitmap.this.highLowContainer.getKeyAtIndex(pos)) << 16; + } + return this; + } + + @Override + public Integer next() { + x = iter.nextAsInt() | hs; + if (!iter.hasNext()) { + ++pos; + init(); + } + return x; + } + + @Override + public void remove() { + throw new RuntimeException("Cannot modify."); + } + + }.init(); + } + + /** + * Create a new Roaring bitmap containing at most maxcardinality integers. + * + * @param maxcardinality maximal cardinality + * @return a new bitmap with cardinality no more than maxcardinality + */ + @Override + public MutableRoaringBitmap limit(int maxcardinality) { + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + int currentcardinality = 0; + for (int i = 0; (currentcardinality < maxcardinality) + && (i < this.highLowContainer.size()); i++) { + MappeableContainer c = this.highLowContainer.getContainerAtIndex(i); + if (c.getCardinality() + currentcardinality <= maxcardinality) { + ((MutableRoaringArray) answer.highLowContainer) + .append(this.highLowContainer.getKeyAtIndex(i), c.clone()); + currentcardinality += c.getCardinality(); + } else { + int leftover = maxcardinality - currentcardinality; + MappeableContainer limited = c.limit(leftover); + ((MutableRoaringArray) answer.highLowContainer) + .append(this.highLowContainer.getKeyAtIndex(i), limited); + break; + } + } + return answer; + } + + /** + * Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be + * 
GetCardinality()). + * + * @param x upper limit + * @return the rank + */ + @Override + public long rankLong(int x) { + long size = 0; + short xhigh = BufferUtil.highbits(x); + for (int i = 0; i < this.highLowContainer.size(); i++) { + short key = this.highLowContainer.getKeyAtIndex(i); + if (Util.compareUnsigned(key, xhigh) < 0) { + size += this.highLowContainer.getCardinality(i); + } else { + return size + this.highLowContainer.getContainerAtIndex(i).rank(BufferUtil.lowbits(x)); + } + } + return size; + } + + @Override + public int rank(int x) { + return (int) rankLong(x); + } + + /** + * Return the jth value stored in this bitmap. + * + * @param j index of the value + * @return the value + */ + @Override + public int select(int j) { + int leftover = j; + for (int i = 0; i < this.highLowContainer.size(); i++) { + int thiscard = this.highLowContainer.getCardinality(i); + if (thiscard > leftover) { + int keycontrib = this.highLowContainer.getKeyAtIndex(i) << 16; + MappeableContainer c = this.highLowContainer.getContainerAtIndex(i); + int lowcontrib = BufferUtil.toIntUnsigned(c.select(leftover)); + return lowcontrib + keycontrib; + } + leftover -= thiscard; + } + throw new IllegalArgumentException( + "select " + j + " when the cardinality is " + this.getCardinality()); + } + + /** + * Serialize this bitmap. + *

+ * Consider calling {@link MutableRoaringBitmap#runOptimize} before serialization to improve + * compression if this is a MutableRoaringBitmap instance. + *

+ * The current bitmap is not modified. + *

+ * Advanced example: To serialize your bitmap to a ByteBuffer, you can do the following. + *

+ *

+     * {
+     *   @code
+     *   // r is your bitmap
+     *
+     *   // r.runOptimize(); // might improve compression, only if you have a
+     *   // MutableRoaringBitmap instance.
+     *   // next we create the ByteBuffer where the data will be stored
+     *   ByteBuffer outbb = ByteBuffer.allocate(r.serializedSizeInBytes());
+     *   // then we can serialize on a custom OutputStream
+     *   r.serialize(new DataOutputStream(new OutputStream() {
+     *     ByteBuffer mBB;
+     *
+     *     OutputStream init(ByteBuffer mbb) {
+     *       mBB = mbb;
+     *       return this;
+     *     }
+     *
+     *     public void close() {}
+     *
+     *     public void flush() {}
+     *
+     *     public void write(int b) {
+     *       mBB.put((byte) b);
+     *     }
+     *
+     *     public void write(byte[] b) {
+     *       mBB.put(b);
+     *     }
+     *
+     *     public void write(byte[] b, int off, int l) {
+     *       mBB.put(b, off, l);
+     *     }
+     *   }.init(outbb)));
+     *   // outbb will now contain a serialized version of your bitmap
+     * }
+     * 
+ *

+ * Note: Java's data structures are in big endian format. Roaring serializes to a little endian + * format, so the bytes are flipped by the library during serialization to ensure that what is + * stored is in little endian---despite Java's big endianness. You can defeat this process by + * reflipping the bytes again in a custom DataOutput which could lead to serialized Roaring + * objects with an incorrect byte order. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + @Override + public void serialize(DataOutput out) throws IOException { + this.highLowContainer.serialize(out); + } + + /** + * Report the number of bytes required for serialization. This count will match the bytes written + * when calling the serialize method. + * + * @return the size in bytes + */ + @Override + public int serializedSizeInBytes() { + return this.highLowContainer.serializedSizeInBytes(); + } + + /** + * Return the set values as an array if the cardinality is less + * than 2147483648. The integer values are in sorted order. + * + * @return array representing the set values. + */ + @Override + public int[] toArray() { + final int[] array = new int[(int) this.getCardinality()]; + int pos = 0, pos2 = 0; + while (pos < this.highLowContainer.size()) { + final int hs = BufferUtil.toIntUnsigned(this.highLowContainer.getKeyAtIndex(pos)) << 16; + final MappeableContainer c = this.highLowContainer.getContainerAtIndex(pos++); + c.fillLeastSignificant16bits(array, pos2, hs); + pos2 += c.getCardinality(); + } + return array; + } + + /** + * Copies the content of this bitmap to a bitmap that can be modified. + * + * @return a mutable bitmap. 
+ */ + public MutableRoaringBitmap toMutableRoaringBitmap() { + MutableRoaringBitmap c = new MutableRoaringBitmap(); + MappeableContainerPointer mcp = highLowContainer.getContainerPointer(); + while (mcp.hasContainer()) { + c.getMappeableRoaringArray().appendCopy(mcp.key(), mcp.getContainer()); + mcp.advance(); + } + return c; + } + + /** + * Copies this bitmap to a mutable RoaringBitmap. + * + * @return a copy of this bitmap as a RoaringBitmap. + */ + public RoaringBitmap toRoaringBitmap() { + return new RoaringBitmap(this); + } + + /** + * A string describing the bitmap. + * + * @return the string + */ + @Override + public String toString() { + final StringBuilder answer = new StringBuilder(); + final IntIterator i = this.getIntIterator(); + answer.append("{"); + if (i.hasNext()) { + answer.append(i.next() & 0xFFFFFFFFL); + } + while (i.hasNext()) { + answer.append(","); + // to avoid using too much memory, we limit the size + if (answer.length() > 0x80000) { + answer.append("..."); + break; + } + answer.append(i.next() & 0xFFFFFFFFL); + } + answer.append("}"); + return answer.toString(); + } + + private final class ImmutableRoaringIntIterator implements PeekableIntIterator { + private MappeableContainerPointer cp = + ImmutableRoaringBitmap.this.highLowContainer.getContainerPointer(); + + private int hs = 0; + + private PeekableShortIterator iter; + + private boolean ok; + + public ImmutableRoaringIntIterator() { + nextContainer(); + } + + @Override + public PeekableIntIterator clone() { + try { + ImmutableRoaringIntIterator x = (ImmutableRoaringIntIterator) super.clone(); + x.iter = this.iter.clone(); + x.cp = this.cp.clone(); + return x; + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return ok; + } + + @Override + public int next() { + int x = iter.nextAsInt() | hs; + if (!iter.hasNext()) { + cp.advance(); + nextContainer(); + } + return x; + } + + + private void nextContainer() { 
+ ok = cp.hasContainer(); + if (ok) { + iter = cp.getContainer().getShortIterator(); + hs = BufferUtil.toIntUnsigned(cp.key()) << 16; + } + } + + @Override + public void advanceIfNeeded(int minval) { + while (hasNext() && ((hs >>> 16) < (minval >>> 16))) { + cp.advance(); + nextContainer(); + } + if (ok && ((hs >>> 16) == (minval >>> 16))) { + iter.advanceIfNeeded(BufferUtil.lowbits(minval)); + if (!iter.hasNext()) { + cp.advance(); + nextContainer(); + } + } + } + + @Override + public int peekNext() { + return BufferUtil.toIntUnsigned(iter.peekNext()) | hs; + } + + + } + + private final class ImmutableRoaringReverseIntIterator implements IntIterator { + private MappeableContainerPointer cp = ImmutableRoaringBitmap.this.highLowContainer + .getContainerPointer(ImmutableRoaringBitmap.this.highLowContainer.size() - 1); + + private int hs = 0; + + private ShortIterator iter; + + private boolean ok; + + public ImmutableRoaringReverseIntIterator() { + nextContainer(); + } + + @Override + public IntIterator clone() { + try { + ImmutableRoaringReverseIntIterator x = (ImmutableRoaringReverseIntIterator) super.clone(); + x.iter = this.iter.clone(); + x.cp = this.cp.clone(); + return x; + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return ok; + } + + @Override + public int next() { + int x = iter.nextAsInt() | hs; + if (!iter.hasNext()) { + cp.previous(); + nextContainer(); + } + return x; + } + + + private void nextContainer() { + ok = cp.hasContainer(); + if (ok) { + iter = cp.getContainer().getReverseShortIterator(); + hs = BufferUtil.toIntUnsigned(cp.key()) << 16; + } + } + + + } + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableArrayContainer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableArrayContainer.java new file mode 100644 index 000000000..5a68357b1 --- /dev/null +++ 
b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableArrayContainer.java @@ -0,0 +1,1688 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.ArrayContainer; +import com.fr.third.bitmap.roaringbitmap.Container; +import com.fr.third.bitmap.roaringbitmap.IntConsumer; +import com.fr.third.bitmap.roaringbitmap.PeekableShortIterator; +import com.fr.third.bitmap.roaringbitmap.ShortIterator; +import com.fr.third.bitmap.roaringbitmap.Util; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ShortBuffer; +import java.util.Arrays; +import java.util.Iterator; + +/** + * Simple container made of an array of 16-bit integers. Unlike ArrayContainer, + * this class uses a ShortBuffer to store data. + */ +public final class MappeableArrayContainer extends MappeableContainer implements Cloneable { + protected static final int DEFAULT_MAX_SIZE = 4096; // containers with DEFAULT_MAX_SZE or less + private static final int DEFAULT_INIT_SIZE = 4; + private static final int ARRAY_LAZY_LOWERBOUND = 1024; + // integers should be ArrayContainers + private static final long serialVersionUID = 1L; + protected int cardinality = 0; + protected ShortBuffer content; + + /** + * Create an array container with default capacity + */ + public MappeableArrayContainer() { + this(DEFAULT_INIT_SIZE); + } + + + /** + * Creates a new container from a non-mappeable one. This copies the data. 
+ * + * @param bc the original container + */ + public MappeableArrayContainer(ArrayContainer bc) { + this.cardinality = bc.getCardinality(); + this.content = bc.toShortBuffer(); + } + + + /** + * Create an array container with specified capacity + * + * @param capacity The capacity of the container + */ + public MappeableArrayContainer(final int capacity) { + content = ShortBuffer.allocate(capacity); + } + + /** + * Create an array container with a run of ones from firstOfRun to lastOfRun, exclusive. Caller is + * responsible for making sure the range is small enough that ArrayContainer is appropriate. + * + * @param firstOfRun first index + * @param lastOfRun last index (range is exclusive) + */ + public MappeableArrayContainer(final int firstOfRun, final int lastOfRun) { + // TODO: this can be optimized for performance + final int valuesInRange = lastOfRun - firstOfRun; + content = ShortBuffer.allocate(valuesInRange); + short[] sarray = content.array(); + for (int i = 0; i < valuesInRange; ++i) { + sarray[i] = (short) (firstOfRun + i); + } + cardinality = valuesInRange; + } + + + private MappeableArrayContainer(int newCard, ShortBuffer newContent) { + this.cardinality = newCard; + ShortBuffer tmp = newContent.duplicate();// for thread-safety + this.content = ShortBuffer.allocate(Math.max(newCard, tmp.limit())); + tmp.rewind(); + this.content.put(tmp); + } + + /** + * Construct a new ArrayContainer backed by the provided ShortBuffer. Note that if you modify the + * ArrayContainer a new ShortBuffer may be produced. 
+ * + * @param array ShortBuffer where the data is stored + * @param cardinality cardinality (number of values stored) + */ + public MappeableArrayContainer(final ShortBuffer array, final int cardinality) { + if (array.limit() != cardinality) { + throw new RuntimeException("Mismatch between buffer and cardinality"); + } + this.cardinality = cardinality; + this.content = array; + } + + protected static int getArraySizeInBytes(int cardinality) { + return cardinality * 2; + } + + protected static int serializedSizeInBytes(int cardinality) { + return cardinality * 2 + 2; + } + + @Override + public MappeableContainer add(int begin, int end) { + // TODO: may need to convert to a RunContainer + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + int indexstart = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart = -indexstart - 1; + } + int indexend = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = end - begin; + int newcardinality = indexstart + (cardinality - indexend) + rangelength; + if (newcardinality > DEFAULT_MAX_SIZE) { + MappeableBitmapContainer a = this.toBitmapContainer(); + return a.iadd(begin, end); + } + MappeableArrayContainer answer = new MappeableArrayContainer(newcardinality, content); + if (!BufferUtil.isBackedBySimpleArray(answer.content)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + BufferUtil.arraycopy(content, indexend, answer.content, indexstart + rangelength, + cardinality - indexend); + short[] answerarray = answer.content.array(); + for (int k = 0; k < rangelength; ++k) { + answerarray[k + indexstart] = (short) (begin + k); + } + answer.cardinality = newcardinality; + return answer; + } + + /** + * running time is in O(n) time if insert is not in order. + */ + @Override + // not thread-safe + public MappeableContainer add(final short x) { + if (BufferUtil.isBackedBySimpleArray(this.content)) { + short[] sarray = content.array(); + + int loc = Util.unsignedBinarySearch(sarray, 0, cardinality, x); + if (loc < 0) { + // Transform the ArrayContainer to a BitmapContainer + // when cardinality exceeds DEFAULT_MAX_SIZE + if (cardinality >= DEFAULT_MAX_SIZE) { + final MappeableBitmapContainer a = this.toBitmapContainer(); + a.add(x); + return a; + } + if (cardinality >= sarray.length) { + increaseCapacity(); + sarray = content.array(); + } + // insertion : shift the elements > x by one + // position to + // the right + // and put x in it's appropriate place + System.arraycopy(sarray, -loc - 1, sarray, -loc, cardinality + loc + 1); + sarray[-loc - 1] = x; + ++cardinality; + } + } else { + + final int loc = BufferUtil.unsignedBinarySearch(content, 0, cardinality, x); + if (loc < 0) { + // Transform the ArrayContainer to a BitmapContainer + // when cardinality exceeds DEFAULT_MAX_SIZE + if (cardinality >= DEFAULT_MAX_SIZE) { + final MappeableBitmapContainer a = this.toBitmapContainer(); + a.add(x); + return a; + } + if (cardinality >= this.content.limit()) { + increaseCapacity(); + } + // insertion : shift the elements > x by one + // position to + // the right + // and put x in it's appropriate place + for (int k = cardinality; k > -loc - 1; --k) { + content.put(k, content.get(k - 1)); + } + content.put(-loc - 1, x); + + ++cardinality; + } + } + return this; + } + + private int advance(ShortIterator it) { + if (it.hasNext()) 
{ + return BufferUtil.toIntUnsigned(it.next()); + } else { + return -1; + } + } + + + @Override + public MappeableArrayContainer and(final MappeableArrayContainer value2) { + + MappeableArrayContainer value1 = this; + final int desiredCapacity = Math.min(value1.getCardinality(), value2.getCardinality()); + MappeableArrayContainer answer = new MappeableArrayContainer(desiredCapacity); + if (BufferUtil.isBackedBySimpleArray(this.content) + && BufferUtil.isBackedBySimpleArray(value2.content)) { + answer.cardinality = Util.unsignedIntersect2by2(value1.content.array(), + value1.getCardinality(), value2.content.array(), value2.getCardinality(), + answer.content.array()); + } else { + answer.cardinality = BufferUtil.unsignedIntersect2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content.array()); + } + return answer; + } + + @Override + public MappeableContainer and(MappeableBitmapContainer x) { + return x.and(this); + } + + @Override + public MappeableContainer and(final MappeableRunContainer value2) { + return value2.and(this); + } + + + @Override + public MappeableArrayContainer andNot(final MappeableArrayContainer value2) { + final MappeableArrayContainer value1 = this; + final int desiredCapacity = value1.getCardinality(); + final MappeableArrayContainer answer = new MappeableArrayContainer(desiredCapacity); + if (BufferUtil.isBackedBySimpleArray(value1.content) + && BufferUtil.isBackedBySimpleArray(value2.content)) { + answer.cardinality = + Util.unsignedDifference(value1.content.array(), value1.getCardinality(), + value2.content.array(), value2.getCardinality(), answer.content.array()); + } else { + answer.cardinality = BufferUtil.unsignedDifference(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content.array()); + } + return answer; + } + + @Override + public MappeableArrayContainer andNot(MappeableBitmapContainer value2) { + + final MappeableArrayContainer answer = new 
MappeableArrayContainer(content.limit()); + int pos = 0; + short[] sarray = answer.content.array(); + if (BufferUtil.isBackedBySimpleArray(this.content)) { + short[] c = content.array(); + for (int k = 0; k < cardinality; ++k) { + short v = c[k]; + if (!value2.contains(v)) { + sarray[pos++] = v; + } + } + } else { + for (int k = 0; k < cardinality; ++k) { + short v = this.content.get(k); + if (!value2.contains(v)) { + sarray[pos++] = v; + } + } + } + answer.cardinality = pos; + return answer; + } + + @Override + public MappeableContainer andNot(final MappeableRunContainer x) { + int writeLocation = 0; + int runStart, runEnd; // the current or upcoming run. + if (x.nbrruns == 0) { + return clone(); + } + + ShortBuffer buffer = ShortBuffer.allocate(cardinality); + + runStart = BufferUtil.toIntUnsigned(x.getValue(0)); + runEnd = runStart + BufferUtil.toIntUnsigned(x.getLength(0)); + int whichRun = 0; + + short val; + for (int i = 0; i < cardinality; ++i) { + val = content.get(i); + int valInt = BufferUtil.toIntUnsigned(val); + if (valInt < runStart) { + buffer.put(writeLocation++, val); + } else if (valInt <= runEnd) { + ; // don't want item + } else { + // greater than this run, need to do an advanceUntil on runs + // done sequentially for now (no galloping attempts). + do { + if (whichRun + 1 < x.nbrruns) { + whichRun++; + runStart = BufferUtil.toIntUnsigned(x.getValue(whichRun)); + runEnd = runStart + BufferUtil.toIntUnsigned(x.getLength(whichRun)); + } else { + runStart = runEnd = (1 << 16) + 1; // infinity.... 
+ } + } while (valInt > runEnd); + --i; // need to re-process this val + } + } + return new MappeableArrayContainer(writeLocation, buffer); + } + + @Override + public void clear() { + cardinality = 0; + } + + @Override + public MappeableArrayContainer clone() { + return new MappeableArrayContainer(this.cardinality, this.content); + } + + @Override + public boolean contains(final short x) { + return BufferUtil.unsignedBinarySearch(content, 0, cardinality, x) >= 0; + } + + // in order + // not thread-safe + private void emit(short val) { + if (cardinality == content.limit()) { + increaseCapacity(true); + } + content.put(cardinality++, val); + } + + @Override + public boolean equals(Object o) { + if (o instanceof MappeableArrayContainer) { + final MappeableArrayContainer srb = (MappeableArrayContainer) o; + if (srb.cardinality != this.cardinality) { + return false; + } + if (BufferUtil.isBackedBySimpleArray(this.content) + && BufferUtil.isBackedBySimpleArray(srb.content)) { + short[] t = this.content.array(); + short[] sr = srb.content.array(); + + for (int i = 0; i < this.cardinality; ++i) { + if (t[i] != sr[i]) { + return false; + } + } + + } else { + for (int i = 0; i < this.cardinality; ++i) { + if (this.content.get(i) != srb.content.get(i)) { + return false; + } + } + } + return true; + } else if (o instanceof MappeableRunContainer) { + return o.equals(this); + } + return false; + } + + + @Override + public void fillLeastSignificant16bits(int[] x, int i, int mask) { + if (BufferUtil.isBackedBySimpleArray(this.content)) { + short[] c = this.content.array(); + for (int k = 0; k < this.cardinality; ++k) { + x[k + i] = BufferUtil.toIntUnsigned(c[k]) | mask; + } + + } else { + for (int k = 0; k < this.cardinality; ++k) { + x[k + i] = BufferUtil.toIntUnsigned(this.content.get(k)) | mask; + } + } + } + + @Override + // not thread-safe + public MappeableContainer flip(short x) { + if (BufferUtil.isBackedBySimpleArray(this.content)) { + short[] sarray = content.array(); + 
int loc = Util.unsignedBinarySearch(sarray, 0, cardinality, x); + if (loc < 0) { + // Transform the ArrayContainer to a BitmapContainer + // when cardinality = DEFAULT_MAX_SIZE + if (cardinality >= DEFAULT_MAX_SIZE) { + MappeableBitmapContainer a = this.toBitmapContainer(); + a.add(x); + return a; + } + if (cardinality >= sarray.length) { + increaseCapacity(); + sarray = content.array(); + } + // insertion : shift the elements > x by one position to + // the right + // and put x in it's appropriate place + System.arraycopy(sarray, -loc - 1, sarray, -loc, cardinality + loc + 1); + sarray[-loc - 1] = x; + ++cardinality; + } else { + System.arraycopy(sarray, loc + 1, sarray, loc, cardinality - loc - 1); + --cardinality; + } + return this; + + } else { + int loc = BufferUtil.unsignedBinarySearch(content, 0, cardinality, x); + if (loc < 0) { + // Transform the ArrayContainer to a BitmapContainer + // when cardinality = DEFAULT_MAX_SIZE + if (cardinality >= DEFAULT_MAX_SIZE) { + MappeableBitmapContainer a = this.toBitmapContainer(); + a.add(x); + return a; + } + if (cardinality >= content.limit()) { + increaseCapacity(); + } + // insertion : shift the elements > x by one position to + // the right + // and put x in it's appropriate place + for (int k = cardinality; k > -loc - 1; --k) { + content.put(k, content.get(k - 1)); + } + content.put(-loc - 1, x); + ++cardinality; + } else { + for (int k = loc + 1; k < cardinality; --k) { + content.put(k - 1, content.get(k)); + } + --cardinality; + } + return this; + } + } + + @Override + protected int getArraySizeInBytes() { + return getArraySizeInBytes(cardinality); + } + + @Override + public int getCardinality() { + return cardinality; + } + + @Override + public ShortIterator getReverseShortIterator() { + if (this.isArrayBacked()) { + return new RawReverseArrayContainerShortIterator(this); + } + return new ReverseMappeableArrayContainerShortIterator(this); + } + + @Override + public PeekableShortIterator getShortIterator() { + 
if (this.isArrayBacked()) { + return new RawArrayContainerShortIterator(this); + } + return new MappeableArrayContainerShortIterator(this); + } + + @Override + public int getSizeInBytes() { + return this.cardinality * 2; + } + + @Override + public int hashCode() { + int hash = 0; + for (int k = 0; k < cardinality; ++k) { + hash += 31 * hash + content.get(k); + } + return hash; + } + + @Override + // not thread-safe + public MappeableContainer iadd(int begin, int end) { + // TODO: may need to convert to a RunContainer + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + int indexstart = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart = -indexstart - 1; + } + int indexend = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = end - begin; + int newcardinality = indexstart + (cardinality - indexend) + rangelength; + if (newcardinality > DEFAULT_MAX_SIZE) { + MappeableBitmapContainer a = this.toBitmapContainer(); + return a.iadd(begin, end); + } + if (newcardinality >= this.content.limit()) { + increaseCapacity(newcardinality); + } + BufferUtil.arraycopy(content, indexend, content, indexstart + rangelength, + cardinality - indexend); + if (BufferUtil.isBackedBySimpleArray(content)) { + short[] contentarray = content.array(); + for (int k = 0; k < rangelength; ++k) { + contentarray[k + indexstart] = (short) (begin + k); + } + } else { + for (int k = 0; k < rangelength; ++k) { + content.put(k + indexstart, (short) (begin + k)); + } + } + cardinality = newcardinality; + return this; + } + + @Override + public MappeableArrayContainer iand(final MappeableArrayContainer value2) { + final MappeableArrayContainer value1 = this; + if 
(!BufferUtil.isBackedBySimpleArray(value1.content)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + value1.cardinality = BufferUtil.unsignedIntersect2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), value1.content.array()); + return this; + } + + + @Override + public MappeableContainer iand(MappeableBitmapContainer value2) { + int pos = 0; + for (int k = 0; k < cardinality; ++k) { + short v = this.content.get(k); + if (value2.contains(v)) { + this.content.put(pos++, v); + } + } + cardinality = pos; + return this; + } + + + // Note it is never inplace, may wish to fix + @Override + public MappeableContainer iand(final MappeableRunContainer value2) { + return value2.and(this); + } + + @Override + public MappeableArrayContainer iandNot(final MappeableArrayContainer value2) { + if (!BufferUtil.isBackedBySimpleArray(this.content)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + this.cardinality = + Util.unsignedDifference(this.content.array(), this.getCardinality(), + value2.content.array(), value2.getCardinality(), this.content.array()); + } else { + this.cardinality = BufferUtil.unsignedDifference(this.content, this.getCardinality(), + value2.content, value2.getCardinality(), this.content.array()); + } + + return this; + } + + @Override + public MappeableArrayContainer iandNot(MappeableBitmapContainer value2) { + if (!BufferUtil.isBackedBySimpleArray(this.content)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + short[] c = this.content.array(); + int pos = 0; + for (int k = 0; k < cardinality; ++k) { + short v = c[k]; + if (!value2.contains(v)) { + c[pos++] = v; + } + } + this.cardinality = pos; + return this; + } + + @Override + public MappeableContainer iandNot(final MappeableRunContainer value2) { // not inplace, revisit? 
+ return andNot(value2); + } + + private void increaseCapacity() { + increaseCapacity(false); + } + + // temporarily allow an illegally large size, as long as the operation creating + // the illegal container does not return it. + // not thread safe! + private void increaseCapacity(boolean allowIllegalSize) { + int len = this.content.limit(); + int newCapacity = (len == 0) ? DEFAULT_INIT_SIZE + : len < 64 ? len * 2 : this.content.limit() < 1067 ? len * 3 / 2 : len * 5 / 4; + // do not allocate more than we will ever need + if (newCapacity > MappeableArrayContainer.DEFAULT_MAX_SIZE && !allowIllegalSize) { + newCapacity = MappeableArrayContainer.DEFAULT_MAX_SIZE; + } + // if we are within 1/16th of the max., go to max right away to avoid further reallocations + if (newCapacity > MappeableArrayContainer.DEFAULT_MAX_SIZE + - MappeableArrayContainer.DEFAULT_MAX_SIZE / 16 && !allowIllegalSize) { + newCapacity = MappeableArrayContainer.DEFAULT_MAX_SIZE; + } + final ShortBuffer newContent = ShortBuffer.allocate(newCapacity); + this.content.rewind(); + newContent.put(this.content); + this.content = newContent; + } + + + // not thread safe! + private void increaseCapacity(int min) { + int len = this.content.limit(); + int newCapacity = (len == 0) ? DEFAULT_INIT_SIZE + : len < 64 ? len * 2 : len < 1024 ? len * 3 / 2 : len * 5 / 4; + if (newCapacity < min) { + newCapacity = min; + } + if (newCapacity > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + newCapacity = MappeableArrayContainer.DEFAULT_MAX_SIZE; + } + if (newCapacity > MappeableArrayContainer.DEFAULT_MAX_SIZE + - MappeableArrayContainer.DEFAULT_MAX_SIZE / 16) { + newCapacity = MappeableArrayContainer.DEFAULT_MAX_SIZE; + } + final ShortBuffer newContent = ShortBuffer.allocate(newCapacity); + this.content.rewind(); + newContent.put(this.content); + this.content = newContent; + } + + @Override + // not thread safe! (duh!) 
+ public MappeableContainer inot(final int firstOfRange, final int lastOfRange) { + // TODO: may need to convert to a RunContainer + // TODO: this can be optimized for performance + // determine the span of array indices to be affected + int startIndex = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) firstOfRange); + if (startIndex < 0) { + startIndex = -startIndex - 1; + } + int lastIndex = + BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) (lastOfRange - 1)); + if (lastIndex < 0) { + lastIndex = -lastIndex - 1 - 1; + } + final int currentValuesInRange = lastIndex - startIndex + 1; + final int spanToBeFlipped = lastOfRange - firstOfRange; + final int newValuesInRange = spanToBeFlipped - currentValuesInRange; + final ShortBuffer buffer = ShortBuffer.allocate(newValuesInRange); + final int cardinalityChange = newValuesInRange - currentValuesInRange; + final int newCardinality = cardinality + cardinalityChange; + + if (cardinalityChange > 0) { // expansion, right shifting needed + if (newCardinality > content.limit()) { + // so big we need a bitmap? 
+ if (newCardinality > DEFAULT_MAX_SIZE) { + return toBitmapContainer().inot(firstOfRange, lastOfRange); + } + final ShortBuffer co = ShortBuffer.allocate(newCardinality); + content.rewind(); + co.put(content); + content = co; + } + // slide right the contents after the range + for (int pos = cardinality - 1; pos > lastIndex; --pos) { + content.put(pos + cardinalityChange, content.get(pos)); + } + negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange); + } else { // no expansion needed + negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange); + if (cardinalityChange < 0) { + // Leave array oversize + for (int i = startIndex + newValuesInRange; i < newCardinality; ++i) { + content.put(i, content.get(i - cardinalityChange)); + } + } + } + cardinality = newCardinality; + return this; + } + + @Override + public boolean intersects(MappeableArrayContainer value2) { + MappeableArrayContainer value1 = this; + return BufferUtil.unsignedIntersects(value1.content, value1.getCardinality(), value2.content, + value2.getCardinality()); + } + + @Override + public boolean intersects(MappeableBitmapContainer x) { + return x.intersects(this); + } + + @Override + public boolean intersects(MappeableRunContainer x) { + return x.intersects(this); + } + + @Override + public MappeableContainer ior(final MappeableArrayContainer value2) { + return this.or(value2); + } + + @Override + public MappeableContainer ior(MappeableBitmapContainer x) { + return x.or(this); + } + + @Override + public MappeableContainer ior(final MappeableRunContainer value2) { + // not inplace + return value2.or(this); + } + + @Override + public MappeableContainer iremove(int begin, int end) { + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + int indexstart = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart 
= -indexstart - 1; + } + int indexend = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = indexend - indexstart; + BufferUtil.arraycopy(content, indexstart + rangelength, content, indexstart, + cardinality - indexstart - rangelength); + cardinality -= rangelength; + return this; + } + + @Override + protected boolean isArrayBacked() { + return BufferUtil.isBackedBySimpleArray(this.content); + } + + + @Override + public Iterator iterator() { + + return new Iterator() { + short pos = 0; + + @Override + public boolean hasNext() { + return pos < MappeableArrayContainer.this.cardinality; + } + + @Override + public Short next() { + return MappeableArrayContainer.this.content.get(pos++); + } + + @Override + public void remove() { + MappeableArrayContainer.this.remove((short) (pos - 1)); + pos--; + } + }; + } + + @Override + public MappeableContainer ixor(final MappeableArrayContainer value2) { + return this.xor(value2); + } + + + @Override + public MappeableContainer ixor(MappeableBitmapContainer x) { + return x.xor(this); + } + + @Override + public MappeableContainer ixor(final MappeableRunContainer value2) { + return value2.xor(this); + } + + @Override + public MappeableContainer limit(int maxcardinality) { + if (maxcardinality < this.getCardinality()) { + return new MappeableArrayContainer(maxcardinality, this.content); + } else { + return clone(); + } + } + + + protected void loadData(final MappeableBitmapContainer bitmapContainer) { + this.cardinality = bitmapContainer.cardinality; + if (!BufferUtil.isBackedBySimpleArray(this.content)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + bitmapContainer.fillArray(content.array()); + } + + // for use in inot range known to be nonempty + private void negateRange(final ShortBuffer buffer, final int startIndex, final int lastIndex, + final int startRange, final int lastRange) { + // compute the negation into buffer + + int outPos = 0; + int inPos = startIndex; // value here always >= valInRange, + // until it is exhausted + // n.b., we can start initially exhausted. + + int valInRange = startRange; + for (; valInRange < lastRange && inPos <= lastIndex; ++valInRange) { + if ((short) valInRange != content.get(inPos)) { + buffer.put(outPos++, (short) valInRange); + } else { + ++inPos; + } + } + + // if there are extra items (greater than the biggest + // pre-existing one in range), buffer them + for (; valInRange < lastRange; ++valInRange) { + buffer.put(outPos++, (short) valInRange); + } + + if (outPos != buffer.limit()) { + throw new RuntimeException( + "negateRange: outPos " + outPos + " whereas buffer.length=" + buffer.limit()); + } + assert outPos == buffer.limit(); + // copy back from buffer...caller must ensure there is room + int i = startIndex; + int len = buffer.limit(); + for (int k = 0; k < len; ++k) { + final short item = buffer.get(k); + content.put(i++, item); + } + } + + // shares lots of code with inot; candidate for refactoring + @Override + public MappeableContainer not(final int firstOfRange, final int lastOfRange) { + // TODO: may need to convert to a RunContainer + // TODO: this can be optimized for performance + if (firstOfRange >= lastOfRange) { + return clone(); // empty range + } + + // determine the span of array indices to be affected + int startIndex = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) firstOfRange); + if (startIndex < 0) { + startIndex = -startIndex - 1; + } + int lastIndex = + BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) (lastOfRange - 1)); + if (lastIndex < 0) { + lastIndex = -lastIndex - 2; + } + 
final int currentValuesInRange = lastIndex - startIndex + 1; + final int spanToBeFlipped = lastOfRange - firstOfRange; + final int newValuesInRange = spanToBeFlipped - currentValuesInRange; + final int cardinalityChange = newValuesInRange - currentValuesInRange; + final int newCardinality = cardinality + cardinalityChange; + + if (newCardinality > DEFAULT_MAX_SIZE) { + return toBitmapContainer().not(firstOfRange, lastOfRange); + } + + final MappeableArrayContainer answer = new MappeableArrayContainer(newCardinality); + if (!BufferUtil.isBackedBySimpleArray(answer.content)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + short[] sarray = answer.content.array(); + + for (int i = 0; i < startIndex; ++i) { + // copy stuff before the active area + sarray[i] = content.get(i); + } + + int outPos = startIndex; + int inPos = startIndex; // item at inPos always >= valInRange + + int valInRange = firstOfRange; + for (; valInRange < lastOfRange && inPos <= lastIndex; ++valInRange) { + if ((short) valInRange != content.get(inPos)) { + sarray[outPos++] = (short) valInRange; + } else { + ++inPos; + } + } + + for (; valInRange < lastOfRange; ++valInRange) { + answer.content.put(outPos++, (short) valInRange); + } + + // content after the active range + for (int i = lastIndex + 1; i < cardinality; ++i) { + answer.content.put(outPos++, content.get(i)); + } + answer.cardinality = newCardinality; + return answer; + } + + @Override + int numberOfRuns() { + if (cardinality == 0) { + return 0; // should never happen + } + + if (BufferUtil.isBackedBySimpleArray(content)) { + short[] c = content.array(); + int numRuns = 1; + int oldv = BufferUtil.toIntUnsigned(c[0]); + for (int i = 1; i < cardinality; i++) { + int newv = BufferUtil.toIntUnsigned(c[i]); + if (oldv + 1 != newv) { + ++numRuns; + } + oldv = newv; + } + return numRuns; + } else { + int numRuns = 1; + int previous = BufferUtil.toIntUnsigned(content.get(0)); + // we do not proceed like above for fear that 
calling "get" twice per loop would be too much + for (int i = 1; i < cardinality; i++) { + int val = BufferUtil.toIntUnsigned(content.get(i)); + if (val != previous + 1) { + ++numRuns; + } + previous = val; + } + return numRuns; + } + } + + + @Override + public MappeableContainer or(final MappeableArrayContainer value2) { + final MappeableArrayContainer value1 = this; + final int totalCardinality = value1.getCardinality() + value2.getCardinality(); + if (totalCardinality > DEFAULT_MAX_SIZE) {// it could be a bitmap! + final MappeableBitmapContainer bc = new MappeableBitmapContainer(); + if (!BufferUtil.isBackedBySimpleArray(bc.bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] bitArray = bc.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] sarray = value2.content.array(); + for (int k = 0; k < value2.cardinality; ++k) { + short v = sarray[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] |= (1L << v); + } + } else { + for (int k = 0; k < value2.cardinality; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + bitArray[i] |= (1L << v2); + } + } + if (BufferUtil.isBackedBySimpleArray(this.content)) { + short[] sarray = this.content.array(); + for (int k = 0; k < this.cardinality; ++k) { + short v = sarray[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] |= (1L << v); + } + } else { + for (int k = 0; k < this.cardinality; ++k) { + short v = this.content.get(k); + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] |= (1L << v); + } + } + bc.cardinality = 0; + int len = bc.bitmap.limit(); + for (int index = 0; index < len; ++index) { + bc.cardinality += Long.bitCount(bitArray[index]); + } + if (bc.cardinality <= DEFAULT_MAX_SIZE) { + return bc.toArrayContainer(); + } + return bc; + } + final MappeableArrayContainer answer = new MappeableArrayContainer(totalCardinality); + if 
(BufferUtil.isBackedBySimpleArray(value1.content) + && BufferUtil.isBackedBySimpleArray(value2.content)) { + answer.cardinality = + Util.unsignedUnion2by2(value1.content.array(), value1.getCardinality(), + value2.content.array(), value2.getCardinality(), answer.content.array()); + } else { + answer.cardinality = BufferUtil.unsignedUnion2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content.array()); + } + return answer; + } + + protected MappeableContainer lazyor(final MappeableArrayContainer value2) { + final MappeableArrayContainer value1 = this; + final int totalCardinality = value1.getCardinality() + value2.getCardinality(); + if (totalCardinality > ARRAY_LAZY_LOWERBOUND) {// it could be a bitmap! + final MappeableBitmapContainer bc = new MappeableBitmapContainer(); + if (!BufferUtil.isBackedBySimpleArray(bc.bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] bitArray = bc.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] sarray = value2.content.array(); + for (int k = 0; k < value2.cardinality; ++k) { + short v = sarray[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] |= (1L << v); + } + } else { + for (int k = 0; k < value2.cardinality; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + bitArray[i] |= (1L << v2); + } + } + if (BufferUtil.isBackedBySimpleArray(this.content)) { + short[] sarray = this.content.array(); + for (int k = 0; k < this.cardinality; ++k) { + short v = sarray[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] |= (1L << v); + } + } else { + for (int k = 0; k < this.cardinality; ++k) { + short v = this.content.get(k); + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] |= (1L << v); + } + } + bc.cardinality = -1; + return bc; + } + final MappeableArrayContainer answer = new MappeableArrayContainer(totalCardinality); + if 
(BufferUtil.isBackedBySimpleArray(value1.content) + && BufferUtil.isBackedBySimpleArray(value2.content)) { + answer.cardinality = + Util.unsignedUnion2by2(value1.content.array(), value1.getCardinality(), + value2.content.array(), value2.getCardinality(), answer.content.array()); + } else { + answer.cardinality = BufferUtil.unsignedUnion2by2(value1.content, value1.getCardinality(), + value2.content, value2.getCardinality(), answer.content.array()); + } + return answer; + } + + + @Override + public MappeableContainer or(MappeableBitmapContainer x) { + return x.or(this); + } + + @Override + public MappeableContainer or(final MappeableRunContainer value2) { + return value2.or(this); + } + + protected MappeableContainer or(ShortIterator it) { + return or(it, false); + } + + /** + * it must return items in (unsigned) sorted order. Possible candidate for Container interface? + **/ + private MappeableContainer or(ShortIterator it, boolean exclusive) { + MappeableArrayContainer ac = new MappeableArrayContainer(); + int myItPos = 0; + ac.cardinality = 0; + // do a merge. int -1 denotes end of input. + int myHead = (myItPos == cardinality) ? -1 : BufferUtil.toIntUnsigned(content.get(myItPos++)); + int hisHead = advance(it); + + while (myHead != -1 && hisHead != -1) { + if (myHead < hisHead) { + ac.emit((short) myHead); + myHead = (myItPos == cardinality) ? -1 : BufferUtil.toIntUnsigned(content.get(myItPos++)); + } else if (myHead > hisHead) { + ac.emit((short) hisHead); + hisHead = advance(it); + } else { + if (!exclusive) { + ac.emit((short) hisHead); + } + hisHead = advance(it); + myHead = (myItPos == cardinality) ? -1 : BufferUtil.toIntUnsigned(content.get(myItPos++)); + } + } + + while (myHead != -1) { + ac.emit((short) myHead); + myHead = (myItPos == cardinality) ? 
-1 : BufferUtil.toIntUnsigned(content.get(myItPos++)); + } + + while (hisHead != -1) { + ac.emit((short) hisHead); + hisHead = advance(it); + } + + if (ac.cardinality > DEFAULT_MAX_SIZE) { + return ac.toBitmapContainer(); + } else { + return ac; + } + } + + @Override + public int rank(short lowbits) { + int answer = BufferUtil.unsignedBinarySearch(content, 0, cardinality, lowbits); + if (answer >= 0) { + return answer + 1; + } else { + return -answer - 1; + } + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + // little endian + this.cardinality = 0xFFFF & Short.reverseBytes(in.readShort()); + if (this.content.limit() < this.cardinality) { + this.content = ShortBuffer.allocate(this.cardinality); + } + for (int k = 0; k < this.cardinality; ++k) { + this.content.put(k, Short.reverseBytes(in.readShort())); + } + } + + @Override + public MappeableContainer remove(int begin, int end) { + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + int indexstart = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) begin); + if (indexstart < 0) { + indexstart = -indexstart - 1; + } + int indexend = BufferUtil.unsignedBinarySearch(content, 0, cardinality, (short) (end - 1)); + if (indexend < 0) { + indexend = -indexend - 1; + } else { + indexend++; + } + int rangelength = indexend - indexstart; + MappeableArrayContainer answer = clone(); + BufferUtil.arraycopy(content, indexstart + rangelength, answer.content, indexstart, + cardinality - indexstart - rangelength); + answer.cardinality = cardinality - rangelength; + return answer; + } + + + @Override + public MappeableContainer remove(final short x) { + if (BufferUtil.isBackedBySimpleArray(this.content)) { + final int loc = Util.unsignedBinarySearch(content.array(), 0, cardinality, x); + if (loc >= 0) { + // insertion + 
System.arraycopy(content.array(), loc + 1, content.array(), loc, cardinality - loc - 1); + --cardinality; + } + return this; + } else { + final int loc = BufferUtil.unsignedBinarySearch(content, 0, cardinality, x); + if (loc >= 0) { + // insertion + for (int k = loc + 1; k < cardinality; --k) { + content.put(k - 1, content.get(k)); + } + --cardinality; + } + return this; + + } + } + + @Override + public MappeableContainer repairAfterLazy() { + return this; + } + + + @Override + public MappeableContainer runOptimize() { + int numRuns = numberOfRuns(); + int sizeAsRunContainer = MappeableRunContainer.getArraySizeInBytes(numRuns); + if (getArraySizeInBytes() > sizeAsRunContainer) { + return new MappeableRunContainer(this, numRuns); // this could be + // maybe faster if + // initial + // container is a + // bitmap + } else { + return this; + } + } + + @Override + public short select(int j) { + return this.content.get(j); + } + + @Override + public int serializedSizeInBytes() { + return serializedSizeInBytes(cardinality); + } + + /** + * Copies the data in a bitmap container. + * + * @return the bitmap container + */ + public MappeableBitmapContainer toBitmapContainer() { + final MappeableBitmapContainer bc = new MappeableBitmapContainer(); + bc.loadData(this); + return bc; + } + + @Override + public Container toContainer() { + return new ArrayContainer(this); + } + + /** + * Create a copy of the content of this container as a short array. This creates a copy. 
+ * + * @return copy of the content as a short array + */ + public short[] toShortArray() { + short[] answer = new short[cardinality]; + content.rewind(); + content.get(answer); + return answer; + } + + @Override + public String toString() { + if (this.cardinality == 0) { + return "{}"; + } + final StringBuilder sb = new StringBuilder(); + sb.append("{"); + for (int i = 0; i < this.cardinality - 1; i++) { + sb.append(this.content.get(i)); + sb.append(","); + } + sb.append(this.content.get(this.cardinality - 1)); + sb.append("}"); + return sb.toString(); + } + + @Override + public void trim() { + if (this.content.limit() == this.cardinality) { + return; + } + if (BufferUtil.isBackedBySimpleArray(content)) { + this.content = ShortBuffer.wrap(Arrays.copyOf(content.array(), cardinality)); + } else { + final ShortBuffer co = ShortBuffer.allocate(this.cardinality); + // can assume that new co is array backed + short[] x = co.array(); + for (int k = 0; k < this.cardinality; ++k) { + x[k] = this.content.get(k); + } + this.content = co; + } + } + + @Override + protected void writeArray(DataOutput out) throws IOException { + // little endian + if (BufferUtil.isBackedBySimpleArray(content)) { + short[] a = content.array(); + for (int k = 0; k < this.cardinality; ++k) { + out.writeShort(Short.reverseBytes(a[k])); + } + } else { + for (int k = 0; k < this.cardinality; ++k) { + out.writeShort(Short.reverseBytes(content.get(k))); + } + } + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.write(this.cardinality & 0xFF); + out.write((this.cardinality >>> 8) & 0xFF); + if (BufferUtil.isBackedBySimpleArray(content)) { + short[] a = content.array(); + for (int k = 0; k < this.cardinality; ++k) { + out.writeShort(Short.reverseBytes(a[k])); + } + } else { + for (int k = 0; k < this.cardinality; ++k) { + out.writeShort(Short.reverseBytes(content.get(k))); + } + } + } + + @Override + public MappeableContainer xor(final MappeableArrayContainer 
value2) { + final MappeableArrayContainer value1 = this; + final int totalCardinality = value1.getCardinality() + value2.getCardinality(); + if (totalCardinality > DEFAULT_MAX_SIZE) {// it could be a bitmap! + final MappeableBitmapContainer bc = new MappeableBitmapContainer(); + if (!BufferUtil.isBackedBySimpleArray(bc.bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] bitArray = bc.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] sarray = value2.content.array(); + for (int k = 0; k < value2.cardinality; ++k) { + short v = sarray[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] ^= (1L << v); + } + } else { + for (int k = 0; k < value2.cardinality; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + bitArray[i] ^= (1L << v2); + } + } + if (BufferUtil.isBackedBySimpleArray(this.content)) { + short[] sarray = this.content.array(); + for (int k = 0; k < this.cardinality; ++k) { + short v = sarray[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] ^= (1L << v); + } + } else { + for (int k = 0; k < this.cardinality; ++k) { + short v = this.content.get(k); + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + bitArray[i] ^= (1L << v); + } + } + + bc.cardinality = 0; + int len = bc.bitmap.limit(); + for (int index = 0; index < len; ++index) { + bc.cardinality += Long.bitCount(bitArray[index]); + } + if (bc.cardinality <= DEFAULT_MAX_SIZE) { + return bc.toArrayContainer(); + } + return bc; + } + final MappeableArrayContainer answer = new MappeableArrayContainer(totalCardinality); + if (BufferUtil.isBackedBySimpleArray(value1.content) + && BufferUtil.isBackedBySimpleArray(value2.content)) { + answer.cardinality = Util.unsignedExclusiveUnion2by2(value1.content.array(), + value1.getCardinality(), value2.content.array(), value2.getCardinality(), + answer.content.array()); + } else { + answer.cardinality = 
BufferUtil.unsignedExclusiveUnion2by2(value1.content, + value1.getCardinality(), value2.content, value2.getCardinality(), answer.content.array()); + } + return answer; + } + + @Override + public MappeableContainer xor(MappeableBitmapContainer x) { + return x.xor(this); + } + + @Override + public MappeableContainer xor(final MappeableRunContainer value2) { + return value2.xor(this); + } + + + protected MappeableContainer xor(ShortIterator it) { + return or(it, true); + } + + @Override + public void forEach(short msb, IntConsumer ic) { + int high = ((int) msb) << 16; + if (BufferUtil.isBackedBySimpleArray(content)) { + short[] c = content.array(); + for (int k = 0; k < cardinality; ++k) { + ic.accept((c[k] & 0xFFFF) | high); + } + } else { + for (int k = 0; k < cardinality; ++k) { + ic.accept((content.get(k) & 0xFFFF) | high); + } + } + } + + @Override + public int andCardinality(MappeableArrayContainer value2) { + if (BufferUtil.isBackedBySimpleArray(content) + && BufferUtil.isBackedBySimpleArray(value2.content)) { + return Util.unsignedLocalIntersect2by2Cardinality(content.array(), cardinality, + value2.content.array(), value2.getCardinality()); + } + return BufferUtil.unsignedLocalIntersect2by2Cardinality(content, cardinality, + value2.content, value2.getCardinality()); + } + + @Override + public int andCardinality(MappeableBitmapContainer x) { + return x.andCardinality(this); + } + + @Override + // see andNot for an approach that might be better. 
+ public int andCardinality(MappeableRunContainer x) { + return x.andCardinality(this); + } + + +} + + +final class MappeableArrayContainerShortIterator implements PeekableShortIterator { + int pos; + MappeableArrayContainer parent; + + MappeableArrayContainerShortIterator() { + } + + + MappeableArrayContainerShortIterator(MappeableArrayContainer p) { + wrap(p); + } + + @Override + public void advanceIfNeeded(short minval) { + pos = BufferUtil.advanceUntil(parent.content, pos - 1, parent.cardinality, minval); + } + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos < parent.cardinality; + } + + @Override + public short next() { + return parent.content.get(pos++); + } + + + @Override + public int nextAsInt() { + return BufferUtil.toIntUnsigned(parent.content.get(pos++)); + } + + @Override + public short peekNext() { + return parent.content.get(pos); + } + + + @Override + public void remove() { + parent.remove(parent.content.get(pos - 1));// remove(short) deletes by value: pass the element last returned, not its index + pos--; + } + + void wrap(MappeableArrayContainer p) { + parent = p; + pos = 0; + } + + +} + + +final class RawArrayContainerShortIterator implements PeekableShortIterator { + int pos; + MappeableArrayContainer parent; + short[] content; + + + RawArrayContainerShortIterator(MappeableArrayContainer p) { + parent = p; + if (!p.isArrayBacked()) { + throw new RuntimeException("internal bug"); + } + content = p.content.array(); + pos = 0; + } + + + @Override + public void advanceIfNeeded(short minval) { + pos = Util.advanceUntil(content, pos - 1, parent.cardinality, minval); + } + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos < parent.cardinality; + } 
+ + @Override + public short next() { + return content[pos++]; + } + + @Override + public int nextAsInt() { + return BufferUtil.toIntUnsigned(content[pos++]); + } + + @Override + public short peekNext() { + return content[pos]; + } + + @Override + public void remove() { + parent.remove(content[pos - 1]);// remove(short) deletes by value: pass the element last returned, not its index + pos--; + } + +} + + +final class RawReverseArrayContainerShortIterator implements ShortIterator { + int pos; + MappeableArrayContainer parent; + short[] content; + + + RawReverseArrayContainerShortIterator(MappeableArrayContainer p) { + parent = p; + if (!p.isArrayBacked()) { + throw new RuntimeException("internal bug"); + } + content = p.content.array(); + pos = parent.cardinality - 1; + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos >= 0; + } + + + @Override + public short next() { + return content[pos--]; + } + + @Override + public int nextAsInt() { + return BufferUtil.toIntUnsigned(content[pos--]); + } + + @Override + public void remove() { + parent.remove(content[pos + 1]);// remove(short) deletes by value: pass the element last returned, not its index + // pos already addresses the next (lower) element, which the removal did not move + } + +}; + + +final class ReverseMappeableArrayContainerShortIterator implements ShortIterator { + + int pos; + + MappeableArrayContainer parent; + + ReverseMappeableArrayContainerShortIterator() { + } + + + ReverseMappeableArrayContainerShortIterator(MappeableArrayContainer p) { + wrap(p); + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos >= 0; + } + + @Override + public short next() { + return parent.content.get(pos--); + } + + + @Override + public int nextAsInt() { + return BufferUtil.toIntUnsigned(parent.content.get(pos--)); + } + + @Override + public void remove() { 
+ parent.remove(parent.content.get(pos + 1));// remove(short) deletes by value: pass the element last returned, not its index + // pos already addresses the next (lower) element, which the removal did not move + } + + void wrap(MappeableArrayContainer p) { + parent = p; + pos = parent.cardinality - 1; + } + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableBitmapContainer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableBitmapContainer.java new file mode 100644 index 000000000..9d1178aab --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableBitmapContainer.java @@ -0,0 +1,2012 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.BitmapContainer; +import com.fr.third.bitmap.roaringbitmap.Container; +import com.fr.third.bitmap.roaringbitmap.IntConsumer; +import com.fr.third.bitmap.roaringbitmap.PeekableShortIterator; +import com.fr.third.bitmap.roaringbitmap.ShortIterator; +import com.fr.third.bitmap.roaringbitmap.Util; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.LongBuffer; +import java.util.Iterator; + +/** + * Simple bitset-like container. Unlike BitmapContainer, this class uses a + * LongBuffer to store data. 
+ */ +public final class MappeableBitmapContainer extends MappeableContainer implements Cloneable { + /** + * optimization flag: whether the cardinality of the bitmaps is maintained through branchless + * operation + */ + public static final boolean USE_BRANCHLESS = true; + protected static final int MAX_CAPACITY = 1 << 16; + private static final long serialVersionUID = 2L; + // 64 words can have max 32 runs per word, max 2k runs + // bail out early when the number of runs is excessive, without + // an exact count (just a decent lower bound) + private static final int BLOCKSIZE = 128; + // nruns value for which RunContainer.serializedSizeInBytes == + // BitmapContainer.getArraySizeInBytes() + private final int MAXRUNS = (getArraySizeInBytes() - 2) / 4; + LongBuffer bitmap; + + int cardinality; + + /** + * Create a bitmap container with all bits set to false + */ + public MappeableBitmapContainer() { + this.cardinality = 0; + this.bitmap = LongBuffer.allocate(MAX_CAPACITY / 64); + } + + /** + * Creates a new bitmap container from a non-mappeable one. This copies the data. 
+ * + * @param bc the original container + */ + public MappeableBitmapContainer(BitmapContainer bc) { + this.cardinality = bc.getCardinality(); + this.bitmap = bc.toLongBuffer(); + } + + /** + * Create a bitmap container with a run of ones from firstOfRun (inclusive) to lastOfRun + * (exclusive). The caller must ensure that the range isn't so small that an ArrayContainer + * should have been created instead + * + * @param firstOfRun first index (inclusive) + * @param lastOfRun last index (exclusive) + */ + public MappeableBitmapContainer(final int firstOfRun, final int lastOfRun) { + // TODO: this can be optimized for performance + this.cardinality = lastOfRun - firstOfRun; + this.bitmap = LongBuffer.allocate(MAX_CAPACITY / 64); + Util.setBitmapRange(bitmap.array(), firstOfRun, lastOfRun); + } + + MappeableBitmapContainer(int newCardinality, LongBuffer newBitmap) { + this.cardinality = newCardinality; + LongBuffer tmp = newBitmap.duplicate(); // for thread safety + this.bitmap = LongBuffer.allocate(tmp.limit()); + tmp.rewind(); + this.bitmap.put(tmp); + } + + + /** + * Construct a new BitmapContainer backed by the provided LongBuffer. + * + * @param array LongBuffer where the data is stored + * @param initCardinality cardinality (number of values stored) + */ + public MappeableBitmapContainer(final LongBuffer array, final int initCardinality) { + if (array.limit() != MAX_CAPACITY / 64) { + throw new RuntimeException("Mismatch between buffer and storage requirements: " + + array.limit() + " vs. 
" + MAX_CAPACITY / 64); + } + this.cardinality = initCardinality; + this.bitmap = array; + } + + // the parameter is for overloading and symmetry with ArrayContainer + protected static int serializedSizeInBytes(int unusedCardinality) { + return MAX_CAPACITY / 8; + } + + @Override + public MappeableContainer add(int begin, int end) { + // TODO: may need to convert to a RunContainer + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + MappeableBitmapContainer answer = clone(); + BufferUtil.setBitmapRange(answer.bitmap, begin, end); + answer.computeCardinality(); + return answer; + } + + + @Override + public MappeableContainer add(final short i) { + final int x = BufferUtil.toIntUnsigned(i); + final long previous = bitmap.get(x / 64); + final long newv = previous | (1L << x); + bitmap.put(x / 64, newv); + if (USE_BRANCHLESS) { + cardinality += (previous ^ newv) >>> x; + } else if (previous != newv) { + cardinality++; + } + return this; + } + + @Override + public MappeableArrayContainer and(final MappeableArrayContainer value2) { + + final MappeableArrayContainer answer = new MappeableArrayContainer(value2.content.limit()); + if (!BufferUtil.isBackedBySimpleArray(answer.content)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + short[] sarray = answer.content.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] c = value2.content.array(); + int ca = value2.cardinality; + for (int k = 0; k < ca; ++k) { + short v = c[k]; + if (this.contains(v)) { + sarray[answer.cardinality++] = v; + } + } + + } else { + int ca = value2.cardinality; + for (int k = 0; k < ca; ++k) { + short v = value2.content.get(k); + if (this.contains(v)) { + sarray[answer.cardinality++] = v; + } + } + } + return answer; + } + + @Override + public MappeableContainer and(final MappeableBitmapContainer value2) { + int newCardinality = 0; + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] tb = this.bitmap.array(); + long[] v2b = value2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(tb[k] & v2b[k]); + } + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(this.bitmap.get(k) & value2.bitmap.get(k)); + } + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + final MappeableBitmapContainer answer = new MappeableBitmapContainer(); + if (!BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + long[] bitArray = answer.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] tb = this.bitmap.array(); + long[] v2b = value2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + bitArray[k] = tb[k] & v2b[k]; + } + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + bitArray[k] = this.bitmap.get(k) & value2.bitmap.get(k); + } + } + answer.cardinality = newCardinality; + return answer; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + Util.fillArrayAND(ac.content.array(), this.bitmap.array(), + value2.bitmap.array()); + } else { + BufferUtil.fillArrayAND(ac.content.array(), this.bitmap, value2.bitmap); + } + ac.cardinality = newCardinality; + return ac; + } + + @Override + public MappeableContainer and(final MappeableRunContainer value2) { + return value2.and(this); + } + + + @Override + public MappeableContainer andNot(final MappeableArrayContainer value2) { + final MappeableBitmapContainer answer = clone(); + if (!BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + long[] bitArray = answer.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content) + && BufferUtil.isBackedBySimpleArray(this.bitmap)) { + short[] v2 = value2.content.array(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = v2[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + long w = bitArray[i]; + long aft = w & (~(1L << v)); + bitArray[i] = aft; + answer.cardinality -= (w ^ aft) >>> v; + } + } else { + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + long w = bitArray[i]; + long aft = bitArray[i] & (~(1L << v2)); + bitArray[i] = aft; + answer.cardinality -= (w ^ aft) >>> v2; + } + } + if (answer.cardinality <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return answer.toArrayContainer(); + } + return answer; + } + + + @Override + public MappeableContainer andNot(final MappeableBitmapContainer value2) { + + int newCardinality = 0; + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] b = this.bitmap.array(); + long[] v2 = value2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(b[k] & (~v2[k])); + } + + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(this.bitmap.get(k) & (~value2.bitmap.get(k))); + } + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + final MappeableBitmapContainer answer = new MappeableBitmapContainer(); + if (!BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + long[] bitArray = answer.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] b = this.bitmap.array(); + long[] v2 = value2.bitmap.array(); + int len = answer.bitmap.limit(); + for (int k = 0; k < len; ++k) { + bitArray[k] = b[k] & (~v2[k]); + } + } else { + int len = answer.bitmap.limit(); + for (int k = 0; k < len; ++k) { + bitArray[k] = this.bitmap.get(k) & (~value2.bitmap.get(k)); + } + } + answer.cardinality = newCardinality; + return answer; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + Util.fillArrayANDNOT(ac.content.array(), this.bitmap.array(), + value2.bitmap.array()); + } else { + BufferUtil.fillArrayANDNOT(ac.content.array(), this.bitmap, value2.bitmap); + } + ac.cardinality = newCardinality; + return ac; + } + + @Override + public MappeableContainer andNot(final MappeableRunContainer value2) { + MappeableBitmapContainer answer = this.clone(); + if (BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + long[] b = answer.bitmap.array(); + for (int rlepos = 0; rlepos < value2.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(value2.getValue(rlepos)); + int end = BufferUtil.toIntUnsigned(value2.getValue(rlepos)) + + BufferUtil.toIntUnsigned(value2.getLength(rlepos)) + 1; + Util.resetBitmapRange(b, start, end); + } + } else { + for (int rlepos = 0; rlepos < value2.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(value2.getValue(rlepos)); + int end = BufferUtil.toIntUnsigned(value2.getValue(rlepos)) + + BufferUtil.toIntUnsigned(value2.getLength(rlepos)) + 1; + BufferUtil.resetBitmapRange(answer.bitmap, start, end); + } + } + answer.computeCardinality(); + if (answer.getCardinality() > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return answer; + } else { + return answer.toArrayContainer(); 
+ } + } + + @Override + public void clear() { + if (cardinality != 0) { + cardinality = 0; + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + bitmap.put(k, 0); + } + } + } + + + @Override + public MappeableBitmapContainer clone() { + return new MappeableBitmapContainer(this.cardinality, this.bitmap); + } + + /** + * Recomputes the cardinality of the bitmap. + */ + protected void computeCardinality() { + this.cardinality = 0; + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = this.bitmap.array(); + for (int k = 0; k < b.length; k++) { + this.cardinality += Long.bitCount(b[k]); + } + } else { + int m = this.bitmap.limit(); + for (int k = 0; k < m; k++) { + this.cardinality += Long.bitCount(this.bitmap.get(k)); + } + } + } + + @Override + public boolean contains(final short i) { + final int x = BufferUtil.toIntUnsigned(i); + return (bitmap.get(x / 64) & (1L << x)) != 0; + } + + @Override + + public boolean equals(Object o) { + if (o instanceof MappeableBitmapContainer) { + final MappeableBitmapContainer srb = (MappeableBitmapContainer) o; + if (srb.cardinality != this.cardinality) { + return false; + } + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(srb.bitmap)) { + long[] b = this.bitmap.array(); + long[] s = srb.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + if (b[k] != s[k]) { + return false; + } + } + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + if (this.bitmap.get(k) != srb.bitmap.get(k)) { + return false; + } + } + } + return true; + + } else if (o instanceof MappeableRunContainer) { + return o.equals(this); + } + return false; + } + + /** + * Fill the array with set bits + * + * @param array container (should be sufficiently large) + */ + protected void fillArray(final short[] array) { + int pos = 0; + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = bitmap.array(); + int base = 0; + for (int k = 0; k < 
b.length; ++k) { + long bitset = b[k]; + while (bitset != 0) { + final long t = bitset & -bitset; + array[pos++] = (short) (base + Long.bitCount(t - 1)); + bitset ^= t; + } + base += 64; + } + + } else { + int len = this.bitmap.limit(); + int base = 0; + for (int k = 0; k < len; ++k) { + long bitset = bitmap.get(k); + while (bitset != 0) { + final long t = bitset & -bitset; + array[pos++] = (short) (base + Long.bitCount(t - 1)); + bitset ^= t; + } + base += 64; + } + } + } + + @Override + public void fillLeastSignificant16bits(int[] x, int i, int mask) { + int pos = i; + int base = mask; + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + long bitset = b[k]; + while (bitset != 0) { + final long t = bitset & -bitset; + x[pos++] = base + Long.bitCount(t - 1); + bitset ^= t; + } + base += 64; + } + + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + long bitset = bitmap.get(k); + while (bitset != 0) { + final long t = bitset & -bitset; + x[pos++] = base + Long.bitCount(t - 1); + bitset ^= t; + } + base += 64; + } + } + } + + @Override + public MappeableContainer flip(short i) { + final int x = BufferUtil.toIntUnsigned(i); + final long bef = bitmap.get(x / 64); + final long mask = 1L << x; + if (cardinality == MappeableArrayContainer.DEFAULT_MAX_SIZE + 1) {// this + // is + // the + // uncommon + // path + if ((bef & mask) != 0) { + --cardinality; + bitmap.put(x / 64, bef & ~mask); + return this.toArrayContainer(); + } + } + long aft = bef ^ mask; + ; + // TODO: check whether a branchy version could be faster + cardinality += 1 - 2 * ((bef & mask) >>> x); + bitmap.put(x / 64, aft); + return this; + } + + + @Override + protected int getArraySizeInBytes() { + return MAX_CAPACITY / 8; + } + + + @Override + public int getCardinality() { + return cardinality; + } + + @Override + public ShortIterator getReverseShortIterator() { + if 
(this.isArrayBacked()) { + return BitmapContainer.getReverseShortIterator(bitmap.array()); + } + return new ReverseMappeableBitmapContainerShortIterator(this); + } + + @Override + public PeekableShortIterator getShortIterator() { + if (this.isArrayBacked()) { + return BitmapContainer.getShortIterator(bitmap.array()); + } + return new MappeableBitmapContainerShortIterator(this); + } + + @Override + public int getSizeInBytes() { + return this.bitmap.limit() * 8; + } + + @Override + public int hashCode() { + long hash = 0; + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + hash += 31 * hash + this.bitmap.get(k); + } + return (int) hash; + } + + @Override + public MappeableContainer iadd(int begin, int end) { + // TODO: may need to convert to a RunContainer + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + BufferUtil.setBitmapRange(bitmap, begin, end); + computeCardinality(); + return this; + } + + @Override + public MappeableContainer iand(final MappeableArrayContainer b2) { + return b2.and(this);// no inplace possible + } + + + @Override + public MappeableContainer iand(final MappeableBitmapContainer b2) { + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(b2.bitmap)) { + int newCardinality = 0; + long[] tb = this.bitmap.array(); + long[] tb2 = b2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(tb[k] & tb2[k]); + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < len; ++k) { + tb[k] &= tb2[k]; + } + this.cardinality = newCardinality; + return this; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + BufferUtil.fillArrayAND(ac.content.array(), this.bitmap, b2.bitmap); + ac.cardinality = newCardinality; + return ac; + } else { + int newCardinality = 
0; + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(this.bitmap.get(k) & b2.bitmap.get(k)); + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < len; ++k) { + this.bitmap.put(k, this.bitmap.get(k) & b2.bitmap.get(k)); + } + this.cardinality = newCardinality; + return this; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + BufferUtil.fillArrayAND(ac.content.array(), this.bitmap, b2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + } + + @Override + public MappeableContainer iand(final MappeableRunContainer x) { + final int card = x.getCardinality(); + if (card <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + // no point in doing it in-place + MappeableArrayContainer answer = new MappeableArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int runStart = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int runEnd = runStart + BufferUtil.toIntUnsigned(x.getLength(rlepos)); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + if (this.contains((short) runValue)) { + answer.content.put(answer.cardinality++, (short) runValue); + } + } + } + return answer; + } + int start = 0; + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int end = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + BufferUtil.resetBitmapRange(this.bitmap, start, end); + start = end + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + } + BufferUtil.resetBitmapRange(this.bitmap, start, BufferUtil.maxLowBitAsInteger() + 1); + computeCardinality(); + if (getCardinality() > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return this; + } else { + return toArrayContainer(); + } + } + + @Override + public MappeableContainer iandNot(final MappeableArrayContainer b2) { + for (int k = 0; k < b2.cardinality; ++k) { + this.remove(b2.content.get(k)); + } + if (cardinality <= 
MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return this.toArrayContainer(); + } + return this; + } + + + @Override + public MappeableContainer iandNot(final MappeableBitmapContainer b2) { + int newCardinality = 0; + if (!BufferUtil.isBackedBySimpleArray(bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] b = this.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(b2.bitmap)) { + long[] b2Arr = b2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(b[k] & (~b2Arr[k])); + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < len; ++k) { + this.bitmap.put(k, b[k] & (~b2Arr[k])); + } + this.cardinality = newCardinality; + return this; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + Util.fillArrayANDNOT(ac.content.array(), b, b2Arr); + ac.cardinality = newCardinality; + return ac; + + } + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(b[k] & (~b2.bitmap.get(k))); + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < len; ++k) { + b[k] &= (~b2.bitmap.get(k)); + } + this.cardinality = newCardinality; + return this; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + + BufferUtil.fillArrayANDNOT(ac.content.array(), this.bitmap, b2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public MappeableContainer iandNot(final MappeableRunContainer x) { + if (BufferUtil.isBackedBySimpleArray(this.bitmap)) { + long[] b = this.bitmap.array(); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.resetBitmapRange(b, start, end); + } + computeCardinality(); + if (getCardinality() > 
MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return this; + } else { + return toArrayContainer(); + } + } + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + BufferUtil.resetBitmapRange(this.bitmap, start, end); + } + computeCardinality(); + if (getCardinality() > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return this; + } else { + return toArrayContainer(); + } + + } + + protected MappeableContainer ilazyor(MappeableArrayContainer value2) { + this.cardinality = -1;// invalid + if (!BufferUtil.isBackedBySimpleArray(bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] b = this.bitmap.array(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + b[i] |= (1L << v2); + } + return this; + } + + protected MappeableContainer ilazyor(MappeableBitmapContainer x) { + if (BufferUtil.isBackedBySimpleArray(x.bitmap)) { + long[] b = this.bitmap.array(); + long[] b2 = x.bitmap.array(); + for (int k = 0; k < b.length; k++) { + b[k] |= b2[k]; + } + } else { + final int m = this.bitmap.limit(); + for (int k = 0; k < m; k++) { + this.bitmap.put(k, this.bitmap.get(k) | x.bitmap.get(k)); + } + } + this.cardinality = -1;// invalid + return this; + } + + + protected MappeableContainer ilazyor(MappeableRunContainer x) { + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(this.bitmap, start, end); + } + this.cardinality = -1; + return this; + } + + @Override + public MappeableContainer inot(final int firstOfRange, final int lastOfRange) { + if (lastOfRange - firstOfRange == MAX_CAPACITY) { + BufferUtil.flipBitmapRange(bitmap, firstOfRange, lastOfRange); + 
cardinality = MAX_CAPACITY - cardinality; + } else if (lastOfRange - firstOfRange > MAX_CAPACITY / 2) { + BufferUtil.flipBitmapRange(bitmap, firstOfRange, lastOfRange); + computeCardinality(); + } else { + cardinality += + BufferUtil.flipBitmapRangeAndCardinalityChange(bitmap, firstOfRange, lastOfRange); + } + if (cardinality <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return toArrayContainer(); + } + return this; + } + + + @Override + public boolean intersects(MappeableArrayContainer value2) { + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] c = value2.content.array(); + int ca = value2.cardinality; + for (int k = 0; k < ca; ++k) { + if (this.contains(c[k])) { + return true; + } + } + + } else { + int ca = value2.cardinality; + for (int k = 0; k < ca; ++k) { + short v = value2.content.get(k); + if (this.contains(v)) { + return true; + } + } + } + return false; + } + + @Override + public boolean intersects(MappeableBitmapContainer value2) { + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] tb = this.bitmap.array(); + long[] v2b = value2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + if ((tb[k] & v2b[k]) != 0) { + return true; + } + } + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + if ((this.bitmap.get(k) & value2.bitmap.get(k)) != 0) { + return true; + } + } + } + return false; + } + + @Override + public boolean intersects(MappeableRunContainer x) { + return x.intersects(this); + } + + + @Override + public MappeableBitmapContainer ior(final MappeableArrayContainer value2) { + if (!BufferUtil.isBackedBySimpleArray(this.bitmap)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + long[] b = this.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] v2 = value2.content.array(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + final int i = BufferUtil.toIntUnsigned(v2[k]) >>> 6; + long bef = b[i]; + long aft = bef | (1L << v2[k]); + b[i] = aft; + if (USE_BRANCHLESS) { + cardinality += (bef - aft) >>> 63; + } else { + if (aft != bef) { + cardinality++; + } + } + } + return this; + } + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + long bef = b[i]; + long aft = bef | (1L << v2); + b[i] = aft; + if (USE_BRANCHLESS) { + cardinality += (bef - aft) >>> 63; + } else { + if (aft != bef) { + cardinality++; + } + } + } + return this; + } + + @Override + public MappeableContainer ior(final MappeableBitmapContainer b2) { + if (!BufferUtil.isBackedBySimpleArray(bitmap)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + long[] b = this.bitmap.array(); + this.cardinality = 0; + if (BufferUtil.isBackedBySimpleArray(b2.bitmap)) { + long[] b2Arr = b2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; k++) { + long w = b[k] | b2Arr[k]; + b[k] = w; + this.cardinality += Long.bitCount(w); + } + return this; + } + int len = this.bitmap.limit(); + for (int k = 0; k < len; k++) { + long w = b[k] | b2.bitmap.get(k); + b[k] = w; + this.cardinality += Long.bitCount(w); + } + return this; + } + + @Override + public MappeableContainer ior(final MappeableRunContainer x) { + if (BufferUtil.isBackedBySimpleArray(this.bitmap)) { + long[] b = this.bitmap.array(); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.setBitmapRange(b, start, end); + } + } else { + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(this.bitmap, start, end); + } + } + computeCardinality(); + return this; + } + + @Override + public MappeableContainer iremove(int begin, int end) { + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + BufferUtil.resetBitmapRange(bitmap, begin, end); + computeCardinality(); + if (getCardinality() <= MappeableArrayContainer.DEFAULT_MAX_SIZE) {// same threshold (inclusive) as the other in-place operations + return toArrayContainer(); + } + return this; + } + + @Override + protected boolean isArrayBacked() { + return BufferUtil.isBackedBySimpleArray(this.bitmap); + } + + @Override + public Iterator iterator() { + return new Iterator() { + final ShortIterator si = MappeableBitmapContainer.this.getShortIterator(); + + @Override + public boolean hasNext() { + return si.hasNext(); + } + + @Override + 
public Short next() { + return si.next(); + } + + @Override + public void remove() { + // TODO: implement + throw new RuntimeException("unsupported operation: remove"); + } + }; + } + + @Override + public MappeableContainer ixor(final MappeableArrayContainer value2) { + if (!BufferUtil.isBackedBySimpleArray(bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] b = bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] v2 = value2.content.array(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short vc = v2[k]; + long mask = 1L << v2[k]; + final int index = BufferUtil.toIntUnsigned(vc) >>> 6; + long ba = b[index]; + // TODO: check whether a branchy version could be faster + this.cardinality += 1 - 2 * ((ba & mask) >>> vc); + b[index] = ba ^ mask; + } + + } else { + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v2 = value2.content.get(k); + long mask = 1L << v2; + final int index = BufferUtil.toIntUnsigned(v2) >>> 6; + long ba = b[index]; + // TODO: check whether a branchy version could be faster + this.cardinality += 1 - 2 * ((ba & mask) >>> v2); + b[index] = ba ^ mask; + } + } + if (this.cardinality <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return this.toArrayContainer(); + } + return this; + } + + @Override + public MappeableContainer ixor(MappeableBitmapContainer b2) { + if (!BufferUtil.isBackedBySimpleArray(bitmap)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + long[] b = bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(b2.bitmap)) { + long[] b2Arr = b2.bitmap.array(); + + int newCardinality = 0; + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(b[k] ^ b2Arr[k]); + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < len; ++k) { + b[k] ^= b2Arr[k]; + } + this.cardinality = newCardinality; + return this; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + + Util.fillArrayXOR(ac.content.array(), b, b2Arr); + ac.cardinality = newCardinality; + return ac; + + } + int newCardinality = 0; + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(b[k] ^ b2.bitmap.get(k)); + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + for (int k = 0; k < len; ++k) { + b[k] ^= b2.bitmap.get(k); + } + this.cardinality = newCardinality; + return this; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + + BufferUtil.fillArrayXOR(ac.content.array(), this.bitmap, b2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public MappeableContainer ixor(final MappeableRunContainer x) { + if (BufferUtil.isBackedBySimpleArray(this.bitmap)) { + long[] b = this.bitmap.array(); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.flipBitmapRange(b, start, end); + } + } else { + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + BufferUtil.flipBitmapRange(this.bitmap, start, end); + } + } + computeCardinality(); + if (this.getCardinality() > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return this; + } else { + 
return toArrayContainer(); + } + } + + protected MappeableContainer lazyor(MappeableArrayContainer value2) { + MappeableBitmapContainer answer = clone(); + answer.cardinality = -1;// invalid + if (!BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] b = answer.bitmap.array(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + b[i] |= 1L << v2; + } + return answer; + } + + protected MappeableContainer lazyor(MappeableBitmapContainer x) { + MappeableBitmapContainer answer = new MappeableBitmapContainer(); + answer.cardinality = -1;// invalid + if (!BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] b = answer.bitmap.array(); + for (int k = 0; k < b.length; k++) { + b[k] = this.bitmap.get(k) | x.bitmap.get(k); + } + return answer; + } + + protected MappeableContainer lazyor(MappeableRunContainer x) { + MappeableBitmapContainer bc = clone(); + bc.cardinality = -1; + if (BufferUtil.isBackedBySimpleArray(bc.bitmap)) { + long[] b = bc.bitmap.array(); + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + Util.setBitmapRange(b, start, end); + } + return bc; + + } + for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(x.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(x.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(bc.bitmap, start, end); + } + return bc; + } + + @Override + public MappeableContainer limit(int maxcardinality) { + if (maxcardinality >= this.cardinality) { + return clone(); + } + if (maxcardinality <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + MappeableArrayContainer ac = new 
MappeableArrayContainer(maxcardinality); + int pos = 0; + if (!BufferUtil.isBackedBySimpleArray(ac.content)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + short[] cont = ac.content.array(); + int len = this.bitmap.limit(); + for (int k = 0; (ac.cardinality < maxcardinality) && (k < len); ++k) { + long bitset = bitmap.get(k); + while ((ac.cardinality < maxcardinality) && (bitset != 0)) { + long t = bitset & -bitset; + cont[pos++] = (short) (k * 64 + Long.bitCount(t - 1)); + ac.cardinality++; + bitset ^= t; + } + } + return ac; + } + MappeableBitmapContainer bc = new MappeableBitmapContainer(maxcardinality, this.bitmap); + int s = BufferUtil.toIntUnsigned(select(maxcardinality)); + int usedwords = (s + 63) / 64; + int len = this.bitmap.limit(); + int todelete = len - usedwords; + for (int k = 0; k < todelete; ++k) { + bc.bitmap.put(len - 1 - k, 0); + } + int lastword = s % 64; + if (lastword != 0) { + bc.bitmap.put(s / 64, (bc.bitmap.get(s / 64) & (0xFFFFFFFFFFFFFFFFL >>> (64 - lastword)))); + } + return bc; + } + + protected void loadData(final MappeableArrayContainer arrayContainer) { + this.cardinality = arrayContainer.cardinality; + if (!BufferUtil.isBackedBySimpleArray(bitmap)) { + throw new RuntimeException("Should not happen. 
Internal bug."); + } + long[] bitArray = bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(bitmap) + && BufferUtil.isBackedBySimpleArray(arrayContainer.content)) { + long[] b = bitmap.array(); + short[] ac = arrayContainer.content.array(); + for (int k = 0; k < arrayContainer.cardinality; ++k) { + final short x = ac[k]; + bitArray[BufferUtil.toIntUnsigned(x) / 64] = + b[BufferUtil.toIntUnsigned(x) / 64] | (1L << x); + } + + } else { + for (int k = 0; k < arrayContainer.cardinality; ++k) { + final short x = arrayContainer.content.get(k); + bitArray[BufferUtil.toIntUnsigned(x) / 64] = + bitmap.get(BufferUtil.toIntUnsigned(x) / 64) | (1L << x); + } + } + } + + /** + * Find the index of the next set bit greater or equal to i, returns -1 if none found. + * + * @param i starting index + * @return index of the next set bit + */ + public int nextSetBit(final int i) { + int x = i >> 6; // signed i / 64 + long w = bitmap.get(x); + w >>>= i; + if (w != 0) { + return i + Long.numberOfTrailingZeros(w); + } + // for speed, replaced bitmap.limit() with hardcoded MAX_CAPACITY / 64 + for (++x; x < MAX_CAPACITY / 64; ++x) { + long X = bitmap.get(x); + if (X != 0) { + return x * 64 + Long.numberOfTrailingZeros(X); + } + } + return -1; + } + + /** + * Find the index of the next unset bit greater or equal to i, returns -1 if none found. 
+ * + * @param i starting index + * @return index of the next unset bit + */ + public short nextUnsetBit(final int i) { + int x = i / 64; + long w = ~bitmap.get(x); + w >>>= i; + if (w != 0) { + return (short) (i + Long.numberOfTrailingZeros(w)); + } + ++x; + // for speed, replaced bitmap.limit() with hardcoded MAX_CAPACITY / 64 + for (; x < MAX_CAPACITY / 64; ++x) { + long X = bitmap.get(x); + if (X != ~0L) { + return (short) (x * 64 + Long.numberOfTrailingZeros(~X)); + } + } + return -1; + } + + @Override + public MappeableContainer not(final int firstOfRange, final int lastOfRange) { + MappeableBitmapContainer answer = clone(); + return answer.inot(firstOfRange, lastOfRange); + } + + + @Override + int numberOfRuns() { + if (BufferUtil.isBackedBySimpleArray(this.bitmap)) { + long[] src = this.bitmap.array(); + int numRuns = 0; + long nextWord = src[0]; + + for (int i = 0; i < src.length - 1; i++) { + long word = nextWord; + nextWord = src[i + 1]; + numRuns += Long.bitCount((~word) & (word << 1)) + ((word >>> 63) & ~nextWord); + } + + long word = nextWord; + numRuns += Long.bitCount((~word) & (word << 1)); + if ((word & 0x8000000000000000L) != 0) { + numRuns++; + } + + return numRuns; + } else { + int numRuns = 0; + long nextWord = bitmap.get(0); + int len = bitmap.limit(); + + for (int i = 0; i < len - 1; i++) { + long word = nextWord; + nextWord = bitmap.get(i + 1); + numRuns += Long.bitCount((~word) & (word << 1)) + ((word >>> 63) & ~nextWord); + } + + long word = nextWord; + numRuns += Long.bitCount((~word) & (word << 1)); + if ((word & 0x8000000000000000L) != 0) { + numRuns++; + } + + return numRuns; + + } + } + + /** + * Computes the correction to add to numberOfRunsLowerBound: the number of runs whose last bit is the top bit of a 64-bit word + * + * @return the number of runs ending on the last bit of a word + */ + public int numberOfRunsAdjustment() { + int ans = 0; + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = bitmap.array(); + long nextWord = b[0]; + for (int i = 0; i < b.length - 1; i++) { + final long word = nextWord; + + nextWord = b[i + 1]; + ans += ((word >>>
63) & ~nextWord); + } + + final long word = nextWord; + + if ((word & 0x8000000000000000L) != 0) { + ans++; + } + + } else { + long nextWord = bitmap.get(0); + int len = bitmap.limit(); + for (int i = 0; i < len - 1; i++) { + final long word = nextWord; + + nextWord = bitmap.get(i + 1); + ans += ((word >>> 63) & ~nextWord); + } + + final long word = nextWord; + + if ((word & 0x8000000000000000L) != 0) { + ans++; + } + } + return ans; + } + + /** + * Counts how many runs there are in the bitmap, up to a maximum + * + * @param mustNotExceed maximum of runs beyond which counting is pointless + * @return estimated number of runs (a lower bound: runs ending on the last bit of a word are not counted) + */ + public int numberOfRunsLowerBound(int mustNotExceed) { + int numRuns = 0; + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = bitmap.array(); + + for (int blockOffset = 0; blockOffset < b.length; blockOffset += BLOCKSIZE) { + + for (int i = blockOffset; i < blockOffset + BLOCKSIZE; i++) { + long word = b[i]; + numRuns += Long.bitCount((~word) & (word << 1)); + } + if (numRuns > mustNotExceed) { + return numRuns; + } + } + } else { + int len = bitmap.limit(); + for (int blockOffset = 0; blockOffset < len; blockOffset += BLOCKSIZE) { + + for (int i = blockOffset; i < blockOffset + BLOCKSIZE; i++) { + long word = bitmap.get(i); + numRuns += Long.bitCount((~word) & (word << 1)); + } + if (numRuns > mustNotExceed) { + return numRuns; + } + } + } + return numRuns; + } + + + @Override + public MappeableBitmapContainer or(final MappeableArrayContainer value2) { + final MappeableBitmapContainer answer = clone(); + if (!BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + throw new RuntimeException("Should not happen.
Internal bug."); + } + long[] bitArray = answer.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(answer.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.content)) { + long[] ab = answer.bitmap.array(); + short[] v2 = value2.content.array(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = v2[k]; + final int i = BufferUtil.toIntUnsigned(v) >>> 6; + long w = ab[i]; + long aft = w | (1L << v); + bitArray[i] = aft; + if (USE_BRANCHLESS) { + answer.cardinality += (w - aft) >>> 63; + } else { + if (w != aft) { + answer.cardinality++; + } + } + } + } else { + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v2 = value2.content.get(k); + final int i = BufferUtil.toIntUnsigned(v2) >>> 6; + long w = answer.bitmap.get(i); + long aft = w | (1L << v2); + bitArray[i] = aft; + if (USE_BRANCHLESS) { + answer.cardinality += (w - aft) >>> 63; + } else { + if (w != aft) { + answer.cardinality++; + } + } + } + } + return answer; + } + + + @Override + public MappeableContainer or(final MappeableBitmapContainer value2) { + final MappeableBitmapContainer value1 = this.clone(); + return value1.ior(value2); + } + + @Override + public MappeableContainer or(final MappeableRunContainer value2) { + return value2.or(this); + } + + + /** + * Find the index of the previous set bit less than or equal to i, returns -1 if none found. 
+ * + * @param i starting index + * @return index of the previous set bit + */ + public int prevSetBit(final int i) { + int x = i >> 6; // signed i / 64 + long w = bitmap.get(x); + w <<= 64 - i - 1; + if (w != 0) { + return i - Long.numberOfLeadingZeros(w); + } + for (--x; x >= 0; --x) { + long X = bitmap.get(x); + if (X != 0) { + return x * 64 + 63 - Long.numberOfLeadingZeros(X); + } + } + return -1; + } + + @Override + public int rank(short lowbits) { + int x = BufferUtil.toIntUnsigned(lowbits); + int leftover = (x + 1) & 63; + int answer = 0; + if (BufferUtil.isBackedBySimpleArray(this.bitmap)) { + long[] b = this.bitmap.array(); + for (int k = 0; k < (x + 1) / 64; ++k) { + answer += Long.bitCount(b[k]); + } + if (leftover != 0) { + answer += Long.bitCount(b[(x + 1) / 64] << (64 - leftover)); + } + } else { + for (int k = 0; k < (x + 1) / 64; ++k) { + answer += Long.bitCount(bitmap.get(k)); + } + if (leftover != 0) { + answer += Long.bitCount(bitmap.get((x + 1) / 64) << (64 - leftover)); + } + } + return answer; + } + + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + // little endian + this.cardinality = 0; + int len = this.bitmap.limit(); + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = this.bitmap.array(); + for (int k = 0; k < len; ++k) { + long w = Long.reverseBytes(in.readLong()); + b[k] = w; + this.cardinality += Long.bitCount(w); + } + } else { + for (int k = 0; k < len; ++k) { + long w = Long.reverseBytes(in.readLong()); + bitmap.put(k, w); + this.cardinality += Long.bitCount(w); + } + } + } + + @Override + public MappeableContainer remove(int begin, int end) { + if (end == begin) { + return clone(); + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + MappeableBitmapContainer answer = clone(); + BufferUtil.resetBitmapRange(answer.bitmap, begin, end); + answer.computeCardinality(); + if 
(answer.getCardinality() < MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return answer.toArrayContainer(); + } + return answer; + } + + + @Override + public MappeableContainer remove(final short i) { + final int x = BufferUtil.toIntUnsigned(i); + long X = bitmap.get(x / 64); + long mask = 1L << x; + + if (cardinality == MappeableArrayContainer.DEFAULT_MAX_SIZE + 1) {// this is + // the + // uncommon + // path + if ((X & mask) != 0) { + --cardinality; + bitmap.put(x / 64, X & (~mask)); + return this.toArrayContainer(); + } + } + long aft = X & ~(mask); + cardinality -= (aft - X) >>> 63; + bitmap.put(x / 64, aft); + return this; + } + + + @Override + public MappeableContainer repairAfterLazy() { + if (getCardinality() < 0) { + computeCardinality(); + if (getCardinality() <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return this.toArrayContainer(); + } + } + return this; + } + + @Override + public MappeableContainer runOptimize() { + int numRuns = numberOfRunsLowerBound(MAXRUNS); // decent choice + + int sizeAsRunContainerLowerBound = MappeableRunContainer.serializedSizeInBytes(numRuns); + + if (sizeAsRunContainerLowerBound >= getArraySizeInBytes()) { + return this; + } + // else numRuns is a relatively tight bound that needs to be exact + // in some cases (or if we need to make the runContainer the right + // size) + numRuns += numberOfRunsAdjustment(); + int sizeAsRunContainer = MappeableRunContainer.serializedSizeInBytes(numRuns); + + if (getArraySizeInBytes() > sizeAsRunContainer) { + return new MappeableRunContainer(this, numRuns); + } else { + return this; + } + } + + @Override + public short select(int j) { + int leftover = j; + if (BufferUtil.isBackedBySimpleArray(this.bitmap)) { + long[] b = this.bitmap.array(); + + for (int k = 0; k < b.length; ++k) { + int w = Long.bitCount(b[k]); + if (w > leftover) { + return (short) (k * 64 + Util.select(b[k], leftover)); + } + leftover -= w; + } + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; 
++k) { + long X = bitmap.get(k); + int w = Long.bitCount(X); + if (w > leftover) { + return (short) (k * 64 + Util.select(X, leftover)); + } + leftover -= w; + } + } + throw new IllegalArgumentException("Insufficient cardinality."); + } + + @Override + public int serializedSizeInBytes() { + return serializedSizeInBytes(0); + } + + /** + * Copies the data to an array container + * + * @return the array container + */ + public MappeableArrayContainer toArrayContainer() { + final MappeableArrayContainer ac = new MappeableArrayContainer(cardinality); + ac.loadData(this); + if (ac.getCardinality() != cardinality) { + throw new RuntimeException("Internal error."); + } + return ac; + } + + @Override + public Container toContainer() { + return new BitmapContainer(this); + } + + /** + * Create a copy of the content of this container as a long array. This creates a copy. + * + * @return copy of the content as a long array + */ + public long[] toLongArray() { + long[] answer = new long[bitmap.limit()]; + bitmap.rewind(); + bitmap.get(answer); + return answer; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + final ShortIterator i = this.getShortIterator(); + sb.append("{"); + while (i.hasNext()) { + sb.append(i.next()); + if (i.hasNext()) { + sb.append(","); + } + } + sb.append("}"); + return sb.toString(); + } + + @Override + public void trim() { + } + + @Override + protected void writeArray(DataOutput out) throws IOException { + // little endian + int len = this.bitmap.limit(); + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = bitmap.array(); + for (int k = 0; k < len; ++k) { + out.writeLong(Long.reverseBytes(b[k])); + } + } else { + for (int k = 0; k < len; ++k) { + final long w = bitmap.get(k); + out.writeLong(Long.reverseBytes(w)); + } + } + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + writeArray(out); + } + + @Override + public MappeableContainer xor(final 
MappeableArrayContainer value2) { + final MappeableBitmapContainer answer = clone(); + if (!BufferUtil.isBackedBySimpleArray(answer.bitmap)) { + throw new RuntimeException("Should not happen. Internal bug."); + } + long[] bitArray = answer.bitmap.array(); + if (BufferUtil.isBackedBySimpleArray(value2.content)) { + short[] v2 = value2.content.array(); + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short vc = v2[k]; + long mask = 1L << vc; + final int index = BufferUtil.toIntUnsigned(vc) >>> 6; + long ba = bitArray[index]; + // TODO: check whether a branchy version could be faster + answer.cardinality += 1 - 2 * ((ba & mask) >>> vc); + bitArray[index] = ba ^ mask; + } + } else { + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v2 = value2.content.get(k); + long mask = 1L << v2; + final int index = BufferUtil.toIntUnsigned(v2) >>> 6; + long ba = bitArray[index]; + // TODO: check whether a branchy version could be faster + answer.cardinality += 1 - 2 * ((ba & mask) >>> v2); + bitArray[index] = ba ^ mask; + } + } + if (answer.cardinality <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return answer.toArrayContainer(); + } + return answer; + } + + @Override + public MappeableContainer xor(MappeableBitmapContainer value2) { + int newCardinality = 0; + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] b = this.bitmap.array(); + long[] v2 = value2.bitmap.array(); + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(b[k] ^ v2[k]); + } + } else { + int len = this.bitmap.limit(); + for (int k = 0; k < len; ++k) { + newCardinality += Long.bitCount(this.bitmap.get(k) ^ value2.bitmap.get(k)); + } + } + if (newCardinality > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + final MappeableBitmapContainer answer = new MappeableBitmapContainer(); + long[] bitArray = answer.bitmap.array(); + if 
(BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] b = this.bitmap.array(); + long[] v2 = value2.bitmap.array(); + int len = answer.bitmap.limit(); + for (int k = 0; k < len; ++k) { + bitArray[k] = b[k] ^ v2[k]; + } + } else { + int len = answer.bitmap.limit(); + for (int k = 0; k < len; ++k) { + bitArray[k] = this.bitmap.get(k) ^ value2.bitmap.get(k); + } + } + answer.cardinality = newCardinality; + return answer; + } + final MappeableArrayContainer ac = new MappeableArrayContainer(newCardinality); + BufferUtil.fillArrayXOR(ac.content.array(), this.bitmap, value2.bitmap); + ac.cardinality = newCardinality; + return ac; + } + + @Override + public MappeableContainer xor(final MappeableRunContainer value2) { + return value2.xor(this); + } + + @Override + public void forEach(short msb, IntConsumer ic) { + int high = ((int) msb) << 16; + if (BufferUtil.isBackedBySimpleArray(bitmap)) { + long[] b = bitmap.array(); + for (int x = 0; x < b.length; ++x) { + long w = b[x]; + while (w != 0) { + long t = w & -w; + ic.accept((x * 64 + Long.bitCount(t - 1)) | high); + w ^= t; + } + } + } else { + int l = bitmap.limit(); + for (int x = 0; x < l; ++x) { + long w = bitmap.get(x); + while (w != 0) { + long t = w & -w; + ic.accept((x * 64 + Long.bitCount(t - 1)) | high); + w ^= t; + } + } + } + } + + + @Override + public int andCardinality(final MappeableArrayContainer value2) { + int answer = 0; + int c = value2.cardinality; + for (int k = 0; k < c; ++k) { + short v = value2.content.get(k); + if (this.contains(v)) { + answer++; + } + } + return answer; + } + + @Override + public int andCardinality(final MappeableBitmapContainer value2) { + int newCardinality = 0; + if (BufferUtil.isBackedBySimpleArray(this.bitmap) + && BufferUtil.isBackedBySimpleArray(value2.bitmap)) { + long[] b1 = this.bitmap.array(); + long[] b2 = value2.bitmap.array(); + for (int k = 0; k < b1.length; ++k) { + newCardinality += Long.bitCount(b1[k] & 
b2[k]); + } + } else { + final int size = this.bitmap.limit(); + for (int k = 0; k < size; ++k) { + newCardinality += Long.bitCount(this.bitmap.get(k) & value2.bitmap.get(k)); + } + } + return newCardinality; + } + + @Override + public int andCardinality(MappeableRunContainer x) { + return x.andCardinality(this); + } + + @Override + public MappeableBitmapContainer toBitmapContainer() { + return this; + } + + +} + + +final class MappeableBitmapContainerShortIterator implements PeekableShortIterator { + final static int len = MappeableBitmapContainer.MAX_CAPACITY / 64;// hard coded for speed + long w; + int x; + + + MappeableBitmapContainer parent; + + MappeableBitmapContainerShortIterator() { + } + + MappeableBitmapContainerShortIterator(MappeableBitmapContainer p) { + wrap(p); + } + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean hasNext() { + return x < len; + } + + @Override + public short next() { + long t = w & -w; + short answer = (short) (x * 64 + Long.bitCount(t - 1)); + w ^= t; + while (w == 0) { + ++x; + if (x == len) { + break; + } + w = parent.bitmap.get(x); + } + return answer; + } + + @Override + public int nextAsInt() { + long t = w & -w; + int answer = x * 64 + Long.bitCount(t - 1); + w ^= t; + while (w == 0) { + ++x; + if (x == len) { + break; + } + w = parent.bitmap.get(x); + } + return answer; + } + + @Override + public void remove() { + // TODO: implement + throw new RuntimeException("unsupported operation: remove"); + } + + void wrap(MappeableBitmapContainer p) { + parent = p; + for (x = 0; x < len; ++x) { + if ((w = parent.bitmap.get(x)) != 0) { + break; + } + } + } + + @Override + public void advanceIfNeeded(short minval) { + if (BufferUtil.toIntUnsigned(minval) >= (x + 1) * 64) { + x = BufferUtil.toIntUnsigned(minval) / 64; + w = parent.bitmap.get(x); + while (w == 0) { 
+ ++x; + if (x == len) { + return; + } + w = parent.bitmap.get(x); + } + } + while (hasNext() && (BufferUtil.toIntUnsigned(peekNext()) < BufferUtil.toIntUnsigned(minval))) { + next(); // could be optimized + } + + } + + @Override + public short peekNext() { + long t = w & -w; + return (short) (x * 64 + Long.bitCount(t - 1)); + } +} + + +final class ReverseMappeableBitmapContainerShortIterator implements ShortIterator { + + final static int len = MappeableBitmapContainer.MAX_CAPACITY / 64;// hard coded for speed + long w; + int x; + + MappeableBitmapContainer parent; + + ReverseMappeableBitmapContainerShortIterator() { + } + + ReverseMappeableBitmapContainerShortIterator(MappeableBitmapContainer p) { + wrap(p); + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null; + } + } + + @Override + public boolean hasNext() { + return x >= 0; + } + + @Override + public short next() { + long t = w & -w; + short answer = (short) ((x + 1) * 64 - 1 - Long.bitCount(t - 1)); + w ^= t; + while (w == 0) { + --x; + if (x < 0) { + break; + } + w = Long.reverse(parent.bitmap.get(x)); + } + return answer; + } + + @Override + public int nextAsInt() { + long t = w & -w; + int answer = (x + 1) * 64 - 1 - Long.bitCount(t - 1); + w ^= t; + while (w == 0) { + --x; + if (x < 0) { + break; + } + w = Long.reverse(parent.bitmap.get(x)); + } + return answer; + } + + @Override + public void remove() { + // TODO: implement + throw new RuntimeException("unsupported operation: remove"); + } + + public void wrap(MappeableBitmapContainer p) { + parent = p; + for (x = len - 1; x >= 0; --x) { + if ((w = Long.reverse(parent.bitmap.get(x))) != 0) { + break; + } + } + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableContainer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableContainer.java new file mode 100644 index 000000000..cb5d1fd85 
--- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableContainer.java @@ -0,0 +1,759 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.Container; +import com.fr.third.bitmap.roaringbitmap.IntConsumer; +import com.fr.third.bitmap.roaringbitmap.PeekableShortIterator; +import com.fr.third.bitmap.roaringbitmap.ShortIterator; + +import java.io.DataOutput; +import java.io.Externalizable; +import java.io.IOException; + +/** + * Base container class. This class is similar to Container but meant to be used + * with memory mapping. + */ +public abstract class MappeableContainer implements Iterable, Cloneable, Externalizable { + /** + * Name of the various possible containers + */ + public static String ContainerNames[] = {"mappeablebitmap", "mappeablearray", "mappeablerun"}; + + /** + * Create a container initialized with a range of consecutive values + * + * @param start first index + * @param last last index (range is exclusive) + * @return a new container initialized with the specified values + */ + public static MappeableContainer rangeOfOnes(final int start, final int last) { + final int sizeAsArrayContainer = MappeableArrayContainer.serializedSizeInBytes(last - start); + final int sizeAsRunContainer = MappeableRunContainer.serializedSizeInBytes(1); + MappeableContainer answer = sizeAsRunContainer < sizeAsArrayContainer + ? new MappeableRunContainer() : new MappeableArrayContainer(); + answer = answer.iadd(start, last); + return answer; + } + + /** + * Return a new container with all shorts in [begin,end) added using an unsigned interpretation. + * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract MappeableContainer add(int begin, int end); + + /** + * Add a short to the container. May generate a new container. 
+ * + * @param x short to be added + * @return the new container + */ + public abstract MappeableContainer add(short x); + + /** + * Computes the bitwise AND of this container with another (intersection). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer and(MappeableArrayContainer x); + + /** + * Computes the bitwise AND of this container with another (intersection). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer and(MappeableBitmapContainer x); + + protected MappeableContainer and(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return and((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return and((MappeableRunContainer) x); + } + return and((MappeableBitmapContainer) x); + + } + + protected abstract int andCardinality(MappeableArrayContainer x); + + protected abstract int andCardinality(MappeableBitmapContainer x); + + protected abstract int andCardinality(MappeableRunContainer x); + + /** + * Computes the cardinality of the bitwise AND of this container with another (intersection). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return cardinality of the intersection (0 if either container is empty) + */ + public int andCardinality(MappeableContainer x) { + if (this.getCardinality() == 0) { + return 0; + } else if (x.getCardinality() == 0) { + return 0; + } else { + if (x instanceof MappeableArrayContainer) { + return andCardinality((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return andCardinality((MappeableBitmapContainer) x); + } + return andCardinality((MappeableRunContainer) x); + } + } + + /** + * Computes the bitwise AND of this container with another (intersection).
This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + + public abstract MappeableContainer and(MappeableRunContainer x); + + /** + * Computes the bitwise ANDNOT of this container with another (difference). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer andNot(MappeableArrayContainer x); + + /** + * Computes the bitwise ANDNOT of this container with another (difference). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer andNot(MappeableBitmapContainer x); + + protected MappeableContainer andNot(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return andNot((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return andNot((MappeableRunContainer) x); + } + + return andNot((MappeableBitmapContainer) x); + } + + /** + * Computes the bitwise ANDNOT of this container with another (difference). This container as well + * as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + + public abstract MappeableContainer andNot(MappeableRunContainer x); + + /** + * Empties the container + */ + public abstract void clear(); + + @Override + public abstract MappeableContainer clone(); + + /** + * Checks whether the container contains the provided value + * + * @param x value to check + * @return whether the value is in the container + */ + public abstract boolean contains(short x); + + /** + * Fill the least significant 16 bits of the integer array, starting at index i, with the + * short values from this container. The caller is responsible to allocate enough room.
The most + * significant 16 bits of each integer are given by the most significant bits of the provided + * mask. + * + * @param x provided array + * @param i starting index + * @param mask indicates most significant bits + */ + public abstract void fillLeastSignificant16bits(int[] x, int i, int mask); + + /** + * Add a short to the container if it is not present, otherwise remove it. May generate a new + * container. + * + * @param x short to be added + * @return the new container + */ + public abstract MappeableContainer flip(short x); + + /** + * Size of the underlying array + * + * @return size in bytes + */ + protected abstract int getArraySizeInBytes(); + + /** + * Computes the distinct number of short values in the container. Can be expected to run in + * constant time. + * + * @return the cardinality + */ + public abstract int getCardinality(); + + /** + * Get the name of this container. + * + * @return name of the container + */ + public String getContainerName() { + if (this instanceof MappeableBitmapContainer) { + return ContainerNames[0]; + } else if (this instanceof MappeableArrayContainer) { + return ContainerNames[1]; + } else { + return ContainerNames[2]; + } + } + + /** + * Iterator to visit the short values in the container in descending order. + * + * @return iterator + */ + public abstract ShortIterator getReverseShortIterator(); + + + /** + * Iterator to visit the short values in the container in ascending order. + * + * @return iterator + */ + public abstract PeekableShortIterator getShortIterator(); + + + /** + * Iterate through the values of this container and pass them + * along to the IntConsumer, using msb as the 16 most significant bits. + * + * @param msb 16 most significant bits + * @param ic consumer + */ + public abstract void forEach(short msb, IntConsumer ic); + + /** + * Computes an estimate of the memory usage of this container. The estimate is not meant to be + * exact. 
+ * + * @return estimated memory usage in bytes + */ + public abstract int getSizeInBytes(); + + /** + * Add all shorts in [begin,end) using an unsigned interpretation. May generate a new container. + * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract MappeableContainer iadd(int begin, int end); + + /** + * Computes the in-place bitwise AND of this container with another (intersection). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer iand(MappeableArrayContainer x); + + /** + * Computes the in-place bitwise AND of this container with another (intersection). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer iand(MappeableBitmapContainer x); + + + protected MappeableContainer iand(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return iand((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return iand((MappeableRunContainer) x); + } + + return iand((MappeableBitmapContainer) x); + + } + + /** + * Computes the in-place bitwise AND of this container with another (intersection). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + + public abstract MappeableContainer iand(MappeableRunContainer x); + + /** + * Computes the in-place bitwise ANDNOT of this container with another (difference). The current + * container is generally modified, whereas the provided container (x) is unaffected. 
May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer iandNot(MappeableArrayContainer x); + + /** + * Computes the in-place bitwise ANDNOT of this container with another (difference). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer iandNot(MappeableBitmapContainer x); + + protected MappeableContainer iandNot(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return iandNot((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return iandNot((MappeableRunContainer) x); + } + + return iandNot((MappeableBitmapContainer) x); + } + + /** + * Computes the in-place bitwise ANDNOT of this container with another (difference). The current + * container is generally modified, whereas the provided container (x) is unaffected. May generate + * a new container. + * + * @param x other container + * @return aggregated container + */ + + public abstract MappeableContainer iandNot(MappeableRunContainer x); + + + /** + * Computes the in-place bitwise NOT of this container (complement). Only those bits within the + * range are affected. The current container is generally modified. May generate a new container. + * + * @param rangeStart beginning of range (inclusive); 0 is beginning of this container. + * @param rangeEnd ending of range (exclusive) + * @return (partially) completmented container + */ + public abstract MappeableContainer inot(int rangeStart, int rangeEnd); + + + /** + * Returns true if the current container intersects the other container. 
+ * + * @param x other container + * @return whether they intersect + */ + public abstract boolean intersects(MappeableArrayContainer x); + + /** + * Returns true if the current container intersects the other container. + * + * @param x other container + * @return whether they intersect + */ + public abstract boolean intersects(MappeableBitmapContainer x); + + /** + * Returns true if the current container intersects the other container. + * + * @param x other container + * @return whether they intersect + */ + public boolean intersects(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return intersects((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return intersects((MappeableBitmapContainer) x); + } + return intersects((MappeableRunContainer) x); + } + + /** + * Returns true if the current container intersects the other container. + * + * @param x other container + * @return whether they intersect + */ + public abstract boolean intersects(MappeableRunContainer x); + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer ior(MappeableArrayContainer x); + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. 
+ * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer ior(MappeableBitmapContainer x); + + protected MappeableContainer ior(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return ior((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return ior((MappeableRunContainer) x); + } + + return ior((MappeableBitmapContainer) x); + } + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. + * + * @param x other container + * @return aggregated container + */ + + public abstract MappeableContainer ior(MappeableRunContainer x); + + /** + * Remove shorts in [begin,end) using an unsigned interpretation. May generate a new container. + * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract MappeableContainer iremove(int begin, int end); + + protected abstract boolean isArrayBacked(); + + + /** + * Computes the in-place bitwise XOR of this container with another (symmetric difference). The + * current container is generally modified, whereas the provided container (x) is unaffected. May + * generate a new container. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer ixor(MappeableArrayContainer x); + + /** + * Computes the in-place bitwise XOR of this container with another (symmetric difference). The + * current container is generally modified, whereas the provided container (x) is unaffected. May + * generate a new container. 
+ * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer ixor(MappeableBitmapContainer x); + + protected MappeableContainer ixor(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return ixor((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return ixor((MappeableRunContainer) x); + } + + return ixor((MappeableBitmapContainer) x); + + } + + + /** + * Computes the in-place bitwise XOR of this container with another (symmetric difference). The + * current container is generally modified, whereas the provided container (x) is unaffected. May + * generate a new container. + * + * @param x other container + * @return aggregated container + */ + + public abstract MappeableContainer ixor(MappeableRunContainer x); + + + /** + * Computes the in-place bitwise OR of this container with another (union). The current container + * is generally modified, whereas the provided container (x) is unaffected. May generate a new + * container. The resulting container may not track its cardinality correctly. 
This can be fixed as follows: + * if(c.getCardinality()<0) ((MappeableBitmapContainer)c).computeCardinality(); + * + * @param x other container + * @return aggregated container + */ + public MappeableContainer lazyIOR(MappeableContainer x) { + if (this instanceof MappeableArrayContainer) { + if (x instanceof MappeableArrayContainer) { + return ((MappeableArrayContainer) this).lazyor((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return ((MappeableBitmapContainer) x).lazyor((MappeableArrayContainer) this); + } + return ((MappeableRunContainer) x).lazyor((MappeableArrayContainer) this); + } else if (this instanceof MappeableRunContainer) { + if (x instanceof MappeableArrayContainer) { + return ((MappeableRunContainer) this).ilazyor((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return ((MappeableBitmapContainer) x).lazyor((MappeableRunContainer) this); + } + return ior((MappeableRunContainer) x); + } else { + if (x instanceof MappeableArrayContainer) { + return ((MappeableBitmapContainer) this).ilazyor((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return ((MappeableBitmapContainer) this).ilazyor((MappeableBitmapContainer) x); + } + return ((MappeableBitmapContainer) this).ilazyor((MappeableRunContainer) x); + } + } + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. The resulting container may not track its cardinality + * correctly. 
This can be fixed as follows: if(c.getCardinality()<0) + * ((MappeableBitmapContainer)c).computeCardinality(); + * + * @param x other container + * @return aggregated container + */ + public MappeableContainer lazyOR(MappeableContainer x) { + if (this instanceof MappeableArrayContainer) { + if (x instanceof MappeableArrayContainer) { + return ((MappeableArrayContainer) this).lazyor((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return ((MappeableBitmapContainer) x).lazyor((MappeableArrayContainer) this); + } + return ((MappeableRunContainer) x).lazyor((MappeableArrayContainer) this); + } else if (this instanceof MappeableRunContainer) { + if (x instanceof MappeableArrayContainer) { + return ((MappeableRunContainer) this).lazyor((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return ((MappeableBitmapContainer) x).lazyor((MappeableRunContainer) this); + } + return or((MappeableRunContainer) x); + } else { + if (x instanceof MappeableArrayContainer) { + return ((MappeableBitmapContainer) this).lazyor((MappeableArrayContainer) x); + } else if (x instanceof MappeableBitmapContainer) { + return ((MappeableBitmapContainer) this).lazyor((MappeableBitmapContainer) x); + } + return ((MappeableBitmapContainer) this).lazyor((MappeableRunContainer) x); + } + } + + /** + * Create a new MappeableContainer containing at most maxcardinality integers. + * + * @param maxcardinality maximal cardinality + * @return a new bitmap with cardinality no more than maxcardinality + */ + public abstract MappeableContainer limit(int maxcardinality); + + /** + * Computes the bitwise NOT of this container (complement). Only those bits within the range are + * affected. The current container is left unaffected. + * + * @param rangeStart beginning of range (inclusive); 0 is beginning of this container. 
+ * @param rangeEnd ending of range (exclusive) + * @return (partially) completmented container + */ + public abstract MappeableContainer not(int rangeStart, int rangeEnd); + + abstract int numberOfRuns(); + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer or(MappeableArrayContainer x); + + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer or(MappeableBitmapContainer x); + + + protected MappeableContainer or(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return or((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return or((MappeableRunContainer) x); + } + + return or((MappeableBitmapContainer) x); + } + + /** + * Computes the bitwise OR of this container with another (union). This container as well as the + * provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + + public abstract MappeableContainer or(MappeableRunContainer x); + + /** + * Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be + * GetCardinality()). + * + * @param lowbits upper limit + * @return the rank + */ + public abstract int rank(short lowbits); + + /** + * Return a new container with all shorts in [begin,end) remove using an unsigned interpretation. + * + * @param begin start of range (inclusive) + * @param end end of range (exclusive) + * @return the new container + */ + public abstract MappeableContainer remove(int begin, int end); + + /** + * Remove the short from this container. May create a new container. 
+ * + * @param x to be removed + * @return New container + */ + public abstract MappeableContainer remove(short x); + + /** + * The output of a lazyOR or lazyIOR might be an invalid container, this should be called on it. + * + * @return a new valid container + */ + public abstract MappeableContainer repairAfterLazy(); + + /** + * Convert to MappeableRunContainers, when the result is smaller. Overridden by + * MappeableRunContainer to possibly switch from MappeableRunContainer to a smaller alternative. + * + * @return the new container + */ + public abstract MappeableContainer runOptimize(); + + + /** + * Return the jth value + * + * @param j index of the value + * @return the value + */ + public abstract short select(int j); + + + /** + * Report the number of bytes required to serialize this container. + * + * @return the size in bytes + */ + public abstract int serializedSizeInBytes(); + + + /** + * Convert to a non-mappeable container. + * + * @return the non-mappeable container + */ + public abstract Container toContainer(); + + /** + * If possible, recover wasted memory. + */ + public abstract void trim(); + + + /** + * Write just the underlying array. + * + * @param out output stream + * @throws IOException in case of failure + */ + protected abstract void writeArray(DataOutput out) throws IOException; + + /** + * Computes the bitwise XOR of this container with another (symmetric difference). This container + * as well as the provided container are left unaffected. + * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer xor(MappeableArrayContainer x); + + /** + * Computes the bitwise XOR of this container with another (symmetric difference). This container + * as well as the provided container are left unaffected. 
+ * + * @param x other container + * @return aggregated container + */ + public abstract MappeableContainer xor(MappeableBitmapContainer x); + + protected MappeableContainer xor(MappeableContainer x) { + if (x instanceof MappeableArrayContainer) { + return xor((MappeableArrayContainer) x); + } else if (x instanceof MappeableRunContainer) { + return xor((MappeableRunContainer) x); + } + + return xor((MappeableBitmapContainer) x); + + } + + /** + * Computes the bitwise XOR of this container with another (symmetric difference). This container + * as well as the provided container are left unaffected. + * + * @param x other parameter + * @return aggregated container + */ + public abstract MappeableContainer xor(MappeableRunContainer x); + + /** + * Convert the current container to a BitmapContainer, if a conversion is needed. + * If the container is already a bitmap, the container is returned unchanged. + * + * @return a bitmap container + */ + public abstract MappeableBitmapContainer toBitmapContainer(); + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableContainerPointer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableContainerPointer.java new file mode 100644 index 000000000..3a26415fc --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableContainerPointer.java @@ -0,0 +1,80 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +/** + * This interface allows you to iterate over the containers in a roaring bitmap. + */ +public interface MappeableContainerPointer + extends Comparable, Cloneable { + /** + * Move to the next container + */ + void advance(); + + /** + * Create a copy + * + * @return return a clone of this pointer + */ + MappeableContainerPointer clone(); + + /** + * Returns the cardinality of the current container. Can be faster than loading the container + * first. 
+ * + * @return cardinality of the current container + */ + int getCardinality(); + + /** + * This method can be used to check whether there is current a valid container as it returns null + * when there is not. + * + * @return null or the current container + */ + MappeableContainer getContainer(); + + /** + * Get the size in bytes of the container. Used for sorting. + * + * @return the size in bytes + */ + int getSizeInBytes(); + + /** + * @return whether there is a container at the current position + */ + boolean hasContainer(); + + /** + * Returns true if it is a bitmap container (MappeableBitmapContainer). + * + * @return boolean indicated if it is a bitmap container + */ + public boolean isBitmapContainer(); + + /** + * Returns true if it is a run container (MappeableRunContainer). + * + * @return boolean indicated if it is a run container + */ + boolean isRunContainer(); + + /** + * The key is a 16-bit integer that indicates the position of the container in the roaring bitmap. + * To be interpreted as an unsigned integer. + * + * @return the key + */ + short key(); + + /** + * Move to the previous container + */ + void previous(); + + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableRunContainer.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableRunContainer.java new file mode 100644 index 000000000..a373bad5d --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MappeableRunContainer.java @@ -0,0 +1,2798 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. 
+ */ +package com.fr.third.bitmap.roaringbitmap.buffer; + + +import com.fr.third.bitmap.roaringbitmap.Container; +import com.fr.third.bitmap.roaringbitmap.IntConsumer; +import com.fr.third.bitmap.roaringbitmap.PeekableShortIterator; +import com.fr.third.bitmap.roaringbitmap.RunContainer; +import com.fr.third.bitmap.roaringbitmap.ShortIterator; +import com.fr.third.bitmap.roaringbitmap.Util; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.ShortBuffer; +import java.util.Arrays; +import java.util.Iterator; + +/** + * This container takes the form of runs of consecutive values (effectively, run-length encoding). + * Uses a ShortBuffer to store data, unlike RunContainer. Otherwise similar. + *

+ *

+ * Adding and removing content from this container might make it wasteful so regular calls to + * "runOptimize" might be warranted. + */ +public final class MappeableRunContainer extends MappeableContainer implements Cloneable { + private static final int DEFAULT_INIT_SIZE = 4; + private static final long serialVersionUID = 1L; + protected ShortBuffer valueslength; + protected int nbrruns = 0;// how many runs, this number should fit in 16 bits. + + /** + * Create a container with default capacity + */ + public MappeableRunContainer() { + this(DEFAULT_INIT_SIZE); + } + + /** + * Create a run container with the specified capacity + * + * @param capacity The capacity of the container, counted in runs (two shorts are allocated per + * run) + */ + public MappeableRunContainer(final int capacity) { + valueslength = ShortBuffer.allocate(2 * capacity); + } + + + /** + * Builds a container holding its own copy of the given buffer. The copy is taken through a + * duplicate that is rewound first, so the content is read from index 0 up to the buffer's + * limit. + * + * @param nbrruns number of runs to record for the new container + * @param valueslength buffer to copy from + */ + private MappeableRunContainer(int nbrruns, final ShortBuffer valueslength) { + this.nbrruns = nbrruns; + ShortBuffer tmp = valueslength.duplicate();// for thread safety + this.valueslength = ShortBuffer.allocate(Math.max(2 * nbrruns, tmp.limit())); + tmp.rewind(); + this.valueslength.put(tmp); // may copy more than it needs to?? 
+ } + + protected MappeableRunContainer(MappeableArrayContainer arr, int nbrRuns) { + this.nbrruns = nbrRuns; + valueslength = ShortBuffer.allocate(2 * nbrRuns); + short[] vl = valueslength.array(); + if (nbrRuns == 0) { + return; + } + + int prevVal = -2; + int runLen = 0; + int runCount = 0; + if (BufferUtil.isBackedBySimpleArray(arr.content)) { + short[] a = arr.content.array(); + for (int i = 0; i < arr.cardinality; i++) { + int curVal = BufferUtil.toIntUnsigned(a[i]); + if (curVal == prevVal + 1) { + ++runLen; + } else { + if (runCount > 0) { + vl[2 * (runCount - 1) + 1] = (short) runLen; + } + // setLength(runCount - 1, (short) runLen); + vl[2 * runCount] = (short) curVal; + // setValue(runCount, (short) curVal); + runLen = 0; + ++runCount; + } + prevVal = curVal; + } + + } else { + for (int i = 0; i < arr.cardinality; i++) { + int curVal = BufferUtil.toIntUnsigned(arr.content.get(i)); + if (curVal == prevVal + 1) { + ++runLen; + } else { + if (runCount > 0) { + vl[2 * (runCount - 1) + 1] = (short) runLen; + } + // setLength(runCount - 1, (short) runLen); + vl[2 * runCount] = (short) curVal; + // setValue(runCount, (short) curVal); + runLen = 0; + ++runCount; + } + prevVal = curVal; + } + } + // setLength(runCount-1, (short) runLen); + vl[2 * (runCount - 1) + 1] = (short) runLen; + } + + // convert a bitmap container to a run container somewhat efficiently. 
+ protected MappeableRunContainer(MappeableBitmapContainer bc, int nbrRuns) { + this.nbrruns = nbrRuns; + valueslength = ShortBuffer.allocate(2 * nbrRuns); + if (!BufferUtil.isBackedBySimpleArray(valueslength)) { + throw new RuntimeException("Unexpected internal error."); + } + short[] vl = valueslength.array(); + if (nbrRuns == 0) { + return; + } + if (bc.isArrayBacked()) { + long[] b = bc.bitmap.array(); + int longCtr = 0; // index of current long in bitmap + long curWord = b[0]; // its value + int runCount = 0; + final int len = bc.bitmap.limit(); + while (true) { + // potentially multiword advance to first 1 bit + while (curWord == 0L && longCtr < len - 1) { + curWord = b[++longCtr]; + } + + if (curWord == 0L) { + // wrap up, no more runs + return; + } + int localRunStart = Long.numberOfTrailingZeros(curWord); + int runStart = localRunStart + 64 * longCtr; + // stuff 1s into number's LSBs + long curWordWith1s = curWord | (curWord - 1); + + // find the next 0, potentially in a later word + int runEnd = 0; + while (curWordWith1s == -1L && longCtr < len - 1) { + curWordWith1s = b[++longCtr]; + } + + if (curWordWith1s == -1L) { + // a final unterminated run of 1s (32 of them) + runEnd = 64 + longCtr * 64; + // setValue(runCount, (short) runStart); + vl[2 * runCount] = (short) runStart; + // setLength(runCount, (short) (runEnd-runStart-1)); + vl[2 * runCount + 1] = (short) (runEnd - runStart - 1); + return; + } + int localRunEnd = Long.numberOfTrailingZeros(~curWordWith1s); + runEnd = localRunEnd + longCtr * 64; + // setValue(runCount, (short) runStart); + vl[2 * runCount] = (short) runStart; + // setLength(runCount, (short) (runEnd-runStart-1)); + vl[2 * runCount + 1] = (short) (runEnd - runStart - 1); + runCount++; + // now, zero out everything right of runEnd. + curWord = curWordWith1s & (curWordWith1s + 1); + // We've lathered and rinsed, so repeat... 
+ } + } else { + int longCtr = 0; // index of current long in bitmap + long curWord = bc.bitmap.get(0); // its value + int runCount = 0; + final int len = bc.bitmap.limit(); + while (true) { + // potentially multiword advance to first 1 bit + while (curWord == 0L && longCtr < len - 1) { + curWord = bc.bitmap.get(++longCtr); + } + + if (curWord == 0L) { + // wrap up, no more runs + return; + } + int localRunStart = Long.numberOfTrailingZeros(curWord); + int runStart = localRunStart + 64 * longCtr; + // stuff 1s into number's LSBs + long curWordWith1s = curWord | (curWord - 1); + + // find the next 0, potentially in a later word + int runEnd = 0; + while (curWordWith1s == -1L && longCtr < len - 1) { + curWordWith1s = bc.bitmap.get(++longCtr); + } + + if (curWordWith1s == -1L) { + // a final unterminated run of 1s (32 of them) + runEnd = 64 + longCtr * 64; + // setValue(runCount, (short) runStart); + vl[2 * runCount] = (short) runStart; + // setLength(runCount, (short) (runEnd-runStart-1)); + vl[2 * runCount + 1] = (short) (runEnd - runStart - 1); + return; + } + int localRunEnd = Long.numberOfTrailingZeros(~curWordWith1s); + runEnd = localRunEnd + longCtr * 64; + // setValue(runCount, (short) runStart); + vl[2 * runCount] = (short) runStart; + // setLength(runCount, (short) (runEnd-runStart-1)); + vl[2 * runCount + 1] = (short) (runEnd - runStart - 1); + runCount++; + // now, zero out everything right of runEnd. + + curWord = curWordWith1s & (curWordWith1s + 1); + // We've lathered and rinsed, so repeat... + } + + } + } + + /** + * Creates a new container from a non-mappeable one. This copies the data. + * + * @param bc the original container + */ + public MappeableRunContainer(RunContainer bc) { + this.nbrruns = bc.numberOfRuns(); + this.valueslength = bc.toShortBuffer(); + } + + + /** + * Construct a new RunContainer backed by the provided ShortBuffer. Note that if you modify the + * RunContainer a new ShortBuffer may be produced. 
+ * + * @param array ShortBuffer where the data is stored + * @param numRuns number of runs (each using 2 shorts in the buffer) + */ + public MappeableRunContainer(final ShortBuffer array, final int numRuns) { + if (array.limit() < 2 * numRuns) { + throw new RuntimeException("Mismatch between buffer and numRuns"); + } + this.nbrruns = numRuns; + this.valueslength = array; + } + + private static int branchyBufferedUnsignedInterleavedBinarySearch(final ShortBuffer sb, + final int begin, final int end, final short k) { + int ikey = BufferUtil.toIntUnsigned(k); + int low = begin; + int high = end - 1; + while (low <= high) { + final int middleIndex = (low + high) >>> 1; + final int middleValue = BufferUtil.toIntUnsigned(sb.get(2 * middleIndex)); + if (middleValue < ikey) { + low = middleIndex + 1; + } else if (middleValue > ikey) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + return -(low + 1); + } + + private static int bufferedUnsignedInterleavedBinarySearch(final ShortBuffer sb, final int begin, + final int end, final short k) { + return branchyBufferedUnsignedInterleavedBinarySearch(sb, begin, end, k); + } + + protected static int getArraySizeInBytes(int nbrruns) { + return 2 + 4 * nbrruns; + } + + static short getLength(short[] vl, int index) { + return vl[2 * index + 1]; + } + + static short getValue(short[] vl, int index) { + return vl[2 * index]; + } + + protected static int serializedSizeInBytes(int numberOfRuns) { + return 2 + 2 * 2 * numberOfRuns; // each run requires 2 2-byte entries. 
+ } + + @Override + public MappeableContainer add(int begin, int end) { + MappeableRunContainer rc = (MappeableRunContainer) clone(); + return rc.iadd(begin, end); + } + + /** + * Adds a single value to this container in place, extending or fusing neighbouring runs when + * the value is adjacent to them and inserting a new one-element run otherwise. All adjacency + * tests use the unsigned interpretation of the 16-bit values. + * + * @param k value to add + * @return this container + */ + @Override + // not thread-safe + public MappeableContainer add(short k) { + // TODO: it might be better and simpler to do return + // toBitmapOrArrayContainer(getCardinality()).add(k) + int index = bufferedUnsignedInterleavedBinarySearch(valueslength, 0, nbrruns, k); + if (index >= 0) { + return this;// already there + } + index = -index - 2;// points to preceding value, possibly -1 + if (index >= 0) {// possible match + int offset = BufferUtil.toIntUnsigned(k) - BufferUtil.toIntUnsigned(getValue(index)); + int le = BufferUtil.toIntUnsigned(getLength(index)); + if (offset <= le) { + return this; + } + if (offset == le + 1) { + // we may need to fuse + if (index + 1 < nbrruns) { + if (BufferUtil.toIntUnsigned(getValue(index + 1)) == BufferUtil.toIntUnsigned(k) + 1) { + // indeed fusion is needed + setLength(index, + (short) (getValue(index + 1) + getLength(index + 1) - getValue(index))); + recoverRoomAtIndex(index + 1); + return this; + } + } + incrementLength(index); + return this; + } + if (index + 1 < nbrruns) { + // we may need to fuse + if (BufferUtil.toIntUnsigned(getValue(index + 1)) == BufferUtil.toIntUnsigned(k) + 1) { + // indeed fusion is needed + setValue(index + 1, k); + setLength(index + 1, (short) (getLength(index + 1) + 1)); + return this; + } + } + } + if (index == -1) { + // we may need to extend the first run + if (0 < nbrruns) { + // compare as unsigned: with signed promotion, k = 0x7FFF next to a run starting at 0x8000 + // would test -32768 == 32768 and miss the adjacency + if (BufferUtil.toIntUnsigned(getValue(0)) == BufferUtil.toIntUnsigned(k) + 1) { + incrementLength(0); + decrementValue(0); + return this; + } + } + } + makeRoomAtIndex(index + 1); + setValue(index + 1, k); + setLength(index + 1, (short) 0); + return this; + } + + + @Override + public MappeableContainer and(MappeableArrayContainer x) { + MappeableArrayContainer ac = new MappeableArrayContainer(x.cardinality); + if (this.nbrruns == 0) { + return ac; + } + int rlepos = 0; + int arraypos = 0; + + int 
rleval = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int rlelength = BufferUtil.toIntUnsigned(this.getLength(rlepos)); + while (arraypos < x.cardinality) { + int arrayval = BufferUtil.toIntUnsigned(x.content.get(arraypos)); + while (rleval + rlelength < arrayval) {// this will frequently be false + ++rlepos; + if (rlepos == this.nbrruns) { + return ac;// we are done + } + rleval = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + rlelength = BufferUtil.toIntUnsigned(this.getLength(rlepos)); + } + if (rleval > arrayval) { + arraypos = + BufferUtil.advanceUntil(x.content, arraypos, x.cardinality, (short) rleval); + } else { + ac.content.put(ac.cardinality, (short) arrayval); + ac.cardinality++; + arraypos++; + } + } + return ac; + } + + @Override + public MappeableContainer and(MappeableBitmapContainer x) { + int card = this.getCardinality(); + if (card <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + // result can only be an array (assuming that we never make a RunContainer) + if (card > x.cardinality) { + card = x.cardinality; + } + MappeableArrayContainer answer = new MappeableArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + BufferUtil.toIntUnsigned(this.getLength(rlepos)); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + if (x.contains((short) runValue)) { + answer.content.put(answer.cardinality++, (short) runValue); + } + } + } + return answer; + } + // we expect the answer to be a bitmap (if we are lucky) + + MappeableBitmapContainer answer = x.clone(); + int start = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int end = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + BufferUtil.resetBitmapRange(answer.bitmap, start, end); + start = end + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + BufferUtil.resetBitmapRange(answer.bitmap, start, 
BufferUtil.maxLowBitAsInteger() + 1); + answer.computeCardinality(); + if (answer.getCardinality() > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return answer; + } else { + return answer.toArrayContainer(); + } + + } + + @Override + public MappeableContainer and(MappeableRunContainer x) { + MappeableRunContainer answer = + new MappeableRunContainer(ShortBuffer.allocate(2 * (this.nbrruns + x.nbrruns)), 0); + short[] vl = answer.valueslength.array(); + int rlepos = 0; + int xrlepos = 0; + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + int xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + int xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) { + if (end <= xstart) { + // exit the first run + rlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xend <= start) { + // exit the second run + xrlepos++; + if (xrlepos < x.nbrruns) { + xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else {// they overlap + final int lateststart = start > xstart ? 
start : xstart; + int earliestend; + if (end == xend) {// improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + if (xrlepos < x.nbrruns) { + xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + + } else {// end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < x.nbrruns) { + xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } + vl[2 * answer.nbrruns] = (short) lateststart; + vl[2 * answer.nbrruns + 1] = (short) (earliestend - lateststart - 1); + answer.nbrruns++; + } + } + return answer; + } + + @Override + public MappeableContainer andNot(MappeableArrayContainer x) { + // when x is small, we guess that the result will still be a run container + final int arbitrary_threshold = 32; // this is arbitrary + if (x.getCardinality() < arbitrary_threshold) { + return lazyandNot(x).toEfficientContainer(); + } + // otherwise we generate either an array or bitmap container + final int card = getCardinality(); + if (card <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + // if the cardinality is small, we construct the solution in place + MappeableArrayContainer ac = new MappeableArrayContainer(card); + ac.cardinality = Util.unsignedDifference(this.getShortIterator(), + x.getShortIterator(), ac.content.array()); + return ac; + } + // otherwise, we generate a bitmap + return toBitmapOrArrayContainer(card).iandNot(x); + } + + @Override + public MappeableContainer andNot(MappeableBitmapContainer x) { + int card = 
/**
 * Returns the values of this run container that are not in the run container {@code x}, as a
 * new run container; neither operand is modified.
 *
 * <p>Both run lists are walked in parallel. {@code [start, end)} is the still-unprocessed part
 * of the current run of this container and {@code [xstart, xend)} the current run of
 * {@code x}, both with an exclusive end. The answer buffer is allocated with room for
 * {@code this.nbrruns + x.nbrruns} runs.
 *
 * <p>NOTE(review): run 0 of both containers is read before either {@code nbrruns} is checked;
 * this presumably relies on the backing buffers always having room for at least one run, even
 * when a container is empty. Confirm against the constructors.
 *
 * @param x the runs to remove
 * @return a new run container holding {@code this \ x}
 */
@Override
public MappeableContainer andNot(MappeableRunContainer x) {
    MappeableRunContainer answer =
        new MappeableRunContainer(ShortBuffer.allocate(2 * (this.nbrruns + x.nbrruns)), 0);
    short[] vl = answer.valueslength.array();
    int rlepos = 0;
    int xrlepos = 0;
    int start = BufferUtil.toIntUnsigned(this.getValue(rlepos));
    int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1;
    int xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos));
    int xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1;
    while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) {
        if (end <= xstart) {
            // output the first run
            // (our run lies entirely before x's run, so it survives unchanged)
            vl[2 * answer.nbrruns] = (short) start;
            vl[2 * answer.nbrruns + 1] = (short) (end - start - 1);
            answer.nbrruns++;
            rlepos++;
            if (rlepos < this.nbrruns) {
                start = BufferUtil.toIntUnsigned(this.getValue(rlepos));
                end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1;
            }
        } else if (xend <= start) {
            // exit the second run
            // (x's run lies entirely before ours and cannot remove anything further)
            xrlepos++;
            if (xrlepos < x.nbrruns) {
                xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos));
                xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1;
            }
        } else {
            // the runs overlap
            if (start < xstart) {
                // keep the part of our run that precedes x's run
                vl[2 * answer.nbrruns] = (short) start;
                vl[2 * answer.nbrruns + 1] = (short) (xstart - start - 1);
                answer.nbrruns++;
            }
            if (xend < end) {
                // our run extends past x's run: continue with the remainder
                start = xend;
            } else {
                // our run is fully consumed: move to the next one
                rlepos++;
                if (rlepos < this.nbrruns) {
                    start = BufferUtil.toIntUnsigned(this.getValue(rlepos));
                    end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1;
                }
            }
        }
    }
    if (rlepos < this.nbrruns) {
        // x is exhausted: emit what is left of the current run, then copy the remaining runs
        vl[2 * answer.nbrruns] = (short) start;
        vl[2 * answer.nbrruns + 1] = (short) (end - start - 1);
        answer.nbrruns++;
        rlepos++;
        for (; rlepos < this.nbrruns; ++rlepos) {
            vl[2 * answer.nbrruns] = this.valueslength.get(2 * rlepos);
            vl[2 * answer.nbrruns + 1] = this.valueslength.get(2 * rlepos + 1);
            answer.nbrruns++;
        }
        // next bit would be faster but not thread-safe because of the "position"
        // if(rlepos < this.nbrruns) {
        // this.valueslength.position(2 * rlepos);
        // this.valueslength.get(vl, 2 * answer.nbrruns, 2*(this.nbrruns-rlepos ));
        // answer.nbrruns = answer.nbrruns + this.nbrruns - rlepos;
        // }
    }
    return answer;
}
+ } + + + // To check if a value length can be prepended with a given value + private boolean canPrependValueLength(int value, int index) { + if (index < this.nbrruns) { + int nextValue = BufferUtil.toIntUnsigned(getValue(index)); + if (nextValue == value + 1) { + return true; + } + } + return false; + } + + + @Override + public void clear() { + nbrruns = 0; + } + + + @Override + public MappeableContainer clone() { + return new MappeableRunContainer(nbrruns, valueslength); + } + + + // To set the last value of a value length + private void closeValueLength(int value, int index) { + int initialValue = BufferUtil.toIntUnsigned(getValue(index)); + setLength(index, (short) (value - initialValue)); + } + + @Override + public boolean contains(short x) { + int index = bufferedUnsignedInterleavedBinarySearch(valueslength, 0, nbrruns, x); + if (index >= 0) { + return true; + } + index = -index - 2; // points to preceding value, possibly -1 + if (index != -1) {// possible match + int offset = BufferUtil.toIntUnsigned(x) - BufferUtil.toIntUnsigned(getValue(index)); + int le = BufferUtil.toIntUnsigned(getLength(index)); + if (offset <= le) { + return true; + } + } + return false; + } + + // a very cheap check... if you have more than 4096, then you should use a bitmap container. 
+ // this function avoids computing the cardinality + private MappeableContainer convertToLazyBitmapIfNeeded() { + // when nbrruns exceed MappeableArrayContainer.DEFAULT_MAX_SIZE, then we know it should be + // stored as a bitmap, always + if (this.nbrruns > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + MappeableBitmapContainer answer = new MappeableBitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = -1; + return answer; + } + return this; + } + + // Push all values length to the end of the array (resize array if needed) + private void copyToOffset(int offset) { + final int minCapacity = 2 * (offset + nbrruns); + if (valueslength.capacity() < minCapacity) { + // expensive case where we need to reallocate + int newCapacity = valueslength.capacity(); + while (newCapacity < minCapacity) { + newCapacity = (newCapacity == 0) ? DEFAULT_INIT_SIZE + : newCapacity < 64 ? newCapacity * 2 + : newCapacity < 1024 ? newCapacity * 3 / 2 : newCapacity * 5 / 4; + } + ShortBuffer newvalueslength = ShortBuffer.allocate(newCapacity); + copyValuesLength(this.valueslength, 0, newvalueslength, offset, nbrruns); + this.valueslength = newvalueslength; + } else { + // efficient case where we just copy + copyValuesLength(this.valueslength, 0, this.valueslength, offset, nbrruns); + } + } + + private void copyValuesLength(ShortBuffer src, int srcIndex, ShortBuffer dst, int dstIndex, + int length) { + if (BufferUtil.isBackedBySimpleArray(src) && BufferUtil.isBackedBySimpleArray(dst)) { + // common case. 
+ System.arraycopy(src.array(), 2 * srcIndex, dst.array(), 2 * dstIndex, 2 * length); + return; + } + // source and destination may overlap + // consider specialized code for various cases, rather than using a second buffer + ShortBuffer temp = ShortBuffer.allocate(2 * length); + for (int i = 0; i < 2 * length; ++i) { + temp.put(src.get(2 * srcIndex + i)); + } + temp.flip(); + for (int i = 0; i < 2 * length; ++i) { + dst.put(2 * dstIndex + i, temp.get()); + } + } + + private void decrementLength(int index) { + // caller is responsible to ensure that value is non-zero + valueslength.put(2 * index + 1, (short) (valueslength.get(2 * index + 1) - 1)); + } + + + private void decrementValue(int index) { + valueslength.put(2 * index, (short) (valueslength.get(2 * index) - 1)); + } + + // not thread safe! + // not actually used anywhere, but potentially useful + protected void ensureCapacity(int minNbRuns) { + final int minCapacity = 2 * minNbRuns; + if (valueslength.capacity() < minCapacity) { + int newCapacity = valueslength.capacity(); + while (newCapacity < minCapacity) { + newCapacity = (newCapacity == 0) ? DEFAULT_INIT_SIZE + : newCapacity < 64 ? newCapacity * 2 + : newCapacity < 1024 ? 
newCapacity * 3 / 2 : newCapacity * 5 / 4; + } + final ShortBuffer nv = ShortBuffer.allocate(newCapacity); + valueslength.rewind(); + nv.put(valueslength); + valueslength = nv; + } + } + + @Override + public boolean equals(Object o) { + if (o instanceof MappeableRunContainer) { + MappeableRunContainer srb = (MappeableRunContainer) o; + if (srb.nbrruns != this.nbrruns) { + return false; + } + for (int i = 0; i < nbrruns; ++i) { + if (this.getValue(i) != srb.getValue(i)) { + return false; + } + if (this.getLength(i) != srb.getLength(i)) { + return false; + } + } + return true; + } else if (o instanceof MappeableContainer) { + if (((MappeableContainer) o).getCardinality() != this.getCardinality()) { + return false; // should be a frequent branch if they differ + } + // next bit could be optimized if needed: + ShortIterator me = this.getShortIterator(); + ShortIterator you = ((MappeableContainer) o).getShortIterator(); + while (me.hasNext()) { + if (me.next() != you.next()) { + return false; + } + } + return true; + } + return false; + } + + @Override + public void fillLeastSignificant16bits(int[] x, int i, int mask) { + int pos = i; + for (int k = 0; k < this.nbrruns; ++k) { + final int limit = BufferUtil.toIntUnsigned(this.getLength(k)); + final int base = BufferUtil.toIntUnsigned(this.getValue(k)); + for (int le = 0; le <= limit; ++le) { + x[pos++] = (base + le) | mask; + } + } + } + + + @Override + public MappeableContainer flip(short x) { + if (this.contains(x)) { + return this.remove(x); + } else { + return this.add(x); + } + } + + @Override + protected int getArraySizeInBytes() { + return 2 + 4 * this.nbrruns; // "array" includes its size + } + + @Override + public int getCardinality() { + int sum = nbrruns; // lengths are stored -1 + if (isArrayBacked()) { + short[] vl = valueslength.array(); + for (int k = 0; k < nbrruns; ++k) { + sum = sum + BufferUtil.toIntUnsigned(vl[2 * k + 1])/* + 1 */; + } + } else { + for (int k = 0; k < nbrruns; ++k) { + sum = sum + 
BufferUtil.toIntUnsigned(getLength(k))/* + 1 */; + } + } + return sum; + } + + short getLength(int index) { + return valueslength.get(2 * index + 1); + } + + @Override + public ShortIterator getReverseShortIterator() { + if (isArrayBacked()) { + return new RawReverseMappeableRunContainerShortIterator(this); + } + return new ReverseMappeableRunContainerShortIterator(this); + } + + @Override + public PeekableShortIterator getShortIterator() { + if (isArrayBacked()) { + return new RawMappeableRunContainerShortIterator(this); + } + return new MappeableRunContainerShortIterator(this); + } + + @Override + public int getSizeInBytes() { + return this.nbrruns * 4 + 4; // not sure about how exact it will be + } + + short getValue(int index) { + return valueslength.get(2 * index); + } + + @Override + public int hashCode() { + int hash = 0; + for (int k = 0; k < nbrruns * 2; ++k) { + hash += 31 * hash + valueslength.get(k); + } + return hash; + } + + @Override + // not thread-safe + public MappeableContainer iadd(int begin, int end) { + // TODO: it might be better and simpler to do return + // toBitmapOrArrayContainer(getCardinality()).iadd(begin,end) + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + if (begin == end - 1) { + add((short) begin); + return this; + } + + int bIndex = + bufferedUnsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, (short) begin); + int eIndex = bufferedUnsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, + (short) (end - 1)); + + if (bIndex >= 0 && eIndex >= 0) { + mergeValuesLength(bIndex, eIndex); + return this; + + } else if (bIndex >= 0 && eIndex < 0) { + eIndex = -eIndex - 2; + + if (canPrependValueLength(end - 1, eIndex + 1)) { + mergeValuesLength(bIndex, eIndex + 1); + return this; + } + + appendValueLength(end - 1, eIndex); + mergeValuesLength(bIndex, eIndex); + return this; + + } else if 
(bIndex < 0 && eIndex >= 0) { + bIndex = -bIndex - 2; + + if (bIndex >= 0) { + if (valueLengthContains(begin - 1, bIndex)) { + mergeValuesLength(bIndex, eIndex); + return this; + } + } + prependValueLength(begin, bIndex + 1); + mergeValuesLength(bIndex + 1, eIndex); + return this; + + } else { + bIndex = -bIndex - 2; + eIndex = -eIndex - 2; + + if (eIndex >= 0) { + if (bIndex >= 0) { + if (!valueLengthContains(begin - 1, bIndex)) { + if (bIndex == eIndex) { + if (canPrependValueLength(end - 1, eIndex + 1)) { + prependValueLength(begin, eIndex + 1); + return this; + } + makeRoomAtIndex(eIndex + 1); + setValue(eIndex + 1, (short) begin); + setLength(eIndex + 1, (short) (end - 1 - begin)); + return this; + + } else { + bIndex++; + prependValueLength(begin, bIndex); + } + } + } else { + bIndex = 0; + prependValueLength(begin, bIndex); + } + + if (canPrependValueLength(end - 1, eIndex + 1)) { + mergeValuesLength(bIndex, eIndex + 1); + return this; + } + + appendValueLength(end - 1, eIndex); + mergeValuesLength(bIndex, eIndex); + return this; + + } else { + if (canPrependValueLength(end - 1, 0)) { + prependValueLength(begin, 0); + } else { + makeRoomAtIndex(0); + setValue(0, (short) begin); + setLength(0, (short) (end - 1 - begin)); + } + return this; + } + } + } + + + @Override + public MappeableContainer iand(MappeableArrayContainer x) { + return and(x); + } + + @Override + public MappeableContainer iand(MappeableBitmapContainer x) { + return and(x); + } + + + @Override + public MappeableContainer iand(MappeableRunContainer x) { + return and(x); + } + + @Override + public MappeableContainer iandNot(MappeableArrayContainer x) { + return andNot(x); + } + + @Override + public MappeableContainer iandNot(MappeableBitmapContainer x) { + return andNot(x); + } + + @Override + public MappeableContainer iandNot(MappeableRunContainer x) { + return andNot(x); + } + + protected MappeableContainer ilazyor(MappeableArrayContainer x) { + if (isFull()) { + return this; // this can 
sometimes solve a lot of computation! + } + return ilazyorToRun(x); + } + + + private MappeableContainer ilazyorToRun(MappeableArrayContainer x) { + if (isFull()) { + return this.clone(); + } + final int nbrruns = this.nbrruns; + final int offset = Math.max(nbrruns, x.getCardinality()); + copyToOffset(offset); + short[] vl = valueslength.array(); + int rlepos = 0; + this.nbrruns = 0; + PeekableShortIterator i = (PeekableShortIterator) x.getShortIterator(); + while (i.hasNext() && (rlepos < nbrruns)) { + if (BufferUtil.compareUnsigned(getValue(vl, rlepos + offset), i.peekNext()) <= 0) { + smartAppend(vl, getValue(vl, rlepos + offset), getLength(vl, rlepos + offset)); + rlepos++; + } else { + smartAppend(vl, i.next()); + } + } + if (i.hasNext()) { + /* + * if(this.nbrruns>0) { // this might be useful if the run container has just one very large + * run int lastval = BufferUtil.toIntUnsigned(getValue(vl,nbrruns + offset - 1)) + + * BufferUtil.toIntUnsigned(getLength(vl,nbrruns + offset - 1)) + 1; i.advanceIfNeeded((short) + * lastval); } + */ + while (i.hasNext()) { + smartAppend(vl, i.next()); + } + } else { + while (rlepos < nbrruns) { + smartAppend(vl, getValue(vl, rlepos + offset), getLength(vl, rlepos + offset)); + rlepos++; + } + } + return convertToLazyBitmapIfNeeded(); + } + + // not thread safe! + private void increaseCapacity() { + int newCapacity = (valueslength.capacity() == 0) ? DEFAULT_INIT_SIZE + : valueslength.capacity() < 64 ? valueslength.capacity() * 2 + : valueslength.capacity() < 1024 ? 
valueslength.capacity() * 3 / 2 + : valueslength.capacity() * 5 / 4; + + final ShortBuffer nv = ShortBuffer.allocate(newCapacity); + valueslength.rewind(); + nv.put(valueslength); + valueslength = nv; + } + + private void incrementLength(int index) { + valueslength.put(2 * index + 1, (short) (1 + valueslength.get(2 * index + 1))); + } + + private void incrementValue(int index) { + valueslength.put(2 * index, (short) (1 + valueslength.get(2 * index))); + } + + // To set the first value of a value length + private void initValueLength(int value, int index) { + int initialValue = BufferUtil.toIntUnsigned(getValue(index)); + int length = BufferUtil.toIntUnsigned(getLength(index)); + setValue(index, (short) (value)); + setLength(index, (short) (length - (value - initialValue))); + } + + + @Override + public MappeableContainer inot(int rangeStart, int rangeEnd) { + if (rangeEnd <= rangeStart) { + return this; + } + short[] vl = this.valueslength.array(); + + // TODO: write special case code for rangeStart=0; rangeEnd=65535 + // a "sliding" effect where each range records the gap adjacent it + // can probably be quite fast. Probably have 2 cases: start with a + // 0 run vs start with a 1 run. If you both start and end with 0s, + // you will require room for expansion. + + // the +1 below is needed in case the valueslength.length is odd + if (vl.length <= 2 * nbrruns + 1) { + // no room for expansion + // analyze whether this is a case that will require expansion (that we cannot do) + // this is a bit costly now (4 "contains" checks) + + boolean lastValueBeforeRange = false; + boolean firstValueInRange = false; + boolean lastValueInRange = false; + boolean firstValuePastRange = false; + + // contains is based on a binary search and is hopefully fairly fast. + // however, one binary search could *usually* suffice to find both + // lastValueBeforeRange AND firstValueInRange. 
ditto for + // lastVaueInRange and firstValuePastRange + + // find the start of the range + if (rangeStart > 0) { + lastValueBeforeRange = contains((short) (rangeStart - 1)); + } + firstValueInRange = contains((short) rangeStart); + + if (lastValueBeforeRange == firstValueInRange) { + // expansion is required if also lastValueInRange==firstValuePastRange + + // tougher to optimize out, but possible. + lastValueInRange = contains((short) (rangeEnd - 1)); + if (rangeEnd != 65536) { + firstValuePastRange = contains((short) rangeEnd); + } + + // there is definitely one more run after the operation. + if (lastValueInRange == firstValuePastRange) { + return not(rangeStart, rangeEnd); // can't do in-place: true space limit + } + } + } + // either no expansion required, or we have room to handle any required expansion for it. + + // remaining code is just a minor variation on not() + int myNbrRuns = nbrruns; + + MappeableRunContainer ans = this; // copy on top of self. + int k = 0; + ans.nbrruns = 0; // losing this.nbrruns, which is stashed in myNbrRuns. + + // could try using unsignedInterleavedBinarySearch(valueslength, 0, nbrruns, rangeStart) instead + // of sequential scan + // to find the starting location + + for (; k < myNbrRuns && BufferUtil.toIntUnsigned(this.getValue(k)) < rangeStart; ++k) { + // since it is atop self, there is no copying needed + // ans.valueslength[2 * k] = this.valueslength[2 * k]; + // ans.valueslength[2 * k + 1] = this.valueslength[2 * k + 1]; + ans.nbrruns++; + } + // We will work left to right, with a read pointer that always stays + // left of the write pointer. However, we need to give the read pointer a head start. + // use local variables so we are always reading 1 location ahead. 
+ + short bufferedValue = 0, bufferedLength = 0; // 65535 start and 65535 length would be illegal, + // could use as sentinel + short nextValue = 0, nextLength = 0; + if (k < myNbrRuns) { // prime the readahead variables + bufferedValue = vl[2 * k];// getValue(k); + bufferedLength = vl[2 * k + 1];// getLength(k); + } + + ans.smartAppendExclusive(vl, (short) rangeStart, (short) (rangeEnd - rangeStart - 1)); + + for (; k < myNbrRuns; ++k) { + if (ans.nbrruns > k + 1) { + throw new RuntimeException( + "internal error in inot, writer has overtaken reader!! " + k + " " + ans.nbrruns); + } + if (k + 1 < myNbrRuns) { + nextValue = vl[2 * (k + 1)];// getValue(k+1); // readahead for next iteration + nextLength = vl[2 * (k + 1) + 1];// getLength(k+1); + } + ans.smartAppendExclusive(vl, bufferedValue, bufferedLength); + bufferedValue = nextValue; + bufferedLength = nextLength; + } + // the number of runs can increase by one, meaning (rarely) a bitmap will become better + // or the cardinality can decrease by a lot, making an array better + return ans.toEfficientContainer(); + } + + @Override + public boolean intersects(MappeableArrayContainer x) { + if (this.nbrruns == 0) { + return false; + } + int rlepos = 0; + int arraypos = 0; + + int rleval = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int rlelength = BufferUtil.toIntUnsigned(this.getLength(rlepos)); + while (arraypos < x.cardinality) { + int arrayval = BufferUtil.toIntUnsigned(x.content.get(arraypos)); + while (rleval + rlelength < arrayval) {// this will frequently be false + ++rlepos; + if (rlepos == this.nbrruns) { + return false; + } + rleval = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + rlelength = BufferUtil.toIntUnsigned(this.getLength(rlepos)); + } + if (rleval > arrayval) { + arraypos = + BufferUtil.advanceUntil(x.content, arraypos, x.cardinality, this.getValue(rlepos)); + } else { + return true; + } + } + return false; + } + + @Override + public boolean intersects(MappeableBitmapContainer x) { + 
// possibly inefficient + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + BufferUtil.toIntUnsigned(this.getLength(rlepos)); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + if (x.contains((short) runValue)) { + return true; + } + } + } + return false; + } + + @Override + public boolean intersects(MappeableRunContainer x) { + int rlepos = 0; + int xrlepos = 0; + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + int xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + int xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) { + if (end <= xstart) { + // exit the first run + rlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xend <= start) { + // exit the second run + xrlepos++; + if (xrlepos < x.nbrruns) { + xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else {// they overlap + return true; + } + } + return false; + } + + @Override + public MappeableContainer ior(MappeableArrayContainer x) { + if (isFull()) { + return this; + } + final int nbrruns = this.nbrruns; + final int offset = Math.max(nbrruns, x.getCardinality()); + copyToOffset(offset); + short[] vl = this.valueslength.array(); + int rlepos = 0; + this.nbrruns = 0; + PeekableShortIterator i = (PeekableShortIterator) x.getShortIterator(); + while (i.hasNext() && (rlepos < nbrruns)) { + if (BufferUtil.compareUnsigned(getValue(vl, rlepos + offset), i.peekNext()) <= 0) { + smartAppend(vl, getValue(vl, rlepos + offset), getLength(vl, rlepos + offset)); + rlepos++; + } else { + smartAppend(vl, i.next()); + } + } + 
if (i.hasNext()) { + /* + * if(this.nbrruns>0) { // this might be useful if the run container has just one very large + * run int lastval = BufferUtil.toIntUnsigned(getValue(nbrruns + offset - 1)) + + * BufferUtil.toIntUnsigned(getLength(nbrruns + offset - 1)) + 1; i.advanceIfNeeded((short) + * lastval); } + */ + while (i.hasNext()) { + smartAppend(vl, i.next()); + } + } else { + while (rlepos < nbrruns) { + smartAppend(vl, getValue(vl, rlepos + offset), getLength(vl, rlepos + offset)); + rlepos++; + } + } + return toEfficientContainer(); + } + + @Override + public MappeableContainer ior(MappeableBitmapContainer x) { + if (isFull()) { + return this; + } + return or(x); + } + + @Override + public MappeableContainer ior(MappeableRunContainer x) { + if (isFull()) { + return this; + } + + final int nbrruns = this.nbrruns; + final int xnbrruns = x.nbrruns; + final int offset = Math.max(nbrruns, xnbrruns); + + // Push all values length to the end of the array (resize array if needed) + copyToOffset(offset); + + // Aggregate and store the result at the beginning of the array + this.nbrruns = 0; + int rlepos = 0; + int xrlepos = 0; + short[] vl = this.valueslength.array(); + + // Add values length (smaller first) + while ((rlepos < nbrruns) && (xrlepos < xnbrruns)) { + final short value = getValue(vl, offset + rlepos); + final short xvalue = x.getValue(xrlepos); + final short length = getLength(vl, offset + rlepos); + final short xlength = x.getLength(xrlepos); + + if (BufferUtil.compareUnsigned(value, xvalue) <= 0) { + this.smartAppend(vl, value, length); + ++rlepos; + } else { + this.smartAppend(vl, xvalue, xlength); + ++xrlepos; + } + } + while (rlepos < nbrruns) { + this.smartAppend(vl, getValue(vl, offset + rlepos), getLength(vl, offset + rlepos)); + ++rlepos; + } + while (xrlepos < xnbrruns) { + this.smartAppend(vl, x.getValue(xrlepos), x.getLength(xrlepos)); + ++xrlepos; + } + return this.toBitmapIfNeeded(); + } + + @Override + // not thread-safe + public 
MappeableContainer iremove(int begin, int end) { + // TODO: it might be better and simpler to do return + // toBitmapOrArrayContainer(getCardinality()).iremove(begin,end) + if (end == begin) { + return this; + } + if ((begin > end) || (end > (1 << 16))) { + throw new IllegalArgumentException("Invalid range [" + begin + "," + end + ")"); + } + if (begin == end - 1) { + remove((short) begin); + return this; + } + + int bIndex = + bufferedUnsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, (short) begin); + int eIndex = bufferedUnsignedInterleavedBinarySearch(this.valueslength, 0, this.nbrruns, + (short) (end - 1)); + + if (bIndex >= 0) { + if (eIndex < 0) { + eIndex = -eIndex - 2; + } + + if (valueLengthContains(end, eIndex)) { + initValueLength(end, eIndex); + recoverRoomsInRange(bIndex - 1, eIndex - 1); + } else { + recoverRoomsInRange(bIndex - 1, eIndex); + } + + } else if (bIndex < 0 && eIndex >= 0) { + bIndex = -bIndex - 2; + + if (bIndex >= 0) { + if (valueLengthContains(begin, bIndex)) { + closeValueLength(begin - 1, bIndex); + } + } + // last run is one shorter + if (getLength(eIndex) == 0) {// special case where we remove last run + recoverRoomsInRange(eIndex, eIndex + 1); + } else { + incrementValue(eIndex); + decrementLength(eIndex); + } + recoverRoomsInRange(bIndex, eIndex - 1); + + } else { + bIndex = -bIndex - 2; + eIndex = -eIndex - 2; + + if (eIndex >= 0) { + if (bIndex >= 0) { + if (bIndex == eIndex) { + if (valueLengthContains(begin, bIndex)) { + if (valueLengthContains(end, eIndex)) { + makeRoomAtIndex(bIndex); + closeValueLength(begin - 1, bIndex); + initValueLength(end, bIndex + 1); + return this; + } + closeValueLength(begin - 1, bIndex); + } + } else { + if (valueLengthContains(begin, bIndex)) { + closeValueLength(begin - 1, bIndex); + } + if (valueLengthContains(end, eIndex)) { + initValueLength(end, eIndex); + eIndex--; + } + recoverRoomsInRange(bIndex, eIndex); + } + + } else { + if (valueLengthContains(end, eIndex)) { // was 
end-1 + initValueLength(end, eIndex); + recoverRoomsInRange(bIndex, eIndex - 1); + } else { + recoverRoomsInRange(bIndex, eIndex); + } + } + + } + + } + return this; + } + + @Override + protected boolean isArrayBacked() { + return BufferUtil.isBackedBySimpleArray(this.valueslength); + } + + protected boolean isFull() { + return (this.nbrruns == 1) && (this.getValue(0) == 0) && (this.getLength(0) == -1); + } + + @Override + public Iterator iterator() { + final ShortIterator i = getShortIterator(); + return new Iterator() { + + @Override + public boolean hasNext() { + return i.hasNext(); + } + + @Override + public Short next() { + return i.next(); + } + + @Override + public void remove() { + i.remove(); + } + }; + + } + + @Override + public MappeableContainer ixor(MappeableArrayContainer x) { + return xor(x); + } + + + @Override + public MappeableContainer ixor(MappeableBitmapContainer x) { + return xor(x); + } + + @Override + public MappeableContainer ixor(MappeableRunContainer x) { + return xor(x); + } + + private MappeableRunContainer lazyandNot(MappeableArrayContainer x) { + if (x.getCardinality() == 0) { + return this; + } + MappeableRunContainer answer = + new MappeableRunContainer(ShortBuffer.allocate(2 * (this.nbrruns + x.cardinality)), 0); + short[] vl = answer.valueslength.array(); + int rlepos = 0; + int xrlepos = 0; + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + int xstart = BufferUtil.toIntUnsigned(x.content.get(xrlepos)); + while ((rlepos < this.nbrruns) && (xrlepos < x.cardinality)) { + if (end <= xstart) { + // output the first run + vl[2 * answer.nbrruns] = (short) start; + vl[2 * answer.nbrruns + 1] = (short) (end - start - 1); + answer.nbrruns++; + rlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xstart + 1 <= start) { + 
// exit the second run + xrlepos++; + if (xrlepos < x.cardinality) { + xstart = BufferUtil.toIntUnsigned(x.content.get(xrlepos)); + } + } else { + if (start < xstart) { + vl[2 * answer.nbrruns] = (short) start; + vl[2 * answer.nbrruns + 1] = (short) (xstart - start - 1); + answer.nbrruns++; + } + if (xstart + 1 < end) { + start = xstart + 1; + } else { + rlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } + } + } + if (rlepos < this.nbrruns) { + vl[2 * answer.nbrruns] = (short) start; + vl[2 * answer.nbrruns + 1] = (short) (end - start - 1); + answer.nbrruns++; + rlepos++; + for (; rlepos < this.nbrruns; ++rlepos) { + vl[2 * answer.nbrruns] = this.valueslength.get(2 * rlepos); + vl[2 * answer.nbrruns + 1] = this.valueslength.get(2 * rlepos + 1); + answer.nbrruns++; + } + // next bit would be faster, but not thread-safe because of the "position" + // if(rlepos < this.nbrruns) { + // this.valueslength.position(2 * rlepos); + // this.valueslength.get(vl, 2 * answer.nbrruns, 2*(this.nbrruns-rlepos )); + // answer.nbrruns = answer.nbrruns + this.nbrruns - rlepos; + // } + } + return answer; + } + + + protected MappeableContainer lazyor(MappeableArrayContainer x) { + return lazyorToRun(x); + } + + private MappeableContainer lazyorToRun(MappeableArrayContainer x) { + if (isFull()) { + return this.clone(); + } + // TODO: should optimize for the frequent case where we have a single run + MappeableRunContainer answer = + new MappeableRunContainer(ShortBuffer.allocate(2 * (this.nbrruns + x.getCardinality())), 0); + short[] vl = answer.valueslength.array(); + int rlepos = 0; + PeekableShortIterator i = (PeekableShortIterator) x.getShortIterator(); + + while ((rlepos < this.nbrruns) && i.hasNext()) { + if (BufferUtil.compareUnsigned(getValue(rlepos), i.peekNext()) <= 0) { + answer.smartAppend(vl, getValue(rlepos), getLength(rlepos)); + // could call 
i.advanceIfNeeded(minval); + rlepos++; + } else { + answer.smartAppend(vl, i.next()); + } + } + if (i.hasNext()) { + /* + * if(answer.nbrruns>0) { // this might be useful if the run container has just one very large + * run int lastval = BufferUtil.toIntUnsigned(answer.getValue(answer.nbrruns - 1)) + + * BufferUtil.toIntUnsigned(answer.getLength(answer.nbrruns - 1)) + 1; + * i.advanceIfNeeded((short) lastval); } + */ + while (i.hasNext()) { + answer.smartAppend(vl, i.next()); + } + } else { + + while (rlepos < this.nbrruns) { + answer.smartAppend(vl, getValue(rlepos), getLength(rlepos)); + rlepos++; + } + } + return answer.convertToLazyBitmapIfNeeded(); + } + + private MappeableContainer lazyxor(MappeableArrayContainer x) { + if (x.getCardinality() == 0) { + return this; + } + if (this.nbrruns == 0) { + return x; + } + MappeableRunContainer answer = + new MappeableRunContainer(ShortBuffer.allocate(2 * (this.nbrruns + x.getCardinality())), 0); + short[] vl = answer.valueslength.array(); + int rlepos = 0; + ShortIterator i = x.getShortIterator(); + short cv = i.next(); + while (true) { + if (BufferUtil.compareUnsigned(getValue(rlepos), cv) < 0) { + answer.smartAppendExclusive(vl, getValue(rlepos), getLength(rlepos)); + rlepos++; + if (rlepos == this.nbrruns) { + answer.smartAppendExclusive(vl, cv); + while (i.hasNext()) { + answer.smartAppendExclusive(vl, i.next()); + } + break; + } + } else { + answer.smartAppendExclusive(vl, cv); + if (!i.hasNext()) { + while (rlepos < this.nbrruns) { + answer.smartAppendExclusive(vl, getValue(rlepos), getLength(rlepos)); + rlepos++; + } + break; + } else { + cv = i.next(); + } + } + } + return answer; + } + + @Override + public MappeableContainer limit(int maxcardinality) { + if (maxcardinality >= getCardinality()) { + return clone(); + } + + int r; + int cardinality = 0; + for (r = 0; r < this.nbrruns; ++r) { + cardinality += BufferUtil.toIntUnsigned(getLength(r)) + 1; + if (maxcardinality <= cardinality) { + break; + } + } + + 
ShortBuffer newBuf = ShortBuffer.allocate(2 * (r + 1)); + for (int i = 0; i < 2 * (r + 1); ++i) { + newBuf.put(valueslength.get(i)); // could be optimized + } + MappeableRunContainer rc = new MappeableRunContainer(newBuf, r + 1); + + rc.setLength(r, + (short) (BufferUtil.toIntUnsigned(rc.getLength(r)) - cardinality + maxcardinality)); + return rc; + } + + // not thread-safe + private void makeRoomAtIndex(int index) { + if (2 * (nbrruns + 1) > valueslength.capacity()) { + increaseCapacity(); + } + copyValuesLength(valueslength, index, valueslength, index + 1, nbrruns - index); + nbrruns++; + } + + // To merge values length from begin(inclusive) to end(inclusive) + private void mergeValuesLength(int begin, int end) { + if (begin < end) { + int bValue = BufferUtil.toIntUnsigned(getValue(begin)); + int eValue = BufferUtil.toIntUnsigned(getValue(end)); + int eLength = BufferUtil.toIntUnsigned(getLength(end)); + int newLength = eValue - bValue + eLength; + setLength(begin, (short) newLength); + recoverRoomsInRange(begin, end); + } + } + + @Override + public MappeableContainer not(int rangeStart, int rangeEnd) { + if (rangeEnd <= rangeStart) { + return this.clone(); + } + MappeableRunContainer ans = new MappeableRunContainer(nbrruns + 1); + if (!ans.isArrayBacked()) { + throw new RuntimeException("internal bug"); + } + short[] vl = ans.valueslength.array(); + int k = 0; + + if (isArrayBacked()) { + short[] myVl = valueslength.array(); + for (; k < this.nbrruns && BufferUtil.toIntUnsigned(getValue(myVl, k)) < rangeStart; ++k) { + vl[2 * k] = myVl[2 * k]; + vl[2 * k + 1] = myVl[2 * k + 1]; + ans.nbrruns++; + } + ans.smartAppendExclusive(vl, (short) rangeStart, (short) (rangeEnd - rangeStart - 1)); + for (; k < this.nbrruns; ++k) { + ans.smartAppendExclusive(vl, getValue(myVl, k), getLength(myVl, k)); + } + } else { // not array backed + + for (; k < this.nbrruns && BufferUtil.toIntUnsigned(this.getValue(k)) < rangeStart; ++k) { + vl[2 * k] = getValue(k); + vl[2 * k + 1] = 
getLength(k); + ans.nbrruns++; + } + ans.smartAppendExclusive(vl, (short) rangeStart, (short) (rangeEnd - rangeStart - 1)); + for (; k < this.nbrruns; ++k) { + ans.smartAppendExclusive(vl, getValue(k), getLength(k)); + } + } + return ans.toEfficientContainer(); + } + + @Override + public int numberOfRuns() { + return this.nbrruns; + } + + @Override + public MappeableContainer or(MappeableArrayContainer x) { + // we guess that, often, the result will still be efficiently expressed as a run container + return lazyorToRun(x).repairAfterLazy(); + } + + @Override + public MappeableContainer or(MappeableBitmapContainer x) { + if (isFull()) { + return clone(); + } + MappeableBitmapContainer answer = x.clone(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(answer.bitmap, start, end); + } + answer.computeCardinality(); + return answer; + } + + @Override + public MappeableContainer or(MappeableRunContainer x) { + if (isFull()) { + return clone(); + } + if (x.isFull()) { + return x.clone(); // cheap case that can save a lot of computation + } + // we really ought to optimize the rest of the code for the frequent case where there is a + // single run + MappeableRunContainer answer = + new MappeableRunContainer(ShortBuffer.allocate(2 * (this.nbrruns + x.nbrruns)), 0); + short[] vl = answer.valueslength.array(); + int rlepos = 0; + int xrlepos = 0; + + while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) { + if (BufferUtil.compareUnsigned(getValue(rlepos), x.getValue(xrlepos)) <= 0) { + answer.smartAppend(vl, getValue(rlepos), getLength(rlepos)); + rlepos++; + } else { + answer.smartAppend(vl, x.getValue(xrlepos), x.getLength(xrlepos)); + xrlepos++; + } + } + while (xrlepos < x.nbrruns) { + answer.smartAppend(vl, x.getValue(xrlepos), x.getLength(xrlepos)); + xrlepos++; + } + while (rlepos < 
this.nbrruns) { + answer.smartAppend(vl, getValue(rlepos), getLength(rlepos)); + rlepos++; + } + return answer.toBitmapIfNeeded(); + } + + // Prepend a value length with all values starting from a given value + private void prependValueLength(int value, int index) { + int initialValue = BufferUtil.toIntUnsigned(getValue(index)); + int length = BufferUtil.toIntUnsigned(getLength(index)); + setValue(index, (short) value); + setLength(index, (short) (initialValue - value + length)); + } + + @Override + public int rank(short lowbits) { + int x = BufferUtil.toIntUnsigned(lowbits); + int answer = 0; + for (int k = 0; k < this.nbrruns; ++k) { + int value = BufferUtil.toIntUnsigned(getValue(k)); + int length = BufferUtil.toIntUnsigned(getLength(k)); + if (x < value) { + return answer; + } else if (value + length + 1 >= x) { + return answer + x - value + 1; + } + answer += length + 1; + } + return answer; + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + // little endian + this.nbrruns = 0xFFFF & Short.reverseBytes(in.readShort()); + if (this.valueslength.capacity() < 2 * this.nbrruns) { + this.valueslength = ShortBuffer.allocate(2 * this.nbrruns); + } + for (int k = 0; k < 2 * this.nbrruns; ++k) { + this.valueslength.put(k, Short.reverseBytes(in.readShort())); + } + } + + private void recoverRoomAtIndex(int index) { + copyValuesLength(valueslength, index + 1, valueslength, index, nbrruns - index - 1); + nbrruns--; + } + + // To recover rooms between begin(exclusive) and end(inclusive) + private void recoverRoomsInRange(int begin, int end) { + if (end + 1 < nbrruns) { + copyValuesLength(valueslength, end + 1, valueslength, begin + 1, nbrruns - 1 - end); + } + nbrruns -= end - begin; + } + + @Override + public MappeableContainer remove(int begin, int end) { + MappeableRunContainer rc = (MappeableRunContainer) clone(); + return rc.iremove(begin, end); + } + + @Override + // not thread-safe + public MappeableContainer 
remove(short x) { + int index = bufferedUnsignedInterleavedBinarySearch(valueslength, 0, nbrruns, x); + if (index >= 0) { + if (getLength(index) == 0) { + recoverRoomAtIndex(index); + } else { + incrementValue(index); + decrementLength(index); + } + return this;// already there + } + index = -index - 2;// points to preceding value, possibly -1 + if (index >= 0) {// possible match + int offset = BufferUtil.toIntUnsigned(x) - BufferUtil.toIntUnsigned(getValue(index)); + int le = BufferUtil.toIntUnsigned(getLength(index)); + if (offset < le) { + // need to break in two + this.setLength(index, (short) (offset - 1)); + // need to insert + int newvalue = BufferUtil.toIntUnsigned(x) + 1; + int newlength = le - offset - 1; + makeRoomAtIndex(index + 1); + this.setValue(index + 1, (short) newvalue); + this.setLength(index + 1, (short) newlength); + return this; + } else if (offset == le) { + decrementLength(index); + } + } + // no match + return this; + } + + @Override + public MappeableContainer repairAfterLazy() { + return toEfficientContainer(); + } + + /** + * Convert to Array or Bitmap container if the serialized form would be shorter + */ + + @Override + public MappeableContainer runOptimize() { + return toEfficientContainer(); // which had the same functionality. 
+ } + + @Override + public short select(int j) { + int offset = 0; + for (int k = 0; k < this.nbrruns; ++k) { + int nextOffset = offset + BufferUtil.toIntUnsigned(getLength(k)) + 1; + if (nextOffset > j) { + return (short) (getValue(k) + (j - offset)); + } + offset = nextOffset; + } + throw new IllegalArgumentException( + "Cannot select " + j + " since cardinality is " + getCardinality()); + } + + @Override + public int serializedSizeInBytes() { + return serializedSizeInBytes(nbrruns); + } + + + private void setLength(int index, short v) { + setLength(valueslength, index, v); + } + + + private void setLength(ShortBuffer valueslength, int index, short v) { + valueslength.put(2 * index + 1, v); + } + + + private void setValue(int index, short v) { + setValue(valueslength, index, v); + } + + private void setValue(ShortBuffer valueslength, int index, short v) { + valueslength.put(2 * index, v); + } + + + // assume that the (maybe) inplace operations + // will never actually *be* in place if they are + // to return ArrayContainer or BitmapContainer + + private void smartAppend(short[] vl, short val) { + int oldend; + if ((nbrruns == 0) || (BufferUtil + .toIntUnsigned(val) > (oldend = BufferUtil.toIntUnsigned(vl[2 * (nbrruns - 1)]) + + BufferUtil.toIntUnsigned(vl[2 * (nbrruns - 1) + 1])) + 1)) { // we add a new one + vl[2 * nbrruns] = val; + vl[2 * nbrruns + 1] = 0; + nbrruns++; + return; + } + if (val == (short) (oldend + 1)) { // we merge + vl[2 * (nbrruns - 1) + 1]++; + } + } + + private void smartAppend(short[] vl, short start, short length) { + int oldend; + if ((nbrruns == 0) || (BufferUtil + .toIntUnsigned(start) > (oldend = BufferUtil.toIntUnsigned(vl[2 * (nbrruns - 1)]) + + BufferUtil.toIntUnsigned(vl[2 * (nbrruns - 1) + 1])) + 1)) { // we add a new one + vl[2 * nbrruns] = start; + vl[2 * nbrruns + 1] = length; + nbrruns++; + return; + } + int newend = BufferUtil.toIntUnsigned(start) + BufferUtil.toIntUnsigned(length) + 1; + if (newend > oldend) { // we merge + 
vl[2 * (nbrruns - 1) + 1] = + (short) (newend - 1 - BufferUtil.toIntUnsigned(vl[2 * (nbrruns - 1)])); + } + } + + private void smartAppendExclusive(short[] vl, short val) { + int oldend; + if ((nbrruns == 0) || (BufferUtil + .toIntUnsigned(val) > (oldend = BufferUtil.toIntUnsigned(getValue(nbrruns - 1)) + + BufferUtil.toIntUnsigned(getLength(nbrruns - 1)) + 1))) { // we add a new one + vl[2 * nbrruns] = val; + vl[2 * nbrruns + 1] = 0; + nbrruns++; + return; + } + if (oldend == BufferUtil.toIntUnsigned(val)) { + // we merge + vl[2 * (nbrruns - 1) + 1]++; + return; + } + + + int newend = BufferUtil.toIntUnsigned(val) + 1; + + if (BufferUtil.toIntUnsigned(val) == BufferUtil.toIntUnsigned(getValue(nbrruns - 1))) { + // we wipe out previous + if (newend != oldend) { + setValue(nbrruns - 1, (short) newend); + setLength(nbrruns - 1, (short) (oldend - newend - 1)); + return; + } else { // they cancel out + nbrruns--; + return; + } + } + setLength(nbrruns - 1, (short) (val - BufferUtil.toIntUnsigned(getValue(nbrruns - 1)) - 1)); + + if (newend < oldend) { + setValue(nbrruns, (short) newend); + setLength(nbrruns, (short) (oldend - newend - 1)); + nbrruns++; + } else if (oldend < newend) { + setValue(nbrruns, (short) oldend); + setLength(nbrruns, (short) (newend - oldend - 1)); + nbrruns++; + } + } + + private void smartAppendExclusive(short[] vl, short start, short length) { + int oldend; + if ((nbrruns == 0) || (BufferUtil + .toIntUnsigned(start) > (oldend = BufferUtil.toIntUnsigned(getValue(nbrruns - 1)) + + BufferUtil.toIntUnsigned(getLength(nbrruns - 1)) + 1))) { // we add a new one + vl[2 * nbrruns] = start; + vl[2 * nbrruns + 1] = length; + nbrruns++; + return; + } + if (oldend == BufferUtil.toIntUnsigned(start)) { + // we merge + vl[2 * (nbrruns - 1) + 1] += length + 1; + return; + } + + + int newend = BufferUtil.toIntUnsigned(start) + BufferUtil.toIntUnsigned(length) + 1; + + if (BufferUtil.toIntUnsigned(start) == BufferUtil.toIntUnsigned(getValue(nbrruns - 1))) { + 
// we wipe out previous + if (newend < oldend) { + setValue(nbrruns - 1, (short) newend); + setLength(nbrruns - 1, (short) (oldend - newend - 1)); + return; + } else if (newend > oldend) { + setValue(nbrruns - 1, (short) oldend); + setLength(nbrruns - 1, (short) (newend - oldend - 1)); + return; + } else { // they cancel out + nbrruns--; + return; + } + } + setLength(nbrruns - 1, (short) (start - BufferUtil.toIntUnsigned(getValue(nbrruns - 1)) - 1)); + + if (newend < oldend) { + setValue(nbrruns, (short) newend); + setLength(nbrruns, (short) (oldend - newend - 1)); + nbrruns++; + } else if (newend > oldend) { + setValue(nbrruns, (short) oldend); + setLength(nbrruns, (short) (newend - oldend - 1)); + nbrruns++; + } + } + + + // convert to bitmap *if needed* (useful if you know it can't be an array) + private MappeableContainer toBitmapIfNeeded() { + int sizeAsRunContainer = MappeableRunContainer.serializedSizeInBytes(this.nbrruns); + int sizeAsBitmapContainer = MappeableBitmapContainer.serializedSizeInBytes(0); + if (sizeAsBitmapContainer > sizeAsRunContainer) { + return this; + } + return toBitmapContainer(); + } + + /** + * Convert the container to either a Bitmap or an Array Container, depending on the cardinality. 
+ * + * @param card the current cardinality + * @return new container + */ + MappeableContainer toBitmapOrArrayContainer(int card) { + // int card = this.getCardinality(); + if (card <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + MappeableArrayContainer answer = new MappeableArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + BufferUtil.toIntUnsigned(this.getLength(rlepos)); + + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + answer.content.put(answer.cardinality++, (short) runValue); + } + } + return answer; + } + MappeableBitmapContainer answer = new MappeableBitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = card; + return answer; + } + + @Override + public Container toContainer() { + return new RunContainer(this); + } + + + // convert to bitmap or array *if needed* + private MappeableContainer toEfficientContainer() { + int sizeAsRunContainer = MappeableRunContainer.serializedSizeInBytes(this.nbrruns); + int sizeAsBitmapContainer = MappeableBitmapContainer.serializedSizeInBytes(0); + int card = this.getCardinality(); + int sizeAsArrayContainer = MappeableArrayContainer.serializedSizeInBytes(card); + if (sizeAsRunContainer <= Math.min(sizeAsBitmapContainer, sizeAsArrayContainer)) { + return this; + } + if (card <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + MappeableArrayContainer answer = new MappeableArrayContainer(card); + answer.cardinality = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + BufferUtil.toIntUnsigned(this.getLength(rlepos)); + // next 
bit could potentially be faster, test + if (BufferUtil.isBackedBySimpleArray(answer.content)) { + short[] ba = answer.content.array(); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + ba[answer.cardinality++] = (short) runValue; + } + } else { + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + answer.content.put(answer.cardinality++, (short) runValue); + } + } + } + return answer; + } + MappeableBitmapContainer answer = new MappeableBitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = card; + return answer; + } + + /** + * Create a copy of the content of this container as a short array. This creates a copy. + * + * @return copy of the content as a short array + */ + public short[] toShortArray() { + short[] answer = new short[2 * nbrruns]; + valueslength.rewind(); + valueslength.get(answer); + return answer; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + for (int k = 0; k < this.nbrruns; ++k) { + sb.append("["); + sb.append(BufferUtil.toIntUnsigned(this.getValue(k))); + sb.append(","); + sb.append(BufferUtil.toIntUnsigned(this.getValue(k)) + + BufferUtil.toIntUnsigned(this.getLength(k)) + 1); + sb.append("]"); + } + return sb.toString(); + } + + @Override + public void trim() { + if (valueslength.limit() == 2 * nbrruns) { + return; + } + if (BufferUtil.isBackedBySimpleArray(valueslength)) { + this.valueslength = ShortBuffer.wrap(Arrays.copyOf(valueslength.array(), 2 * nbrruns)); + } else { + + final ShortBuffer co = ShortBuffer.allocate(2 * nbrruns); + short[] a = co.array(); + for (int k = 0; k < 2 * nbrruns; ++k) { + a[k] = this.valueslength.get(k); + } + this.valueslength = co; + } + } + + // To check if a value length contains a given value + 
private boolean valueLengthContains(int value, int index) { + int initialValue = BufferUtil.toIntUnsigned(getValue(index)); + int length = BufferUtil.toIntUnsigned(getLength(index)); + + if (value <= initialValue + length) { + return true; + } + return false; + } + + @Override + protected void writeArray(DataOutput out) throws IOException { + out.writeShort(Short.reverseBytes((short) this.nbrruns)); + for (int k = 0; k < 2 * this.nbrruns; ++k) { + out.writeShort(Short.reverseBytes(this.valueslength.get(k))); + } + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + out.writeShort(Short.reverseBytes((short) this.nbrruns)); + for (int k = 0; k < 2 * this.nbrruns; ++k) { + out.writeShort(Short.reverseBytes(this.valueslength.get(k))); + } + } + + @Override + public MappeableContainer xor(MappeableArrayContainer x) { + // if the cardinality of the array is small, guess that the output will still be a run container + final int arbitrary_threshold = 32; // 32 is arbitrary here + if (x.getCardinality() < arbitrary_threshold) { + return lazyxor(x).repairAfterLazy(); + } + // otherwise, we expect the output to be either an array or bitmap + final int card = getCardinality(); + if (card <= MappeableArrayContainer.DEFAULT_MAX_SIZE) { + // if the cardinality is small, we construct the solution in place + return x.xor(this.getShortIterator()); + } + // otherwise, we generate a bitmap + return toBitmapOrArrayContainer(card).ixor(x); + } + + @Override + public MappeableContainer xor(MappeableBitmapContainer x) { + MappeableBitmapContainer answer = x.clone(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + BufferUtil.flipBitmapRange(answer.bitmap, start, end); + } + answer.computeCardinality(); + if (answer.getCardinality() > MappeableArrayContainer.DEFAULT_MAX_SIZE) { + return answer; + } else { + 
return answer.toArrayContainer(); + } + } + + @Override + public MappeableContainer xor(MappeableRunContainer x) { + if (x.nbrruns == 0) { + return this.clone(); + } + if (this.nbrruns == 0) { + return x.clone(); + } + MappeableRunContainer answer = + new MappeableRunContainer(ShortBuffer.allocate(2 * (this.nbrruns + x.nbrruns)), 0); + short[] vl = answer.valueslength.array(); + int rlepos = 0; + int xrlepos = 0; + + while (true) { + if (BufferUtil.compareUnsigned(getValue(rlepos), x.getValue(xrlepos)) < 0) { + answer.smartAppendExclusive(vl, getValue(rlepos), getLength(rlepos)); + rlepos++; + if (rlepos == this.nbrruns) { + while (xrlepos < x.nbrruns) { + answer.smartAppendExclusive(vl, x.getValue(xrlepos), x.getLength(xrlepos)); + xrlepos++; + } + break; + } + } else { + answer.smartAppendExclusive(vl, x.getValue(xrlepos), x.getLength(xrlepos)); + xrlepos++; + if (xrlepos == x.nbrruns) { + while (rlepos < this.nbrruns) { + answer.smartAppendExclusive(vl, getValue(rlepos), getLength(rlepos)); + rlepos++; + } + break; + } + } + } + return answer.toEfficientContainer(); + } + + + @Override + public void forEach(short msb, IntConsumer ic) { + int high = ((int) msb) << 16; + for (int k = 0; k < this.nbrruns; ++k) { + int base = (this.getValue(k) & 0xFFFF) | high; + int le = this.getLength(k) & 0xFFFF; + for (int l = base; l <= base + le; ++l) { + ic.accept(l); + } + } + } + + + @Override + public int andCardinality(MappeableArrayContainer x) { + if (this.nbrruns == 0) { + return 0; + } + int rlepos = 0; + int arraypos = 0; + int andCardinality = 0; + int rleval = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int rlelength = BufferUtil.toIntUnsigned(this.getLength(rlepos)); + while (arraypos < x.cardinality) { + int arrayval = BufferUtil.toIntUnsigned(x.content.get(arraypos)); + while (rleval + rlelength < arrayval) {// this will frequently be false + ++rlepos; + if (rlepos == this.nbrruns) { + return andCardinality;// we are done + } + rleval = 
BufferUtil.toIntUnsigned(this.getValue(rlepos)); + rlelength = BufferUtil.toIntUnsigned(this.getLength(rlepos)); + } + if (rleval > arrayval) { + arraypos = BufferUtil.advanceUntil(x.content, arraypos, + x.cardinality, this.getValue(rlepos)); + } else { + andCardinality++; + arraypos++; + } + } + return andCardinality; + } + + + @Override + public int andCardinality(MappeableBitmapContainer x) { + // could be implemented as return toBitmapOrArrayContainer().iand(x); + int cardinality = 0; + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int runStart = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int runEnd = runStart + BufferUtil.toIntUnsigned(this.getLength(rlepos)); + for (int runValue = runStart; runValue <= runEnd; ++runValue) { + if (x.contains((short) runValue)) {// it looks like contains() should be cheap enough if + // accessed sequentially + cardinality++; + } + } + } + return cardinality; + } + + @Override + public int andCardinality(MappeableRunContainer x) { + int cardinality = 0; + int rlepos = 0; + int xrlepos = 0; + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + int xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + int xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + } else if (xend <= start) { + ++xrlepos; + + if (xrlepos < x.nbrruns) { + xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else {// they overlap + final int lateststart = start > xstart ? 
start : xstart; + int earliestend; + if (end == xend) {// improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + if (xrlepos < x.nbrruns) { + xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < this.nbrruns) { + start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + } + + } else {// end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < x.nbrruns) { + xstart = BufferUtil.toIntUnsigned(x.getValue(xrlepos)); + xend = xstart + BufferUtil.toIntUnsigned(x.getLength(xrlepos)) + 1; + } + } + // earliestend - lateststart are all values that are true. + cardinality += (short) (earliestend - lateststart); + } + } + return cardinality; + } + + + @Override + public MappeableBitmapContainer toBitmapContainer() { + int card = this.getCardinality(); + MappeableBitmapContainer answer = new MappeableBitmapContainer(); + for (int rlepos = 0; rlepos < this.nbrruns; ++rlepos) { + int start = BufferUtil.toIntUnsigned(this.getValue(rlepos)); + int end = start + BufferUtil.toIntUnsigned(this.getLength(rlepos)) + 1; + BufferUtil.setBitmapRange(answer.bitmap, start, end); + } + answer.cardinality = card; + return answer; + } + + +} + + +final class MappeableRunContainerShortIterator implements PeekableShortIterator { + int pos; + int le = 0; + int maxlength; + int base; + + MappeableRunContainer parent; + + MappeableRunContainerShortIterator() { + } + + MappeableRunContainerShortIterator(MappeableRunContainer p) { + wrap(p); + } + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return 
null;// will not happen + } + } + + @Override + public boolean hasNext() { + return pos < parent.nbrruns; + } + + @Override + public short next() { + short ans = (short) (base + le); + le++; + if (le > maxlength) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public int nextAsInt() { + int ans = base + le; + le++; + if (le > maxlength) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented");// TODO + } + + void wrap(MappeableRunContainer p) { + parent = p; + pos = 0; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } + } + + @Override + public void advanceIfNeeded(short minval) { + while (base + maxlength < BufferUtil.toIntUnsigned(minval)) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } else { + return; + } + } + if (base > BufferUtil.toIntUnsigned(minval)) { + return; + } + le = BufferUtil.toIntUnsigned(minval) - base; + } + + @Override + public short peekNext() { + return (short) (base + le); + } + +}; + + +final class RawMappeableRunContainerShortIterator implements PeekableShortIterator { + int pos; + int le = 0; + int maxlength; + int base; + + MappeableRunContainer parent; + short[] vl; + + + RawMappeableRunContainerShortIterator(MappeableRunContainer p) { + wrap(p); + } + + @Override + public PeekableShortIterator clone() { + try { + return (PeekableShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return 
null;// will not happen + } + } + + short getLength(int index) { + return vl[2 * index + 1]; + } + + short getValue(int index) { + return vl[2 * index]; + } + + @Override + public boolean hasNext() { + return pos < parent.nbrruns; + } + + @Override + public short next() { + short ans = (short) (base + le); + le++; + if (le > maxlength) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(getLength(pos)); + base = BufferUtil.toIntUnsigned(getValue(pos)); + } + } + return ans; + } + + + @Override + public int nextAsInt() { + int ans = base + le; + le++; + if (le > maxlength) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(getLength(pos)); + base = BufferUtil.toIntUnsigned(getValue(pos)); + } + } + return ans; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented");// TODO + } + + void wrap(MappeableRunContainer p) { + parent = p; + if (!parent.isArrayBacked()) { + throw new RuntimeException("internal error"); + } + vl = parent.valueslength.array(); + pos = 0; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(getLength(pos)); + base = BufferUtil.toIntUnsigned(getValue(pos)); + } + } + + @Override + public void advanceIfNeeded(short minval) { + while (base + maxlength < BufferUtil.toIntUnsigned(minval)) { + pos++; + le = 0; + if (pos < parent.nbrruns) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } else { + return; + } + } + if (base > BufferUtil.toIntUnsigned(minval)) { + return; + } + le = BufferUtil.toIntUnsigned(minval) - base; + } + + @Override + public short peekNext() { + return (short) (base + le); + } + +} + + +final class RawReverseMappeableRunContainerShortIterator implements ShortIterator { + int pos; + int le; + int maxlength; + int base; + MappeableRunContainer parent; + short[] vl; + + + 
RawReverseMappeableRunContainerShortIterator(MappeableRunContainer p) { + wrap(p); + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + short getLength(int index) { + return vl[2 * index + 1]; + } + + short getValue(int index) { + return vl[2 * index]; + } + + @Override + public boolean hasNext() { + return pos >= 0; + } + + @Override + public short next() { + short ans = (short) (base + maxlength - le); + le++; + if (le > maxlength) { + pos--; + le = 0; + if (pos >= 0) { + maxlength = BufferUtil.toIntUnsigned(getLength(pos)); + base = BufferUtil.toIntUnsigned(getValue(pos)); + } + } + return ans; + } + + @Override + public int nextAsInt() { + int ans = base + maxlength - le; + le++; + if (le > maxlength) { + pos--; + le = 0; + if (pos >= 0) { + maxlength = BufferUtil.toIntUnsigned(getLength(pos)); + base = BufferUtil.toIntUnsigned(getValue(pos)); + } + } + return ans; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented");// TODO + } + + void wrap(MappeableRunContainer p) { + parent = p; + if (!parent.isArrayBacked()) { + throw new RuntimeException("internal error"); + } + vl = parent.valueslength.array(); + pos = parent.nbrruns - 1; + le = 0; + if (pos >= 0) { + maxlength = BufferUtil.toIntUnsigned(getLength(pos)); + base = BufferUtil.toIntUnsigned(getValue(pos)); + } + } + +}; + + +final class ReverseMappeableRunContainerShortIterator implements ShortIterator { + int pos; + int le; + int maxlength; + int base; + MappeableRunContainer parent; + + + ReverseMappeableRunContainerShortIterator() { + } + + ReverseMappeableRunContainerShortIterator(MappeableRunContainer p) { + wrap(p); + } + + @Override + public ShortIterator clone() { + try { + return (ShortIterator) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public boolean 
hasNext() { + return pos >= 0; + } + + @Override + public short next() { + short ans = (short) (base + maxlength - le); + le++; + if (le > maxlength) { + pos--; + le = 0; + if (pos >= 0) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public int nextAsInt() { + int ans = base + maxlength - le; + le++; + if (le > maxlength) { + pos--; + le = 0; + if (pos >= 0) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } + } + return ans; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented");// TODO + } + + void wrap(MappeableRunContainer p) { + parent = p; + pos = parent.nbrruns - 1; + le = 0; + if (pos >= 0) { + maxlength = BufferUtil.toIntUnsigned(parent.getLength(pos)); + base = BufferUtil.toIntUnsigned(parent.getValue(pos)); + } + } + +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MutableRoaringArray.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MutableRoaringArray.java new file mode 100644 index 000000000..39706dfe4 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MutableRoaringArray.java @@ -0,0 +1,573 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + + +import com.fr.third.bitmap.roaringbitmap.Util; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.nio.LongBuffer; +import java.nio.ShortBuffer; +import java.util.Arrays; + + +/** + * Specialized array to store the containers used by a RoaringBitmap. This class is similar to + * RoaringArray but meant to be used with memory mapping. This is not meant to be + * used by end users. + *

+ * Objects of this class reside in RAM. + */ +public final class MutableRoaringArray implements Cloneable, Externalizable, PointableRoaringArray { + + protected static final int INITIAL_CAPACITY = 4; + + protected static final short SERIAL_COOKIE_NO_RUNCONTAINER = 12346; + protected static final short SERIAL_COOKIE = 12347; + + protected static final int NO_OFFSET_THRESHOLD = 4; + + private static final long serialVersionUID = 5L; // TODO: OFK was 4L, not sure + protected boolean mayHaveRunContainers = false; // does not necessarily have them, after + // optimization + + + short[] keys = null; + MappeableContainer[] values = null; + + int size = 0; + + protected MutableRoaringArray() { + this.keys = new short[INITIAL_CAPACITY]; + this.values = new MappeableContainer[INITIAL_CAPACITY]; + } + + + @Override + public int advanceUntil(short x, int pos) { + int lower = pos + 1; + + // special handling for a possibly common sequential case + if (lower >= size || BufferUtil.toIntUnsigned(keys[lower]) >= BufferUtil.toIntUnsigned(x)) { + return lower; + } + + int spansize = 1; // could set larger + // bootstrap an upper limit + + while (lower + spansize < size + && BufferUtil.toIntUnsigned(keys[lower + spansize]) < BufferUtil.toIntUnsigned(x)) { + spansize *= 2; // hoping for compiler will reduce to shift + } + int upper = (lower + spansize < size) ? 
lower + spansize : size - 1; + + // maybe we are lucky (could be common case when the seek ahead + // expected to be small and sequential will otherwise make us look bad) + if (keys[upper] == x) { + return upper; + } + + if (BufferUtil.toIntUnsigned(keys[upper]) < BufferUtil.toIntUnsigned(x)) {// means array has no + // item key >= x + return size; + } + + // we know that the next-smallest span was too small + lower += (spansize / 2); + + // else begin binary search + // invariant: array[lower]x + while (lower + 1 != upper) { + int mid = (lower + upper) / 2; + if (keys[mid] == x) { + return mid; + } else if (BufferUtil.toIntUnsigned(keys[mid]) < BufferUtil.toIntUnsigned(x)) { + lower = mid; + } else { + upper = mid; + } + } + return upper; + } + + protected void append(short key, MappeableContainer value) { + extendArray(1); + this.keys[this.size] = key; + this.values[this.size] = value; + this.size++; + } + + /** + * Append copies of the values AFTER a specified key (may or may not be present) to end. 
+ * + * @param highLowContainer the other array + * @param beforeStart given key is the largest key that we won't copy + */ + protected void appendCopiesAfter(PointableRoaringArray highLowContainer, short beforeStart) { + + int startLocation = highLowContainer.getIndex(beforeStart); + if (startLocation >= 0) { + startLocation++; + } else { + startLocation = -startLocation - 1; + } + extendArray(highLowContainer.size() - startLocation); + + for (int i = startLocation; i < highLowContainer.size(); ++i) { + this.keys[this.size] = highLowContainer.getKeyAtIndex(i); + this.values[this.size] = highLowContainer.getContainerAtIndex(i).clone(); + this.size++; + } + } + + /** + * Append copies of the values from another array, from the start + * + * @param highLowContainer the other array + * @param stoppingKey any equal or larger key in other array will terminate copying + */ + protected void appendCopiesUntil(PointableRoaringArray highLowContainer, short stoppingKey) { + final int stopKey = BufferUtil.toIntUnsigned(stoppingKey); + MappeableContainerPointer cp = highLowContainer.getContainerPointer(); + while (cp.hasContainer()) { + if (BufferUtil.toIntUnsigned(cp.key()) >= stopKey) { + break; + } + extendArray(1); + this.keys[this.size] = cp.key(); + this.values[this.size] = cp.getContainer().clone(); + this.size++; + cp.advance(); + } + } + + /** + * Append copies of the values from another array + * + * @param highLowContainer other array + * @param startingIndex starting index in the other array + * @param end last index array in the other array + */ + protected void appendCopy(PointableRoaringArray highLowContainer, int startingIndex, int end) { + extendArray(end - startingIndex); + for (int i = startingIndex; i < end; ++i) { + this.keys[this.size] = highLowContainer.getKeyAtIndex(i); + this.values[this.size] = highLowContainer.getContainerAtIndex(i).clone(); + this.size++; + } + } + + protected void appendCopy(short key, MappeableContainer value) { + extendArray(1); + 
this.keys[this.size] = key; + this.values[this.size] = value.clone(); + this.size++; + } + + private int binarySearch(int begin, int end, short key) { + return Util.unsignedBinarySearch(keys, begin, end, key); + } + + protected void clear() { + this.keys = null; + this.values = null; + this.size = 0; + } + + @Override + public MutableRoaringArray clone() { + MutableRoaringArray sa; + try { + sa = (MutableRoaringArray) super.clone(); + + // OFK: do we need runcontainer bitmap? Guess not, this is just a directory + // and each container knows what kind it is. + sa.keys = Arrays.copyOf(this.keys, this.size); + sa.values = Arrays.copyOf(this.values, this.size); + for (int k = 0; k < this.size; ++k) { + sa.values[k] = sa.values[k].clone(); + } + sa.size = this.size; + return sa; + + } catch (CloneNotSupportedException e) { + return null; + } + } + + protected void copyRange(int begin, int end, int newBegin) { + // assuming begin <= end and newBegin < begin + final int range = end - begin; + System.arraycopy(this.keys, begin, this.keys, newBegin, range); + System.arraycopy(this.values, begin, this.values, newBegin, range); + } + + /** + * Deserialize. + * + * @param in the DataInput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void deserialize(DataInput in) throws IOException { + this.clear(); + // little endian + final int cookie = Integer.reverseBytes(in.readInt()); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + throw new IOException("I failed to find the one of the right cookies."); + } + this.size = ((cookie & 0xFFFF) == SERIAL_COOKIE) ? 
(cookie >>> 16) + 1 + : Integer.reverseBytes(in.readInt()); + if ((this.keys == null) || (this.keys.length < this.size)) { + this.keys = new short[this.size]; + this.values = new MappeableContainer[this.size]; + } + + byte[] bitmapOfRunContainers = null; + boolean hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + bitmapOfRunContainers = new byte[(size + 7) / 8]; + in.readFully(bitmapOfRunContainers); + } + + final short keys[] = new short[this.size]; + final int cardinalities[] = new int[this.size]; + final boolean isBitmap[] = new boolean[this.size]; + for (int k = 0; k < this.size; ++k) { + keys[k] = Short.reverseBytes(in.readShort()); + cardinalities[k] = 1 + (0xFFFF & Short.reverseBytes(in.readShort())); + isBitmap[k] = cardinalities[k] > MappeableArrayContainer.DEFAULT_MAX_SIZE; + if (bitmapOfRunContainers != null && (bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isBitmap[k] = false; + } + } + if ((!hasrun) || (this.size >= NO_OFFSET_THRESHOLD)) { + // skipping the offsets + in.skipBytes(this.size * 4); + } + // Reading the containers + for (int k = 0; k < this.size; ++k) { + MappeableContainer val; + if (isBitmap[k]) { + final LongBuffer bitmapArray = + LongBuffer.allocate(MappeableBitmapContainer.MAX_CAPACITY / 64); + // little endian + for (int l = 0; l < bitmapArray.limit(); ++l) { + bitmapArray.put(l, Long.reverseBytes(in.readLong())); + } + val = new MappeableBitmapContainer(bitmapArray, cardinalities[k]); + } else if (bitmapOfRunContainers != null + && ((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0)) { + int nbrruns = BufferUtil.toIntUnsigned(Short.reverseBytes(in.readShort())); + final ShortBuffer shortArray = ShortBuffer.allocate(2 * nbrruns); + for (int l = 0; l < shortArray.limit(); ++l) { + shortArray.put(l, Short.reverseBytes(in.readShort())); + } + val = new MappeableRunContainer(shortArray, nbrruns); + } else { + final ShortBuffer shortArray = ShortBuffer.allocate(cardinalities[k]); + for (int l = 0; l < 
shortArray.limit(); ++l) { + shortArray.put(l, Short.reverseBytes(in.readShort())); + } + val = new MappeableArrayContainer(shortArray, cardinalities[k]); + } + this.keys[k] = keys[k]; + this.values[k] = val; + } + } + + // make sure there is capacity for at least k more elements + protected void extendArray(int k) { + // size + 1 could overflow + if (this.size + k >= this.keys.length) { + int newCapacity; + if (this.keys.length < 1024) { + newCapacity = 2 * (this.size + k); + } else { + newCapacity = 5 * (this.size + k) / 4; + } + this.keys = Arrays.copyOf(this.keys, newCapacity); + this.values = Arrays.copyOf(this.values, newCapacity); + } + } + + @Override + public int getCardinality(int i) { + return getContainerAtIndex(i).getCardinality(); + } + + // involves a binary search + @Override + public MappeableContainer getContainer(short x) { + final int i = this.binarySearch(0, size, x); + if (i < 0) { + return null; + } + return this.values[i]; + } + + @Override + public MappeableContainer getContainerAtIndex(int i) { + return this.values[i]; + } + + @Override + public MappeableContainerPointer getContainerPointer() { + return getContainerPointer(0); + } + + @Override + public MappeableContainerPointer getContainerPointer(final int startIndex) { + return new MappeableContainerPointer() { + int k = startIndex; + + @Override + public void advance() { + ++k; + } + + @Override + public MappeableContainerPointer clone() { + try { + return (MappeableContainerPointer) super.clone(); + } catch (CloneNotSupportedException e) { + return null;// will not happen + } + } + + @Override + public int compareTo(MappeableContainerPointer o) { + if (key() != o.key()) { + return BufferUtil.toIntUnsigned(key()) - BufferUtil.toIntUnsigned(o.key()); + } + return o.getCardinality() - this.getCardinality(); + } + + @Override + public int getCardinality() { + return getContainer().getCardinality(); + } + + @Override + public MappeableContainer getContainer() { + if (k >= 
MutableRoaringArray.this.size) { + return null; + } + return MutableRoaringArray.this.values[k]; + } + + @Override + public int getSizeInBytes() { + return getContainer().getArraySizeInBytes(); + } + + @Override + public boolean hasContainer() { + return 0 <= k & k < MutableRoaringArray.this.size; + } + + @Override + public boolean isBitmapContainer() { + return getContainer() instanceof MappeableBitmapContainer; + } + + @Override + public boolean isRunContainer() { + return getContainer() instanceof MappeableRunContainer; + } + + @Override + public short key() { + return MutableRoaringArray.this.keys[k]; + + } + + + @Override + public void previous() { + --k; + } + }; + + } + + // involves a binary search + @Override + public int getIndex(short x) { + // before the binary search, we optimize for frequent cases + if ((size == 0) || (keys[size - 1] == x)) { + return size - 1; + } + // no luck we have to go through the list + return this.binarySearch(0, size, x); + } + + @Override + public short getKeyAtIndex(int i) { + return this.keys[i]; + } + + @Override + public int hashCode() { + int hashvalue = 0; + for (int k = 0; k < this.size; ++k) { + hashvalue = 31 * hashvalue + keys[k] * 0xF0F0F0 + values[k].hashCode(); + } + return hashvalue; + } + + @Override + public boolean hasRunCompression() { + for (int k = 0; k < size; ++k) { + MappeableContainer ck = values[k]; + if (ck instanceof MappeableRunContainer) { + return true; + } + } + return false; + } + + protected int headerSize() { + if (hasRunCompression()) { + if (size < NO_OFFSET_THRESHOLD) {// for small bitmaps, we omit the offsets + return 4 + (size + 7) / 8 + 4 * size; + } + return 4 + (size + 7) / 8 + 8 * size;// - 4 because we pack the size with the cookie + } else { + return 4 + 4 + 8 * size; + } + } + + // insert a new key, it is assumed that it does not exist + protected void insertNewKeyValueAt(int i, short key, MappeableContainer value) { + extendArray(1); + System.arraycopy(keys, i, keys, i + 1, size 
- i); + System.arraycopy(values, i, values, i + 1, size - i); + keys[i] = key; + values[i] = value; + size++; + } + + @Override + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + deserialize(in); + } + + protected void removeAtIndex(int i) { + System.arraycopy(keys, i + 1, keys, i, size - i - 1); + keys[size - 1] = 0; + System.arraycopy(values, i + 1, values, i, size - i - 1); + values[size - 1] = null; + size--; + } + + + protected void removeIndexRange(int begin, int end) { + if (end <= begin) { + return; + } + final int range = end - begin; + System.arraycopy(keys, end, keys, begin, size - end); + System.arraycopy(values, end, values, begin, size - end); + for (int i = 1; i <= range; ++i) { + keys[size - i] = 0; + values[size - i] = null; + } + size -= range; + } + + protected void replaceKeyAndContainerAtIndex(int i, short key, MappeableContainer c) { + this.keys[i] = key; + this.values[i] = c; + } + + + protected void resize(int newLength) { + Arrays.fill(this.keys, newLength, this.size, (short) 0); + Arrays.fill(this.values, newLength, this.size, null); + this.size = newLength; + } + + /** + * Serialize. + *

+ * The current bitmap is not modified. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + @Override + public void serialize(DataOutput out) throws IOException { + int startOffset = 0; + boolean hasrun = hasRunCompression(); + if (hasrun) { + out.writeInt(Integer.reverseBytes(SERIAL_COOKIE | ((this.size - 1) << 16))); + byte[] bitmapOfRunContainers = new byte[(size + 7) / 8]; + for (int i = 0; i < size; ++i) { + if (this.values[i] instanceof MappeableRunContainer) { + bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); + } + } + out.write(bitmapOfRunContainers); + if (this.size < NO_OFFSET_THRESHOLD) { + startOffset = 4 + 4 * this.size + bitmapOfRunContainers.length; + } else { + startOffset = 4 + 8 * this.size + bitmapOfRunContainers.length; + } + } else { // backwards compatibilility + out.writeInt(Integer.reverseBytes(SERIAL_COOKIE_NO_RUNCONTAINER)); + out.writeInt(Integer.reverseBytes(size)); + startOffset = 4 + 4 + this.size * 4 + this.size * 4; + } + for (int k = 0; k < size; ++k) { + out.writeShort(Short.reverseBytes(this.keys[k])); + out.writeShort(Short.reverseBytes((short) (this.values[k].getCardinality() - 1))); + } + if ((!hasrun) || (this.size >= NO_OFFSET_THRESHOLD)) { + for (int k = 0; k < this.size; k++) { + out.writeInt(Integer.reverseBytes(startOffset)); + startOffset = startOffset + values[k].getArraySizeInBytes(); + } + } + for (int k = 0; k < size; ++k) { + values[k].writeArray(out); + } + + } + + /** + * Report the number of bytes required for serialization. + * + * @return the size in bytes + */ + @Override + public int serializedSizeInBytes() { + int count = headerSize(); + // for each container, we store cardinality (16 bits), key (16 bits) and location offset (32 + // bits). 
+ for (int k = 0; k < this.size; ++k) { + count += values[k].getArraySizeInBytes(); + } + return count; + } + + protected void setContainerAtIndex(int i, MappeableContainer c) { + this.values[i] = c; + } + + @Override + public int size() { + return this.size; + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + serialize(out); + } +} diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MutableRoaringBitmap.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MutableRoaringBitmap.java new file mode 100644 index 000000000..902e421a7 --- /dev/null +++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/MutableRoaringBitmap.java @@ -0,0 +1,1420 @@ +/* + * (c) the authors Licensed under the Apache License, Version 2.0. + */ + +package com.fr.third.bitmap.roaringbitmap.buffer; + +import com.fr.third.bitmap.roaringbitmap.BitmapDataProvider; +import com.fr.third.bitmap.roaringbitmap.ContainerPointer; +import com.fr.third.bitmap.roaringbitmap.RoaringBitmap; +import com.fr.third.bitmap.roaringbitmap.ShortIterator; +import com.fr.third.bitmap.roaringbitmap.Util; + +import java.io.DataInput; +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.io.Serializable; +import java.util.Iterator; + +/** + * MutableRoaringBitmap, a compressed alternative to the BitSet. It is similar to + * RoaringBitmap, but it differs in that it can interact with + * ImmutableRoaringBitmap objects. + *

+ * A MutableRoaringBitmap is an instance of an ImmutableRoaringBitmap (where methods like + * "serialize" are implemented). That is, they both share the same core (immutable) methods, but a + * MutableRoaringBitmap adds methods that allow you to modify the object. This design allows us to + * use MutableRoaringBitmap as ImmutableRoaringBitmap instances when needed. + *

+ * A MutableRoaringBitmap can be used much like a RoaringBitmap instance, and + * they serialize to the same output. The RoaringBitmap instance will be faster since it does not + * carry the overhead of a ByteBuffer back-end, but the MutableRoaringBitmap can be used as an + * ImmutableRoaringBitmap instance. Thus, if you use ImmutableRoaringBitmap, you probably need to + * use MutableRoaringBitmap instances as well; if you do not use ImmutableRoaringBitmap, you + * probably want to use only RoaringBitmap instances. + *

+ *

+ * {@code
+ *      import com.fr.third.bitmap.roaringbitmap.buffer.*;
+ *
+ *      //...
+ *
+ *      MutableRoaringBitmap rr = MutableRoaringBitmap.bitmapOf(1,2,3,1000);
+ *      MutableRoaringBitmap rr2 = new MutableRoaringBitmap();
+ *      for(int k = 4000; k<4255;++k) rr2.add(k);
+ *
+ *      MutableRoaringBitmap rror = MutableRoaringBitmap.or(rr, rr2);
+ *
+ *      //...
+ *      DataOutputStream wheretoserialize = ...
+ *      rr.runOptimize(); // can help compression
+ *      rr.serialize(wheretoserialize);
+ * }
+ * 
+ * + * @see ImmutableRoaringBitmap + * @see RoaringBitmap + */ +public class MutableRoaringBitmap extends ImmutableRoaringBitmap + implements Cloneable, Serializable, Iterable, Externalizable, BitmapDataProvider { + private static final long serialVersionUID = 4L; // 3L; bumped by ofk for runcontainers + + + /** + * Create an empty bitmap + */ + public MutableRoaringBitmap() { + highLowContainer = new MutableRoaringArray(); + } + + /** + * Create a MutableRoaringBitmap from a RoaringBitmap. The RoaringBitmap is not modified. + * + * @param rb the original bitmap + */ + public MutableRoaringBitmap(RoaringBitmap rb) { + highLowContainer = new MutableRoaringArray(); + ContainerPointer cp = rb.getContainerPointer(); + while (cp.getContainer() != null) { + ((MutableRoaringArray) highLowContainer).append(cp.key(), + cp.getContainer().toMappeableContainer()); + cp.advance(); + } + } + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) added. + * + * @param rb initial bitmap (will not be modified) + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new bitmap + */ + public static MutableRoaringBitmap add(MutableRoaringBitmap rb, final long rangeStart, + final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return rb.clone(); // empty range + } + + final int hbStart = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeStart)); + final int lbStart = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeStart)); + final int hbLast = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeEnd - 1)); + final int lbLast = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeEnd - 1)); + + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + ((MutableRoaringArray) answer.highLowContainer).appendCopiesUntil(rb.highLowContainer, + (short) hbStart); + + if (hbStart == hbLast) { + final int i = rb.highLowContainer.getIndex((short) hbStart); + final MappeableContainer 
c = + i >= 0 ? rb.highLowContainer.getContainerAtIndex(i).add(lbStart, lbLast + 1) + : MappeableContainer.rangeOfOnes(lbStart, lbLast + 1); + ((MutableRoaringArray) answer.highLowContainer).append((short) hbStart, c); + ((MutableRoaringArray) answer.highLowContainer).appendCopiesAfter(rb.highLowContainer, + (short) hbLast); + return answer; + } + int ifirst = rb.highLowContainer.getIndex((short) hbStart); + int ilast = rb.highLowContainer.getIndex((short) hbLast); + + { + final MappeableContainer c = ifirst >= 0 + ? rb.highLowContainer.getContainerAtIndex(ifirst).add(lbStart, + BufferUtil.maxLowBitAsInteger() + 1) + : MappeableContainer.rangeOfOnes(lbStart, BufferUtil.maxLowBitAsInteger() + 1); + ((MutableRoaringArray) answer.highLowContainer).append((short) hbStart, c); + } + for (int hb = hbStart + 1; hb < hbLast; ++hb) { + MappeableContainer c = MappeableContainer.rangeOfOnes(0, BufferUtil.maxLowBitAsInteger() + 1); + ((MutableRoaringArray) answer.highLowContainer).append((short) hb, c); + } + { + final MappeableContainer c = + ilast >= 0 ? rb.highLowContainer.getContainerAtIndex(ilast).add(0, lbLast + 1) + : MappeableContainer.rangeOfOnes(0, lbLast + 1); + ((MutableRoaringArray) answer.highLowContainer).append((short) hbLast, c); + } + ((MutableRoaringArray) answer.highLowContainer).appendCopiesAfter(rb.highLowContainer, + (short) hbLast); + return answer; + } + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) added. 
+     *
+     * @param rb         initial bitmap (will not be modified)
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     * @return new bitmap
+     * @deprecated use the version where longs specify the range
+     */
+    @Deprecated
+    public static MutableRoaringBitmap add(MutableRoaringBitmap rb,
+                                           final int rangeStart, final int rangeEnd) {
+        // A negative rangeStart is taken to mean that both bounds are unsigned 32-bit values
+        // carried in ints (negative start with a non-negative end is not expected), so both
+        // are widened without sign extension; otherwise they are widened as-is.
+        final boolean unsignedBounds = rangeStart < 0;
+        final long start = unsignedBounds ? (rangeStart & 0xFFFFFFFFL) : (long) rangeStart;
+        final long end = unsignedBounds ? (rangeEnd & 0xFFFFFFFFL) : (long) rangeEnd;
+        return add(rb, start, end);
+    }
+
+    /**
+     * Bitwise AND (intersection) of two bitmaps, returned as a newly created bitmap. The provided
+     * bitmaps are *not* modified. This operation is thread-safe as long as the provided bitmaps
+     * remain unchanged.
+     *
+     * @param x1 first bitmap
+     * @param x2 other bitmap
+     * @return result of the operation
+     */
+    public static MutableRoaringBitmap and(final MutableRoaringBitmap x1,
+                                           final MutableRoaringBitmap x2) {
+        final MutableRoaringBitmap result = new MutableRoaringBitmap();
+        final int leftSize = x1.highLowContainer.size();
+        final int rightSize = x2.highLowContainer.size();
+        int left = 0;
+        int right = 0;
+
+        while (left < leftSize && right < rightSize) {
+            final short leftKey = x1.highLowContainer.getKeyAtIndex(left);
+            final short rightKey = x2.highLowContainer.getKeyAtIndex(right);
+
+            if (leftKey != rightKey) {
+                // Keys are compared as unsigned values; skip ahead on whichever side lags.
+                if (Util.compareUnsigned(leftKey, rightKey) < 0) {
+                    left = x1.highLowContainer.advanceUntil(rightKey, left);
+                } else {
+                    right = x2.highLowContainer.advanceUntil(leftKey, right);
+                }
+                continue;
+            }
+
+            final MappeableContainer common = x1.highLowContainer.getContainerAtIndex(left)
+                    .and(x2.highLowContainer.getContainerAtIndex(right));
+            if (common.getCardinality() > 0) {
+                // only non-empty intersections are appended to the result
+                result.getMappeableRoaringArray().append(leftKey, common);
+            }
+            ++left;
+            ++right;
+        }
+        return result;
+    }
+
+    /**
+     * Bitwise ANDNOT (difference) operation. The provided bitmaps are *not* modified.
This operation + * is thread-safe as long as the provided bitmaps remain unchanged. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + */ + public static MutableRoaringBitmap andNot(final MutableRoaringBitmap x1, + final MutableRoaringBitmap x2) { + final MutableRoaringBitmap answer = new MutableRoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + + while (pos1 < length1 && pos2 < length2) { + final short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + final short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + if (s1 == s2) { + final MappeableContainer c1 = x1.highLowContainer.getContainerAtIndex(pos1); + final MappeableContainer c2 = x2.highLowContainer.getContainerAtIndex(pos2); + final MappeableContainer c = c1.andNot(c2); + if (c.getCardinality() > 0) { + answer.getMappeableRoaringArray().append(s1, c); + } + ++pos1; + ++pos2; + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + final int nextPos1 = x1.highLowContainer.advanceUntil(s2, pos1); + answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer, pos1, nextPos1); + pos1 = nextPos1; + } else { // s1 > s2 + pos2 = x2.highLowContainer.advanceUntil(s1, pos2); + } + } + if (pos2 == length2) { + answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer, pos1, length1); + } + return answer; + } + + /** + * Generate a bitmap with the specified values set to true. The provided integers values don't + * have to be in sorted order, but it may be preferable to sort them from a performance point of + * view. + * + * @param dat set values + * @return a new bitmap + */ + public static MutableRoaringBitmap bitmapOf(final int... 
dat) { + final MutableRoaringBitmap ans = new MutableRoaringBitmap(); + ans.add(dat); + return ans; + } + + protected static void rangeSanityCheck(final long rangeStart, final long rangeEnd) { + if (rangeStart < 0 || rangeStart > (1L << 32) - 1) { + throw new IllegalArgumentException("rangeStart=" + rangeStart + + " should be in [0, 0xffffffff]"); + } + if (rangeEnd > (1L << 32) || rangeEnd < 0) { + throw new IllegalArgumentException("rangeEnd=" + rangeEnd + + " should be in [0, 0xffffffff + 1]"); + } + } + + /** + * Complements the bits in the given range, from rangeStart (inclusive) rangeEnd (exclusive). The + * given bitmap is unchanged. + * + * @param bm bitmap being negated + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return a new Bitmap + */ + public static MutableRoaringBitmap flip(MutableRoaringBitmap bm, final long rangeStart, + final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return bm.clone(); + } + + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + final int hbStart = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeStart)); + final int lbStart = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeStart)); + final int hbLast = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeEnd - 1)); + final int lbLast = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeEnd - 1)); + + + // copy the containers before the active area + answer.getMappeableRoaringArray().appendCopiesUntil(bm.highLowContainer, (short) hbStart); + + for (int hb = hbStart; hb <= hbLast; ++hb) { + final int containerStart = (hb == hbStart) ? lbStart : 0; + final int containerLast = (hb == hbLast) ? 
lbLast : BufferUtil.maxLowBitAsInteger(); + + final int i = bm.highLowContainer.getIndex((short) hb); + final int j = answer.highLowContainer.getIndex((short) hb); + assert j < 0; + + if (i >= 0) { + final MappeableContainer c = + bm.highLowContainer.getContainerAtIndex(i).not(containerStart, containerLast + 1); + if (c.getCardinality() > 0) { + answer.getMappeableRoaringArray().insertNewKeyValueAt(-j - 1, (short) hb, c); + } + + } else { // *think* the range of ones must never be + // empty. + answer.getMappeableRoaringArray().insertNewKeyValueAt(-j - 1, (short) hb, + MappeableContainer.rangeOfOnes(containerStart, containerLast + 1)); + } + } + // copy the containers after the active area. + answer.getMappeableRoaringArray().appendCopiesAfter(bm.highLowContainer, (short) hbLast); + + return answer; + } + + /** + * Complements the bits in the given range, from rangeStart (inclusive) rangeEnd (exclusive). The + * given bitmap is unchanged. + * + * @param rb bitmap being negated + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return a new Bitmap + * @deprecated use the version where longs specify the range + */ + @Deprecated + public static MutableRoaringBitmap flip(MutableRoaringBitmap rb, + final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + return flip(rb, (long) rangeStart, (long) rangeEnd); + } + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + return flip(rb, rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + + // important: inputs should not have been computed lazily + protected static MutableRoaringBitmap lazyorfromlazyinputs(final MutableRoaringBitmap x1, + final MutableRoaringBitmap x2) { + final MutableRoaringBitmap answer = new MutableRoaringBitmap(); + MappeableContainerPointer i1 = x1.highLowContainer.getContainerPointer(); + MappeableContainerPointer i2 = x2.highLowContainer.getContainerPointer(); + main: + if (i1.hasContainer() 
&& i2.hasContainer()) { + while (true) { + if (i1.key() == i2.key()) { + MappeableContainer c1 = i1.getContainer(); + MappeableContainer c2 = i2.getContainer(); + if ((c2 instanceof MappeableBitmapContainer) + && (!(c1 instanceof MappeableBitmapContainer))) { + MappeableContainer tmp = c1; + c1 = c2; + c2 = tmp; + } + answer.getMappeableRoaringArray().append(i1.key(), c1.lazyIOR(c2)); + i1.advance(); + i2.advance(); + if (!i1.hasContainer() || !i2.hasContainer()) { + break main; + } + } else if (Util.compareUnsigned(i1.key(), i2.key()) < 0) { // i1.key() < i2.key() + answer.getMappeableRoaringArray().append(i1.key(), i1.getContainer()); + i1.advance(); + if (!i1.hasContainer()) { + break main; + } + } else { // i1.key() > i2.key() + answer.getMappeableRoaringArray().append(i2.key(), i2.getContainer()); + i2.advance(); + if (!i2.hasContainer()) { + break main; + } + } + } + } + if (!i1.hasContainer()) { + while (i2.hasContainer()) { + answer.getMappeableRoaringArray().append(i2.key(), i2.getContainer()); + i2.advance(); + } + } else if (!i2.hasContainer()) { + while (i1.hasContainer()) { + answer.getMappeableRoaringArray().append(i1.key(), i1.getContainer()); + i1.advance(); + } + } + return answer; + } + + /** + * Compute overall OR between bitmaps. + *

+ * (Effectively calls {@link BufferFastAggregation#or}) + * + * @param bitmaps input bitmaps + * @return aggregated bitmap + */ + public static MutableRoaringBitmap or(ImmutableRoaringBitmap... bitmaps) { + return BufferFastAggregation.or(bitmaps); + } + + /** + * Bitwise OR (union) operation. The provided bitmaps are *not* modified. This operation is + * thread-safe as long as the provided bitmaps remain unchanged. + * + * @param x1 first bitmap + * @param x2 other bitmap + * @return result of the operation + */ + public static MutableRoaringBitmap or(final MutableRoaringBitmap x1, + final MutableRoaringBitmap x2) { + final MutableRoaringBitmap answer = new MutableRoaringBitmap(); + int pos1 = 0, pos2 = 0; + final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size(); + main: + if (pos1 < length1 && pos2 < length2) { + short s1 = x1.highLowContainer.getKeyAtIndex(pos1); + short s2 = x2.highLowContainer.getKeyAtIndex(pos2); + + while (true) { + if (s1 == s2) { + answer.getMappeableRoaringArray().append(s1, x1.highLowContainer.getContainerAtIndex(pos1) + .or(x2.highLowContainer.getContainerAtIndex(pos2))); + pos1++; + pos2++; + if ((pos1 == length1) || (pos2 == length2)) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2 + answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer.getKeyAtIndex(pos1), + x1.highLowContainer.getContainerAtIndex(pos1)); + pos1++; + if (pos1 == length1) { + break main; + } + s1 = x1.highLowContainer.getKeyAtIndex(pos1); + } else { // s1 > s2 + answer.getMappeableRoaringArray().appendCopy(x2.highLowContainer.getKeyAtIndex(pos2), + x2.highLowContainer.getContainerAtIndex(pos2)); + pos2++; + if (pos2 == length2) { + break main; + } + s2 = x2.highLowContainer.getKeyAtIndex(pos2); + } + } + } + if (pos1 == length1) { + answer.getMappeableRoaringArray().appendCopy(x2.highLowContainer, 
pos2, length2); + } else if (pos2 == length2) { + answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer, pos1, length1); + } + return answer; + } + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) removed. + * + * @param rb initial bitmap (will not be modified) + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new bitmap + */ + public static MutableRoaringBitmap remove(MutableRoaringBitmap rb, final long rangeStart, + final long rangeEnd) { + rangeSanityCheck(rangeStart, rangeEnd); + if (rangeStart >= rangeEnd) { + return rb.clone(); // empty range + } + final int hbStart = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeStart)); + final int lbStart = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeStart)); + final int hbLast = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeEnd - 1)); + final int lbLast = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeEnd - 1)); + MutableRoaringBitmap answer = new MutableRoaringBitmap(); + ((MutableRoaringArray) answer.highLowContainer).appendCopiesUntil(rb.highLowContainer, + (short) hbStart); + + if (hbStart == hbLast) { + final int i = rb.highLowContainer.getIndex((short) hbStart); + if (i >= 0) { + final MappeableContainer c = + rb.highLowContainer.getContainerAtIndex(i).remove(lbStart, lbLast + 1); + if (c.getCardinality() > 0) { + ((MutableRoaringArray) answer.highLowContainer).append((short) hbStart, c); + } + } + ((MutableRoaringArray) answer.highLowContainer).appendCopiesAfter(rb.highLowContainer, + (short) hbLast); + return answer; + } + int ifirst = rb.highLowContainer.getIndex((short) hbStart); + int ilast = rb.highLowContainer.getIndex((short) hbLast); + if ((ifirst >= 0) && (lbStart != 0)) { + final MappeableContainer c = rb.highLowContainer.getContainerAtIndex(ifirst).remove(lbStart, + BufferUtil.maxLowBitAsInteger() + 1); + if (c.getCardinality() > 0) { + ((MutableRoaringArray) 
answer.highLowContainer).append((short) hbStart, c); + } + } + if ((ilast >= 0) && (lbLast != BufferUtil.maxLowBitAsInteger())) { + final MappeableContainer c = + rb.highLowContainer.getContainerAtIndex(ilast).remove(0, lbLast + 1); + if (c.getCardinality() > 0) { + ((MutableRoaringArray) answer.highLowContainer).append((short) hbLast, c); + } + } + ((MutableRoaringArray) answer.highLowContainer).appendCopiesAfter(rb.highLowContainer, + (short) hbLast); + return answer; + } + + /** + * Generate a new bitmap with all integers in [rangeStart,rangeEnd) removed. + * + * @param rb initial bitmap (will not be modified) + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + * @return new bitmap + * @deprecated use the version where longs specify the range + */ + @Deprecated + public static MutableRoaringBitmap remove(MutableRoaringBitmap rb, + final int rangeStart, final int rangeEnd) { + if (rangeStart >= 0) { + return remove(rb, (long) rangeStart, (long) rangeEnd); + } + // rangeStart being -ve and rangeEnd being positive is not expected) + // so assume both -ve + return remove(rb, rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL); + } + + /** + * Bitwise XOR (symmetric difference) operation. The provided bitmaps are *not* modified. This + * operation is thread-safe as long as the provided bitmaps remain unchanged. 
+     *
+     * @param x1 first bitmap
+     * @param x2 other bitmap
+     * @return result of the operation
+     */
+    public static MutableRoaringBitmap xor(final MutableRoaringBitmap x1,
+                                           final MutableRoaringBitmap x2) {
+        final MutableRoaringBitmap answer = new MutableRoaringBitmap();
+        int pos1 = 0, pos2 = 0;
+        final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size();
+
+        // Merge the two container arrays key by key. The labeled block is left as soon as one
+        // side is exhausted; the remainder of the other side is copied afterwards.
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = x1.highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    // same key on both sides: xor the containers, dropping an empty result
+                    final MappeableContainer c = x1.highLowContainer.getContainerAtIndex(pos1)
+                            .xor(x2.highLowContainer.getContainerAtIndex(pos2));
+                    if (c.getCardinality() > 0) {
+                        answer.getMappeableRoaringArray().append(s1, c);
+                    }
+                    pos1++;
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = x1.highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    // key only present in x1: copy its container
+                    answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer.getKeyAtIndex(pos1),
+                            x1.highLowContainer.getContainerAtIndex(pos1));
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = x1.highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2 as unsigned keys
+                    // This must be a plain else: the keys are ordered as unsigned values, and a
+                    // signed "s1 > s2" test is false when s1 has its high bit set and s2 does
+                    // not, which left no branch taken and the loop spinning forever.
+                    answer.getMappeableRoaringArray().appendCopy(x2.highLowContainer.getKeyAtIndex(pos2),
+                            x2.highLowContainer.getContainerAtIndex(pos2));
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        // At most one side still has containers; they belong to the result unchanged.
+        if (pos1 == length1) {
+            answer.getMappeableRoaringArray().appendCopy(x2.highLowContainer, pos2, length2);
+        } else if (pos2 == length2) {
+            answer.getMappeableRoaringArray().appendCopy(x1.highLowContainer, pos1, length1);
+        }
+
+        return answer;
+    }
+
+    /**
+     * Assume that one wants to store "cardinality" integers in [0, universe_size),
+     * this function returns an upper bound on the serialized size in bytes.
+     *

+ * This function is identical to RoaringBitmap.maximumSerializedSize. + * + * @param cardinality maximal cardinality + * @param universe_size maximal value + * @return upper bound on the serialized size in bytes of the bitmap + */ + public static long maximumSerializedSize(int cardinality, int universe_size) { + return RoaringBitmap.maximumSerializedSize(cardinality, universe_size); + } + + /** + * Set all the specified values to true. This can be expected to be slightly + * faster than calling "add" repeatedly. The provided integers values don't + * have to be in sorted order, but it may be preferable to sort them from a performance point of + * view. + * + * @param dat set values + */ + public void add(final int... dat) { + MutableRoaringArray mra = (MutableRoaringArray) highLowContainer; + MappeableContainer currentcont = null; + short currenthb = 0; + int currentcontainerindex = 0; + int j = 0; + if (j < dat.length) { + int val = dat[j]; + currenthb = BufferUtil.highbits(val); + currentcontainerindex = highLowContainer.getIndex(currenthb); + if (currentcontainerindex >= 0) { + currentcont = highLowContainer.getContainerAtIndex(currentcontainerindex); + MappeableContainer newcont = currentcont.add(BufferUtil.lowbits(val)); + if (newcont != currentcont) { + mra.setContainerAtIndex(currentcontainerindex, newcont); + currentcont = newcont; + } + } else { + currentcontainerindex = -currentcontainerindex - 1; + final MappeableArrayContainer newac = new MappeableArrayContainer(); + currentcont = newac.add(BufferUtil.lowbits(val)); + mra.insertNewKeyValueAt(currentcontainerindex, currenthb, currentcont); + } + j++; + } + for (; j < dat.length; ++j) { + int val = dat[j]; + short newhb = BufferUtil.highbits(val); + if (currenthb == newhb) {// easy case + // this could be quite frequent + MappeableContainer newcont = currentcont.add(BufferUtil.lowbits(val)); + if (newcont != currentcont) { + mra.setContainerAtIndex(currentcontainerindex, newcont); + currentcont = 
newcont; + } + } else { + currenthb = newhb; + currentcontainerindex = highLowContainer.getIndex(currenthb); + if (currentcontainerindex >= 0) { + currentcont = highLowContainer.getContainerAtIndex(currentcontainerindex); + MappeableContainer newcont = currentcont.add(BufferUtil.lowbits(val)); + if (newcont != currentcont) { + mra.setContainerAtIndex(currentcontainerindex, newcont); + currentcont = newcont; + } + } else { + currentcontainerindex = -currentcontainerindex - 1; + final MappeableArrayContainer newac = new MappeableArrayContainer(); + currentcont = newac.add(BufferUtil.lowbits(val)); + mra.insertNewKeyValueAt(currentcontainerindex, currenthb, currentcont); + } + } + } + } + + /** + * Add the value to the container (set the value to "true"), whether it already appears or not. + * + * @param x integer value + */ + @Override + public void add(final int x) { + final short hb = BufferUtil.highbits(x); + final int i = highLowContainer.getIndex(hb); + if (i >= 0) { + getMappeableRoaringArray().setContainerAtIndex(i, + highLowContainer.getContainerAtIndex(i).add(BufferUtil.lowbits(x))); + } else { + final MappeableArrayContainer newac = new MappeableArrayContainer(); + getMappeableRoaringArray().insertNewKeyValueAt(-i - 1, hb, newac.add(BufferUtil.lowbits(x))); + } + } + + /** + * Add to the current bitmap all integers in [rangeStart,rangeEnd). 
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     */
+    public void add(final long rangeStart, final long rangeEnd) {
+        rangeSanityCheck(rangeStart, rangeEnd);
+        if (rangeStart >= rangeEnd) {
+            return; // empty range
+        }
+
+        // Split both bounds into the 16-bit container key (high bits) and the position inside
+        // the container (low bits); the last value of the range is rangeEnd - 1.
+        final int hbStart = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeStart));
+        final int lbStart = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeStart));
+        final int hbLast = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeEnd - 1));
+        final int lbLast = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeEnd - 1));
+        for (int hb = hbStart; hb <= hbLast; ++hb) {
+
+            // first container may contain partial range
+            final int containerStart = (hb == hbStart) ? lbStart : 0;
+            // last container may contain partial range
+            final int containerLast = (hb == hbLast) ? lbLast : BufferUtil.maxLowBitAsInteger();
+            final int i = highLowContainer.getIndex((short) hb);
+
+            if (i >= 0) {
+                // key already present: add the sub-range and store whatever container comes back
+                final MappeableContainer c =
+                        highLowContainer.getContainerAtIndex(i).iadd(containerStart, containerLast + 1);
+                ((MutableRoaringArray) highLowContainer).setContainerAtIndex(i, c);
+            } else {
+                // key absent: -i - 1 is the insertion point reported by getIndex
+                ((MutableRoaringArray) highLowContainer).insertNewKeyValueAt(-i - 1, (short) hb,
+                        MappeableContainer.rangeOfOnes(containerStart, containerLast + 1));
+            }
+        }
+    }
+
+    /**
+     * Add to the current bitmap all integers in [rangeStart,rangeEnd).
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     * @deprecated use the version where longs specify the range
+     */
+    @Deprecated
+    public void add(final int rangeStart, final int rangeEnd) {
+        if (rangeStart >= 0) {
+            add((long) rangeStart, (long) rangeEnd);
+        } else {
+            // rangeStart being -ve and rangeEnd being positive is not expected)
+            // so assume both -ve
+            // The else matters: without it a non-negative range fell through and was added a
+            // second time.
+            add(rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL);
+        }
+    }
+
+    /**
+     * In-place bitwise AND (intersection) operation. The current bitmap is modified.
+     *
+     * @param array other bitmap
+     */
+    public void and(final ImmutableRoaringBitmap array) {
+        final int ownSize = highLowContainer.size();
+        final int otherSize = array.highLowContainer.size();
+        int ownPos = 0;
+        int otherPos = 0;
+        // number of surviving containers, compacted to the front of this bitmap's array
+        int kept = 0;
+
+        while (ownPos < ownSize && otherPos < otherSize) {
+            final short ownKey = highLowContainer.getKeyAtIndex(ownPos);
+            final short otherKey = array.highLowContainer.getKeyAtIndex(otherPos);
+
+            if (ownKey != otherKey) {
+                // Keys are compared as unsigned values; skip ahead on whichever side lags.
+                if (Util.compareUnsigned(ownKey, otherKey) < 0) {
+                    ownPos = highLowContainer.advanceUntil(otherKey, ownPos);
+                } else {
+                    otherPos = array.highLowContainer.advanceUntil(ownKey, otherPos);
+                }
+                continue;
+            }
+
+            final MappeableContainer shared = highLowContainer.getContainerAtIndex(ownPos)
+                    .iand(array.highLowContainer.getContainerAtIndex(otherPos));
+            if (shared.getCardinality() > 0) {
+                getMappeableRoaringArray().replaceKeyAndContainerAtIndex(kept, ownKey, shared);
+                ++kept;
+            }
+            ++ownPos;
+            ++otherPos;
+        }
+        // everything past the compacted prefix is dropped
+        getMappeableRoaringArray().resize(kept);
+    }
+
+    /**
+     * In-place bitwise ANDNOT (difference) operation. The current bitmap is modified.
+     *
+     * @param x2 other bitmap
+     */
+    public void andNot(final ImmutableRoaringBitmap x2) {
+        final int ownSize = highLowContainer.size();
+        final int otherSize = x2.highLowContainer.size();
+        int readPos = 0;
+        int otherPos = 0;
+        // next slot to fill while surviving containers are compacted to the front
+        int writePos = 0;
+
+        while (readPos < ownSize && otherPos < otherSize) {
+            final short ownKey = highLowContainer.getKeyAtIndex(readPos);
+            final short otherKey = x2.highLowContainer.getKeyAtIndex(otherPos);
+
+            if (ownKey == otherKey) {
+                // key on both sides: subtract in place and keep the result unless it is empty
+                final MappeableContainer remaining = highLowContainer.getContainerAtIndex(readPos)
+                        .iandNot(x2.highLowContainer.getContainerAtIndex(otherPos));
+                if (remaining.getCardinality() > 0) {
+                    getMappeableRoaringArray().replaceKeyAndContainerAtIndex(writePos, ownKey, remaining);
+                    ++writePos;
+                }
+                ++readPos;
+                ++otherPos;
+                continue;
+            }
+
+            if (Util.compareUnsigned(ownKey, otherKey) < 0) {
+                // key only on this side: the container survives untouched, moved down if a gap
+                // has opened before it
+                if (readPos != writePos) {
+                    final MappeableContainer untouched = highLowContainer.getContainerAtIndex(readPos);
+                    getMappeableRoaringArray().replaceKeyAndContainerAtIndex(writePos, ownKey, untouched);
+                }
+                ++writePos;
+                ++readPos;
+            } else {
+                // key only on the other side: nothing to subtract, catch up on that side
+                otherPos = x2.highLowContainer.advanceUntil(ownKey, otherPos);
+            }
+        }
+
+        if (readPos < ownSize) {
+            // the other bitmap is exhausted: every remaining container survives
+            getMappeableRoaringArray().copyRange(readPos, ownSize, writePos);
+            writePos += ownSize - readPos;
+        }
+        getMappeableRoaringArray().resize(writePos);
+    }
+
+    /**
+     * Add the value to the container (set the value to "true"), whether it already appears or not.
+     *
+     * @param x integer value
+     * @return true if the added int wasn't already contained in the bitmap. False otherwise.
+     */
+    public boolean checkedAdd(final int x) {
+        final short hb = BufferUtil.highbits(x);
+        final int i = highLowContainer.getIndex(hb);
+        if (i >= 0) {
+            MappeableContainer C = highLowContainer.getContainerAtIndex(i);
+            int oldcard = C.getCardinality();
+            // store the container returned by add(), then compare cardinalities to detect
+            // whether the value was new
+            C = C.add(BufferUtil.lowbits(x));
+            getMappeableRoaringArray().setContainerAtIndex(i, C);
+            return C.getCardinality() > oldcard;
+        } else {
+            // no container for this key yet: -i - 1 is the insertion point reported by getIndex
+            final MappeableArrayContainer newac = new MappeableArrayContainer();
+            getMappeableRoaringArray().insertNewKeyValueAt(-i - 1, hb, newac.add(BufferUtil.lowbits(x)));
+            return true;
+        }
+    }
+
+    /**
+     * If present remove the specified integer (effectively, sets its bit value to false)
+     *
+     * @param x integer value representing the index in a bitmap
+     * @return true if the unset bit was already in the bitmap
+     */
+    public boolean checkedRemove(final int x) {
+        final short hb = BufferUtil.highbits(x);
+        final int i = highLowContainer.getIndex(hb);
+        if (i < 0) {
+            return false;
+        }
+        MappeableContainer C = highLowContainer.getContainerAtIndex(i);
+        int oldcard = C.getCardinality();
+        // Keep the container returned by remove() instead of discarding it, as checkedAdd and
+        // flip(int) do for add()/flip(): the returned container is the one to store.
+        // NOTE(review): presumably remove() can hand back a different container type - confirm
+        // against the MappeableContainer implementations.
+        C = C.remove(BufferUtil.lowbits(x));
+        int newcard = C.getCardinality();
+        if (newcard == oldcard) {
+            return false;
+        }
+        if (newcard > 0) {
+            ((MutableRoaringArray) highLowContainer).setContainerAtIndex(i, C);
+        } else {
+            // the container became empty: drop its key altogether
+            ((MutableRoaringArray) highLowContainer).removeAtIndex(i);
+        }
+        return true;
+    }
+
+    /**
+     * reset to an empty bitmap; result occupies as much space a newly created bitmap.
+     */
+    public void clear() {
+        highLowContainer = new MutableRoaringArray(); // lose references
+    }
+
+    /**
+     * Creates a copy of this bitmap whose container array is a clone of this bitmap's array.
+     *
+     * @return the cloned bitmap
+     */
+    @Override
+    public MutableRoaringBitmap clone() {
+        final MutableRoaringBitmap x = (MutableRoaringBitmap) super.clone();
+        x.highLowContainer = highLowContainer.clone();
+        return x;
+
+    }
+
+    /**
+     * Deserialize the bitmap (retrieve from the input stream). The current bitmap is overwritten.
+     *
+     * @param in the DataInput stream
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    public void deserialize(DataInput in) throws IOException {
+        // delegates to the backing array, which reads its own serialized form
+        getMappeableRoaringArray().deserialize(in);
+    }
+
+    /**
+     * Add the value if it is not already present, otherwise remove it.
+     *
+     * @param x integer value
+     */
+    public void flip(final int x) {
+        final short hb = BufferUtil.highbits(x);
+        final int i = highLowContainer.getIndex(hb);
+        if (i >= 0) {
+            MappeableContainer c = highLowContainer.getContainerAtIndex(i);
+            // keep the container returned by flip(); drop the key if nothing is left in it
+            c = c.flip(BufferUtil.lowbits(x));
+            if (c.getCardinality() > 0) {
+                ((MutableRoaringArray) highLowContainer).setContainerAtIndex(i, c);
+            } else {
+                ((MutableRoaringArray) highLowContainer).removeAtIndex(i);
+            }
+        } else {
+            // no container for this key: the value is absent, so flipping means adding it;
+            // -i - 1 is the insertion point reported by getIndex
+            final MappeableArrayContainer newac = new MappeableArrayContainer();
+            ((MutableRoaringArray) highLowContainer).insertNewKeyValueAt(-i - 1, hb,
+                    newac.add(BufferUtil.lowbits(x)));
+        }
+    }
+
+    /**
+     * Modifies the current bitmap by complementing the bits in the given range, from rangeStart
+     * (inclusive) rangeEnd (exclusive).
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     */
+    public void flip(final long rangeStart, final long rangeEnd) {
+        rangeSanityCheck(rangeStart, rangeEnd);
+        if (rangeStart >= rangeEnd) {
+            return; // empty range
+        }
+
+        // Split both bounds into the container key (high bits) and the position inside the
+        // container (low bits); the last value of the range is rangeEnd - 1.
+        final int hbStart = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeStart));
+        final int lbStart = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeStart));
+        final int hbLast = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeEnd - 1));
+        final int lbLast = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeEnd - 1));
+
+        for (int hb = hbStart; hb <= hbLast; ++hb) {
+            // first container may contain partial range
+            final int containerStart = (hb == hbStart) ? lbStart : 0;
+            // last container may contain partial range
+            final int containerLast = (hb == hbLast) ? lbLast : BufferUtil.maxLowBitAsInteger();
+            final int i = highLowContainer.getIndex((short) hb);
+
+            if (i >= 0) {
+                // existing container: complement the sub-range, removing the key if it empties
+                final MappeableContainer c =
+                        highLowContainer.getContainerAtIndex(i).inot(containerStart, containerLast + 1);
+                if (c.getCardinality() > 0) {
+                    getMappeableRoaringArray().setContainerAtIndex(i, c);
+                } else {
+                    getMappeableRoaringArray().removeAtIndex(i);
+                }
+            } else {
+                // no container for this key: the complement of "nothing set" is the full sub-range
+                getMappeableRoaringArray().insertNewKeyValueAt(-i - 1, (short) hb,
+                        MappeableContainer.rangeOfOnes(containerStart, containerLast + 1));
+            }
+        }
+    }
+
+    /**
+     * Modifies the current bitmap by complementing the bits in the given range, from rangeStart
+     * (inclusive) rangeEnd (exclusive).
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     * @deprecated use the version where longs specify the range
+     */
+    @Deprecated
+    public void flip(final int rangeStart, final int rangeEnd) {
+        if (rangeStart >= 0) {
+            flip((long) rangeStart, (long) rangeEnd);
+        } else {
+            // rangeStart being -ve and rangeEnd being positive is not expected)
+            // so assume both -ve
+            // masking with 0xFFFFFFFFL widens the ints without sign extension
+            flip(rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL);
+        }
+    }
+
+    /**
+     * Gives access to the container array backing this bitmap. No copy is made: the returned
+     * object is this bitmap's own highLowContainer, cast to its mutable type.
+     *
+     * @return the backing container array of this bitmap
+     */
+    public MutableRoaringArray getMappeableRoaringArray() {
+        return (MutableRoaringArray) highLowContainer;
+    }
+
+    /**
+     * Hash code of the bitmap, delegated to the backing container array.
+     *
+     * @return the hash code of the backing container array
+     */
+    @Override
+    public int hashCode() {
+        return highLowContainer.hashCode();
+    }
+
+    /**
+     * iterate over the positions of the true values.
+     *
+     * @return the iterator
+     */
+    @Override
+    public Iterator<Integer> iterator() {
+        return new Iterator<Integer>() {
+            // key of the current container, already shifted into the upper 16 bits
+            private int hs = 0;
+
+            // iterator over the low 16 bits stored in the current container
+            private ShortIterator iter;
+
+            // index of the current container in the underlying array
+            private int pos = 0;
+
+            // last value handed out by next()
+            private int x;
+
+            @Override
+            public boolean hasNext() {
+                return pos < MutableRoaringBitmap.this.highLowContainer.size();
+            }
+
+            // position the iterator on the container at pos, if there is one
+            private Iterator<Integer> init() {
+                if (pos < MutableRoaringBitmap.this.highLowContainer.size()) {
+                    iter = MutableRoaringBitmap.this.highLowContainer.getContainerAtIndex(pos)
+                            .getShortIterator();
+                    hs = BufferUtil
+                            .toIntUnsigned(MutableRoaringBitmap.this.highLowContainer.getKeyAtIndex(pos)) << 16;
+                }
+                return this;
+            }
+
+            @Override
+            public Integer next() {
+                if (!hasNext()) {
+                    // honour the Iterator contract instead of dereferencing a null or
+                    // exhausted container iterator
+                    throw new java.util.NoSuchElementException();
+                }
+                x = BufferUtil.toIntUnsigned(iter.next()) | hs;
+                if (!iter.hasNext()) {
+                    // current container exhausted: move on to the next one
+                    ++pos;
+                    init();
+                }
+                return x;
+            }
+
+            @Override
+            public void remove() {
+                if ((x & hs) == hs) {// still in same container
+                    iter.remove();
+                } else {
+                    MutableRoaringBitmap.this.remove(x);
+                }
+            }
+
+        }.init();
+    }
+
+    /**
+     * In-place lazy OR with x2: containers under matching keys are combined with lazyIOR,
+     * containers only present in x2 are cloned into this bitmap.
+     * Call repairAfterLazy on the result, eventually.
+     * Important: x2 should not have been computed lazily.
+     *
+     * @param x2 other bitmap
+     */
+    protected void lazyor(final ImmutableRoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+        // walk both key sequences in step; "break main" leaves the whole merge
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    getMappeableRoaringArray().setContainerAtIndex(pos1, highLowContainer
+                            .getContainerAtIndex(pos1).lazyIOR(x2.highLowContainer.getContainerAtIndex(pos2)));
+                    pos1++;
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    // key only in x2: insert a clone, which grows this array by one
+                    getMappeableRoaringArray().insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        if (pos1 == length1) {
+            // this bitmap is exhausted: append whatever is left of x2
+            getMappeableRoaringArray().appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+    /**
+     * Like lazyor, except that the container of this bitmap is first converted with
+     * toBitmapContainer() whenever both bitmaps hold the same key.
+     * Call repairAfterLazy on the result, eventually.
+     * Important: x2 should not have been computed lazily.
+     *
+     * @param x2 other bitmap
+     */
+    public void naivelazyor(final ImmutableRoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+        // walk both key sequences in step; "break main" leaves the whole merge
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    MappeableContainer c1 = highLowContainer
+                            .getContainerAtIndex(pos1);
+                    c1 = c1.toBitmapContainer();
+                    getMappeableRoaringArray().setContainerAtIndex(pos1,
+                            c1.lazyIOR(x2.highLowContainer.getContainerAtIndex(pos2)));
+                    pos1++;
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    // key only in x2: insert a clone, which grows this array by one
+                    getMappeableRoaringArray().insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        if (pos1 == length1) {
+            // this bitmap is exhausted: append whatever is left of x2
+            getMappeableRoaringArray().appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+    /**
+     * In-place bitwise OR (union) operation. The current bitmap is modified.
+     *
+     * @param x2 other bitmap
+     */
+    public void or(final ImmutableRoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+        // walk both key sequences in step; "break main" leaves the whole merge
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    getMappeableRoaringArray().setContainerAtIndex(pos1, highLowContainer
+                            .getContainerAtIndex(pos1).ior(x2.highLowContainer.getContainerAtIndex(pos2)));
+                    pos1++;
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    // key only in x2: insert a clone, which grows this array by one
+                    getMappeableRoaringArray().insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        if (pos1 == length1) {
+            // this bitmap is exhausted: append whatever is left of x2
+            getMappeableRoaringArray().appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+    /**
+     * Reads the bitmap content by delegating to the underlying array's readExternal.
+     *
+     * @param in the ObjectInput to read from
+     * @throws IOException            Signals that an I/O exception has occurred.
+     * @throws ClassNotFoundException declared for the Externalizable-style signature
+     */
+    @Override
+    public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+        getMappeableRoaringArray().readExternal(in);
+
+    }
+
+    /**
+     * If present remove the specified integer (effectively, sets its bit value to false)
+     *
+     * @param x integer value representing the index in a bitmap
+     */
+    @Override
+    public void remove(final int x) {
+        final short hb = BufferUtil.highbits(x);
+        final int i = highLowContainer.getIndex(hb);
+        if (i < 0) {
+            return; // no container for this key
+        }
+        // store the container returned by remove()
+        getMappeableRoaringArray().setContainerAtIndex(i,
+                highLowContainer.getContainerAtIndex(i).remove(BufferUtil.lowbits(x)));
+        if (highLowContainer.getContainerAtIndex(i).getCardinality() == 0) {
+            // the container became empty, so its key is dropped
+            getMappeableRoaringArray().removeAtIndex(i);
+        }
+    }
+
+    /**
+     * Remove from the current bitmap all integers in [rangeStart,rangeEnd).
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     */
+    public void remove(final long rangeStart, final long rangeEnd) {
+        rangeSanityCheck(rangeStart, rangeEnd);
+        if (rangeStart >= rangeEnd) {
+            return; // empty range
+        }
+        final int hbStart = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeStart));
+        final int lbStart = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeStart));
+        final int hbLast = BufferUtil.toIntUnsigned(BufferUtil.highbits(rangeEnd - 1));
+        final int lbLast = BufferUtil.toIntUnsigned(BufferUtil.lowbits(rangeEnd - 1));
+        if (hbStart == hbLast) {
+            // the whole range lives under a single key
+            final int i = highLowContainer.getIndex((short) hbStart);
+            if (i < 0) {
+                return;
+            }
+            final MappeableContainer c =
+                    highLowContainer.getContainerAtIndex(i).iremove(lbStart, lbLast + 1);
+            if (c.getCardinality() > 0) {
+                ((MutableRoaringArray) highLowContainer).setContainerAtIndex(i, c);
+            } else {
+                ((MutableRoaringArray) highLowContainer).removeAtIndex(i);
+            }
+            return;
+        }
+        // the range spans several keys: trim the two boundary containers, then drop every
+        // index in [ifirst, ilast)
+        int ifirst = highLowContainer.getIndex((short) hbStart);
+        int ilast = highLowContainer.getIndex((short) hbLast);
+        if (ifirst >= 0) {
+            if (lbStart != 0) {
+                final MappeableContainer c = highLowContainer.getContainerAtIndex(ifirst).iremove(lbStart,
+                        BufferUtil.maxLowBitAsInteger() + 1);
+                if (c.getCardinality() > 0) {
+                    // the first container survives, so it is excluded from the removed range
+                    ((MutableRoaringArray) highLowContainer).setContainerAtIndex(ifirst, c);
+                    ifirst++;
+                }
+            }
+        } else {
+            ifirst = -ifirst - 1;
+        }
+        if (ilast >= 0) {
+            if (lbLast != BufferUtil.maxLowBitAsInteger()) {
+                final MappeableContainer c =
+                        highLowContainer.getContainerAtIndex(ilast).iremove(0, lbLast + 1);
+                if (c.getCardinality() > 0) {
+                    ((MutableRoaringArray) highLowContainer).setContainerAtIndex(ilast, c);
+                } else {
+                    // the last container became empty: include it in the removed range
+                    ilast++;
+                }
+            } else {
+                // the last container is covered entirely: include it in the removed range
+                ilast++;
+            }
+        } else {
+            ilast = -ilast - 1;
+        }
+        ((MutableRoaringArray) highLowContainer).removeIndexRange(ifirst, ilast);
+    }
+
+    /**
+     * Remove from the current bitmap all integers in [rangeStart,rangeEnd). A negative
+     * rangeStart makes both bounds be read as unsigned 32-bit values.
+     *
+     * @param rangeStart inclusive beginning of range
+     * @param rangeEnd   exclusive ending of range
+     * @deprecated use the version where longs specify the range
+     */
+    @Deprecated
+    public void remove(final int rangeStart, final int rangeEnd) {
+        if (rangeStart >= 0) {
+            remove((long) rangeStart, (long) rangeEnd);
+        } else {
+            // rangeStart being -ve and rangeEnd being positive is not expected,
+            // so assume both -ve; the else keeps a non-negative range from being
+            // removed a second time (same shape as flip(int, int))
+            remove(rangeStart & 0xFFFFFFFFL, rangeEnd & 0xFFFFFFFFL);
+        }
+    }
+
+    /**
+     * Remove run-length encoding even when it is more space efficient
+     *
+     * @return whether a change was applied
+     */
+    public boolean removeRunCompression() {
+        boolean answer = false;
+        for (int i = 0; i < this.highLowContainer.size(); i++) {
+            MappeableContainer c = getMappeableRoaringArray().getContainerAtIndex(i);
+            if (c instanceof MappeableRunContainer) {
+                // replace the run container with its bitmap or array equivalent
+                MappeableContainer mc =
+                        ((MappeableRunContainer) c).toBitmapOrArrayContainer(c.getCardinality());
+                getMappeableRoaringArray().setContainerAtIndex(i, mc);
+                answer = true;
+            }
+        }
+        return answer;
+    }
+
+    /**
+     * To be used with lazyor: replaces every container with the result of its own
+     * repairAfterLazy().
+     */
+    public void repairAfterLazy() {
+        for (int k = 0; k < highLowContainer.size(); ++k) {
+            MappeableContainer c = highLowContainer.getContainerAtIndex(k);
+            ((MutableRoaringArray) highLowContainer).setContainerAtIndex(k, c.repairAfterLazy());
+        }
+    }
+
+    /**
+     * Use a run-length encoding where it is estimated as more space efficient
+     *
+     * @return true when at least one container is a MappeableRunContainer after the call
+     */
+    public boolean runOptimize() {
+        boolean answer = false;
+        for (int i = 0; i < this.highLowContainer.size(); i++) {
+            MappeableContainer c = getMappeableRoaringArray().getContainerAtIndex(i).runOptimize();
+            if (c instanceof MappeableRunContainer) {
+                answer = true;
+            }
+            getMappeableRoaringArray().setContainerAtIndex(i, c);
+        }
+        return answer;
+    }
+
+    /**
+     * Convenience method,
 effectively casts the object to an object of class ImmutableRoaringBitmap.
+     * <p>
+     * This function is equivalent to :
+     * <pre>
+     * {@code
+     *       (ImmutableRoaringBitmap) bitmap
+     * }
+     * </pre>
+     *
+     * @return a cast of this object
+     */
+    public ImmutableRoaringBitmap toImmutableRoaringBitmap() {
+        return this;
+
+    }
+
+    /**
+     * Recover allocated but unused memory by calling trim() on every container.
+     */
+    @Override
+    public void trim() {
+        for (int i = 0; i < this.highLowContainer.size(); i++) {
+            this.highLowContainer.getContainerAtIndex(i).trim();
+        }
+    }
+
+    /**
+     * Writes the bitmap content by delegating to the underlying array's writeExternal.
+     *
+     * @param out the ObjectOutput to write to
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    @Override
+    public void writeExternal(ObjectOutput out) throws IOException {
+        getMappeableRoaringArray().writeExternal(out);
+    }
+
+    /**
+     * In-place bitwise XOR (symmetric difference) operation. The current bitmap is modified.
+     *
+     * @param x2 other bitmap
+     */
+    public void xor(final ImmutableRoaringBitmap x2) {
+        int pos1 = 0, pos2 = 0;
+        int length1 = highLowContainer.size();
+        final int length2 = x2.highLowContainer.size();
+
+        // walk both key sequences in step; "break main" leaves the whole merge
+        main:
+        if (pos1 < length1 && pos2 < length2) {
+            short s1 = highLowContainer.getKeyAtIndex(pos1);
+            short s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+
+            while (true) {
+                if (s1 == s2) {
+                    final MappeableContainer c = highLowContainer.getContainerAtIndex(pos1)
+                            .ixor(x2.highLowContainer.getContainerAtIndex(pos2));
+                    if (c.getCardinality() > 0) {
+                        this.getMappeableRoaringArray().setContainerAtIndex(pos1, c);
+                        pos1++;
+                    } else {
+                        // the xor emptied the container: drop it; pos1 stays put because the
+                        // following entries shift down and the array is one shorter
+                        getMappeableRoaringArray().removeAtIndex(pos1);
+                        --length1;
+                    }
+                    pos2++;
+                    if ((pos1 == length1) || (pos2 == length2)) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                } else if (Util.compareUnsigned(s1, s2) < 0) { // s1 < s2
+                    pos1++;
+                    if (pos1 == length1) {
+                        break main;
+                    }
+                    s1 = highLowContainer.getKeyAtIndex(pos1);
+                } else { // s1 > s2
+                    // key only in x2: insert a clone, which grows this array by one
+                    getMappeableRoaringArray().insertNewKeyValueAt(pos1, s2,
+                            x2.highLowContainer.getContainerAtIndex(pos2).clone());
+                    pos1++;
+                    length1++;
+                    pos2++;
+                    if (pos2 == length2) {
+                        break main;
+                    }
+                    s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+                }
+            }
+        }
+        if (pos1 == length1) {
+            // this bitmap is exhausted: append whatever is left of x2
+            getMappeableRoaringArray().appendCopy(x2.highLowContainer, pos2, length2);
+        }
+    }
+
+
+}
diff --git a/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/PointableRoaringArray.java b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/PointableRoaringArray.java
new file mode 100644
index 000000000..c9cd8fa7d
--- /dev/null
+++ b/fine-roaringbitmap/src/com/fr/third/bitmap/roaringbitmap/buffer/PointableRoaringArray.java
@@ -0,0 +1,103 @@
+/*
+ * (c) the authors Licensed under the Apache License, Version 2.0.
+ */
+
+package com.fr.third.bitmap.roaringbitmap.buffer;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * Generic interface for the array underlying roaring bitmap classes.
+ */
+public interface PointableRoaringArray extends Cloneable {
+    /**
+     * Find the smallest integer index larger than pos such that
+     * {@code getKeyAtIndex(index) >= x}. If none can be found, return size.
+     *
+     * @param x   minimal value
+     * @param pos index to exceed
+     * @return the smallest index greater than pos such that getKeyAtIndex(index) is at least as large
+     * as x, or size if it is not possible.
+     */
+    int advanceUntil(short x, int pos);
+
+    /**
+     * Create an independent copy of the underlying array.
+     *
+     * @return a copy
+     */
+    PointableRoaringArray clone();
+
+    /**
+     * Returns the cardinality of the container at the given index. Implementations are expected
+     * to make this fast.
+     *
+     * @param i index
+     * @return the cardinality
+     */
+    int getCardinality(int i);
+
+    /**
+     * @param x 16-bit key
+     * @return the container matching the key
+     */
+    MappeableContainer getContainer(short x);
+
+    /**
+     * @param i index
+     * @return the container stored at the index
+     */
+    MappeableContainer getContainerAtIndex(int i);
+
+    /**
+     * @return a ContainerPointer to iterate over the array
+     */
+    MappeableContainerPointer getContainerPointer();
+
+    /**
+     * @param startIndex starting index
+     * @return a ContainerPointer to iterate over the array, initially positioned at startIndex
+     */
+    MappeableContainerPointer getContainerPointer(int startIndex);
+
+    /**
+     * @param x 16-bit key
+     * @return the index corresponding to the key
+     */
+    int getIndex(short x);
+
+    /**
+     * @param i the index
+     * @return the 16-bit key at the index
+     */
+    short getKeyAtIndex(int i);
+
+    /**
+     * Check whether this bitmap has had its runs compressed.
+     *
+     * @return whether this bitmap has run compression
+     */
+    boolean hasRunCompression();
+
+    /**
+     * Serialize.
+     * <p>
+     * The current bitmap is not modified.
+     *
+     * @param out the DataOutput stream
+     * @throws IOException Signals that an I/O exception has occurred.
+     */
+    void serialize(DataOutput out) throws IOException;
+
+    /**
+     * @return the size that the data structure occupies on disk
+     */
+    int serializedSizeInBytes();
+
+
+    /**
+     * @return number of keys
+     */
+    int size();
+}