Browse Source

无JIRA任务、jgit gc 需要googlecode、之前没有打包它

research/11.0
Hugh.C 5 years ago
parent
commit
a7a77e35c7
  1. BIN
      fine-jgit/lib/JavaEWAH-0.7.9.jar
  2. 272
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/dfs/RemoteRepository.java
  3. 2
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java
  4. 2
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitSet.java
  5. 4
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java
  6. 5
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/GC.java
  7. 4
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/InflatingBitSet.java
  8. 2
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndex.java
  9. 2
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java
  10. 4
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java
  11. 2
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexV1.java
  12. 2
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexWriterV1.java
  13. 2
      fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java
  14. 106
      fine-jgit/src/com/fr/third/googlecode/javaewah/BitCounter.java
  15. 71
      fine-jgit/src/com/fr/third/googlecode/javaewah/BitmapStorage.java
  16. 151
      fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedIterator.java
  17. 175
      fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedRunningLengthWord.java
  18. 24
      fine-jgit/src/com/fr/third/googlecode/javaewah/CloneableIterator.java
  19. 1631
      fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHCompressedBitmap.java
  20. 98
      fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHIterator.java
  21. 436
      fine-jgit/src/com/fr/third/googlecode/javaewah/FastAggregation.java
  22. 31
      fine-jgit/src/com/fr/third/googlecode/javaewah/IntIterator.java
  23. 87
      fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorImpl.java
  24. 89
      fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorOverIteratingRLW.java
  25. 276
      fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingBufferedRunningLengthWord.java
  26. 49
      fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingRLW.java
  27. 616
      fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorAggregation.java
  28. 132
      fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorUtil.java
  29. 61
      fine-jgit/src/com/fr/third/googlecode/javaewah/LogicalElement.java
  30. 92
      fine-jgit/src/com/fr/third/googlecode/javaewah/NonEmptyVirtualStorage.java
  31. 152
      fine-jgit/src/com/fr/third/googlecode/javaewah/RunningLengthWord.java
  32. 284
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark.java
  33. 212
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark32.java
  34. 130
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection.java
  35. 130
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection32.java
  36. 164
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion.java
  37. 165
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion32.java
  38. 134
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR.java
  39. 137
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR32.java
  40. 78
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/ClusteredDataGenerator.java
  41. 114
      fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/UniformDataGenerator.java
  42. 102
      fine-jgit/src/com/fr/third/googlecode/javaewah32/BitCounter32.java
  43. 60
      fine-jgit/src/com/fr/third/googlecode/javaewah32/BitmapStorage32.java
  44. 152
      fine-jgit/src/com/fr/third/googlecode/javaewah32/BufferedIterator32.java
  45. 174
      fine-jgit/src/com/fr/third/googlecode/javaewah32/BufferedRunningLengthWord32.java
  46. 1608
      fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHCompressedBitmap32.java
  47. 98
      fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHIterator32.java
  48. 377
      fine-jgit/src/com/fr/third/googlecode/javaewah32/FastAggregation32.java
  49. 90
      fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorImpl32.java
  50. 91
      fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java
  51. 274
      fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java
  52. 42
      fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingRLW32.java
  53. 601
      fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorAggregation32.java
  54. 135
      fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorUtil32.java
  55. 87
      fine-jgit/src/com/fr/third/googlecode/javaewah32/NonEmptyVirtualStorage32.java
  56. 152
      fine-jgit/src/com/fr/third/googlecode/javaewah32/RunningLengthWord32.java

BIN
fine-jgit/lib/JavaEWAH-0.7.9.jar

Binary file not shown.

272
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/dfs/RemoteRepository.java

@ -0,0 +1,272 @@
package com.fr.third.eclipse.jgit.internal.storage.dfs;
import com.fr.third.eclipse.jgit.internal.storage.pack.PackExt;
import com.fr.third.eclipse.jgit.lib.ObjectId;
import com.fr.third.eclipse.jgit.lib.Ref;
import com.fr.third.eclipse.jgit.revwalk.RevWalk;
import com.fr.third.eclipse.jgit.util.RefList;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * A {@link DfsRepository} whose pack data and references are kept in process
 * memory.
 * <p>
 * Pack files are stored as byte arrays attached to their pack description and
 * references are stored in a concurrent map. Nothing visible in this class
 * performs disk or network I/O.
 */
public class RemoteRepository extends DfsRepository {
    /** Source of the numeric part of generated pack names; shared by all instances. */
    private static final AtomicInteger packId = new AtomicInteger();

    private final DfsObjDatabase objdb;

    private final DfsRefDatabase refdb;

    /**
     * Initialize a new remote repository.
     *
     * @param repoDesc
     *            description of the repository.
     * @since 2.0
     */
    public RemoteRepository(DfsRepositoryDescription repoDesc) {
        // The builder is only used to carry the description to the superclass;
        // its build() method is never meant to be called.
        super(new DfsRepositoryBuilder<DfsRepositoryBuilder, RemoteRepository>() {
            @Override
            public RemoteRepository build() throws IOException {
                throw new UnsupportedOperationException();
            }
        }.setRepositoryDescription(repoDesc));
        objdb = new RemoteRepository.RemoteObjDatabase(this);
        refdb = new RemoteRepository.RemoteRefDatabase();
    }

    @Override
    public DfsObjDatabase getObjectDatabase() {
        return objdb;
    }

    @Override
    public DfsRefDatabase getRefDatabase() {
        return refdb;
    }

    /** Object database that keeps the list of committed packs in memory. */
    private class RemoteObjDatabase extends DfsObjDatabase {
        /** Committed packs; replaced wholesale (never mutated) on each commit. */
        private List<DfsPackDescription> packs = new ArrayList<DfsPackDescription>();

        RemoteObjDatabase(DfsRepository repo) {
            super(repo, new DfsReaderOptions());
        }

        @Override
        protected synchronized List<DfsPackDescription> listPacks() {
            return packs;
        }

        /**
         * Create a description for a new, not yet committed pack. The name is
         * built from a process-wide counter and the pack source name.
         */
        @Override
        protected DfsPackDescription newPack(PackSource source) {
            int id = packId.incrementAndGet();
            DfsPackDescription desc = new RemoteRepository.MemPack(
                    "pack-" + id + "-" + source.name(), //$NON-NLS-1$ //$NON-NLS-2$
                    getRepository().getDescription());
            return desc.setPackSource(source);
        }

        /**
         * Publish new packs: the new descriptions are placed first, followed
         * by the previously committed ones, and every description listed in
         * {@code replace} (if any) is removed from the result.
         */
        @Override
        protected synchronized void commitPackImpl(
                Collection<DfsPackDescription> desc,
                Collection<DfsPackDescription> replace) {
            List<DfsPackDescription> n;
            n = new ArrayList<DfsPackDescription>(desc.size() + packs.size());
            n.addAll(desc);
            n.addAll(packs);
            if (replace != null)
                n.removeAll(replace);
            packs = n;
        }

        @Override
        protected void rollbackPack(Collection<DfsPackDescription> desc) {
            // Do nothing. Pack is not recorded until commitPack.
        }

        /**
         * Open a stored file of a pack for reading.
         *
         * @throws FileNotFoundException
         *             if nothing was stored for the given extension.
         */
        @Override
        protected ReadableChannel openFile(DfsPackDescription desc, PackExt ext)
                throws FileNotFoundException, IOException {
            RemoteRepository.MemPack memPack = (RemoteRepository.MemPack) desc;
            byte[] file = memPack.fileMap.get(ext);
            if (file == null)
                throw new FileNotFoundException(desc.getFileName(ext));
            return new RemoteRepository.ByteArrayReadableChannel(file);
        }

        /**
         * Open a stream for writing one file of a pack. The bytes become
         * visible to {@link #openFile} once the stream is flushed or closed.
         */
        @Override
        protected DfsOutputStream writeFile(
                DfsPackDescription desc, final PackExt ext) throws IOException {
            final RemoteRepository.MemPack memPack = (RemoteRepository.MemPack) desc;
            return new RemoteRepository.Out() {
                @Override
                public void flush() {
                    memPack.fileMap.put(ext, getData());
                }
            };
        }
    }

    /** Pack description that carries the content of its files as byte arrays. */
    private static class MemPack extends DfsPackDescription {
        private final Map<PackExt, byte[]>
                fileMap = new HashMap<PackExt, byte[]>();

        MemPack(String name, DfsRepositoryDescription repoDesc) {
            super(repoDesc, name);
        }
    }

    /** Output stream that buffers everything written to it in memory. */
    private abstract static class Out extends DfsOutputStream {
        private final ByteArrayOutputStream dst = new ByteArrayOutputStream();

        /** Cached snapshot of {@link #dst}; invalidated by every write. */
        private byte[] data;

        @Override
        public void write(byte[] buf, int off, int len) {
            data = null;
            dst.write(buf, off, len);
        }

        /**
         * Read back bytes that were already written.
         *
         * @return number of bytes copied into {@code buf}, or -1 when nothing
         *         can be copied (including when {@code position} is at or past
         *         the end of the written data).
         */
        @Override
        public int read(long position, ByteBuffer buf) {
            byte[] d = getData();
            int n = Math.min(buf.remaining(), d.length - (int) position);
            // n is negative when position lies beyond the written data; report
            // end-of-stream instead of passing a negative length to put().
            if (n <= 0)
                return -1;
            buf.put(d, (int) position, n);
            return n;
        }

        byte[] getData() {
            if (data == null)
                data = dst.toByteArray();
            return data;
        }

        @Override
        public abstract void flush();

        @Override
        public void close() {
            flush();
        }
    }

    /** Readable channel over a fixed byte array. */
    private static class ByteArrayReadableChannel implements ReadableChannel {
        private final byte[] data;

        private int position;

        private boolean open = true;

        ByteArrayReadableChannel(byte[] buf) {
            data = buf;
        }

        /**
         * Copy bytes from the current position into {@code dst} and advance.
         *
         * @return number of bytes copied, or -1 when nothing can be copied
         *         (including when the position was moved past the end).
         */
        public int read(ByteBuffer dst) {
            int n = Math.min(dst.remaining(), data.length - position);
            // position(long) accepts any value, so the position may be past the
            // end of the array; treat that as end-of-stream rather than failing
            // inside put() with a negative length.
            if (n <= 0)
                return -1;
            dst.put(data, position, n);
            position += n;
            return n;
        }

        public void close() {
            open = false;
        }

        public boolean isOpen() {
            return open;
        }

        public long position() {
            return position;
        }

        public void position(long newPosition) {
            position = (int) newPosition;
        }

        public long size() {
            return data.length;
        }

        public int blockSize() {
            return 0;
        }
    }

    /** Reference database backed by a concurrent map keyed by ref name. */
    private class RemoteRefDatabase extends DfsRefDatabase {
        private final ConcurrentMap<String, Ref> refs = new ConcurrentHashMap<String, Ref>();

        RemoteRefDatabase() {
            super(RemoteRepository.this);
        }

        /**
         * Build a sorted snapshot of the stored refs. Every ref is added to
         * the id list; symbolic refs are additionally added to the symbolic
         * list.
         */
        @Override
        protected RefCache scanAllRefs() throws IOException {
            RefList.Builder<Ref> ids = new RefList.Builder<Ref>();
            RefList.Builder<Ref> sym = new RefList.Builder<Ref>();
            for (Ref ref : refs.values()) {
                if (ref.isSymbolic())
                    sym.add(ref);
                ids.add(ref);
            }
            ids.sort();
            sym.sort();
            return new RefCache(ids.toRefList(), sym.toRefList());
        }

        /**
         * Store {@code newRef} if the currently stored ref still matches
         * {@code oldRef}.
         *
         * @return true if the map was updated.
         */
        @Override
        protected boolean compareAndPut(Ref oldRef, Ref newRef)
                throws IOException {
            ObjectId id = newRef.getObjectId();
            if (id != null) {
                RevWalk rw = new RevWalk(getRepository());
                try {
                    // Validate that the target exists in a new RevWalk, as the RevWalk
                    // from the RefUpdate might be reading back unflushed objects.
                    rw.parseAny(id);
                } finally {
                    rw.release();
                }
            }
            String name = newRef.getName();
            // A missing or NEW old ref means "create": succeed only if no ref
            // with this name exists yet.
            if (oldRef == null || oldRef.getStorage() == Ref.Storage.NEW)
                return refs.putIfAbsent(name, newRef) == null;
            Ref cur = refs.get(name);
            if (cur != null && eq(cur, oldRef))
                return refs.replace(name, cur, newRef);
            else
                return false;
        }

        /**
         * Remove the ref named like {@code oldRef} if the stored ref still
         * matches it.
         *
         * @return true if the ref was removed.
         */
        @Override
        protected boolean compareAndRemove(Ref oldRef) throws IOException {
            String name = oldRef.getName();
            Ref cur = refs.get(name);
            if (cur != null && eq(cur, oldRef))
                return refs.remove(name, cur);
            else
                return false;
        }

        /** Two refs match when their object ids are equal or both are null. */
        private boolean eq(Ref a, Ref b) {
            if (a.getObjectId() == null && b.getObjectId() == null)
                return true;
            if (a.getObjectId() != null)
                return a.getObjectId().equals(b.getObjectId());
            return false;
        }
    }
}

2
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BasePackBitmapIndex.java

@ -45,7 +45,7 @@ package com.fr.third.eclipse.jgit.internal.storage.file;
import com.fr.third.eclipse.jgit.lib.AnyObjectId; import com.fr.third.eclipse.jgit.lib.AnyObjectId;
import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
/** /**
* Base implementation of the PackBitmapIndex. * Base implementation of the PackBitmapIndex.

2
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitSet.java

@ -43,7 +43,7 @@
package com.fr.third.eclipse.jgit.internal.storage.file; package com.fr.third.eclipse.jgit.internal.storage.file;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import java.util.Arrays; import java.util.Arrays;

4
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/BitmapIndexImpl.java

@ -51,8 +51,8 @@ import com.fr.third.eclipse.jgit.lib.Constants;
import com.fr.third.eclipse.jgit.lib.ObjectId; import com.fr.third.eclipse.jgit.lib.ObjectId;
import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap;
import com.fr.third.eclipse.jgit.util.BlockList; import com.fr.third.eclipse.jgit.util.BlockList;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import com.googlecode.javaewah.IntIterator; import com.fr.third.googlecode.javaewah.IntIterator;
import java.text.MessageFormat; import java.text.MessageFormat;
import java.util.Iterator; import java.util.Iterator;

5
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/GC.java

@ -181,8 +181,9 @@ public class GC {
* @param newPacks * @param newPacks
*/ */
private void deleteOldPacks(Collection<PackFile> oldPacks, private void deleteOldPacks(Collection<PackFile> oldPacks,
Collection<PackFile> newPacks) { Collection<PackFile> newPacks) {
oldPackLoop: for (PackFile oldPack : oldPacks) { oldPackLoop:
for (PackFile oldPack : oldPacks) {
String oldName = oldPack.getPackName(); String oldName = oldPack.getPackName();
// check whether an old pack file is also among the list of new // check whether an old pack file is also among the list of new
// pack files. Then we must not delete it. // pack files. Then we must not delete it.

4
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/InflatingBitSet.java

@ -43,8 +43,8 @@
package com.fr.third.eclipse.jgit.internal.storage.file; package com.fr.third.eclipse.jgit.internal.storage.file;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import com.googlecode.javaewah.IntIterator; import com.fr.third.googlecode.javaewah.IntIterator;
/** /**
* A wrapper around the EWAHCompressedBitmap optimized for the contains * A wrapper around the EWAHCompressedBitmap optimized for the contains

2
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndex.java

@ -47,7 +47,7 @@ import com.fr.third.eclipse.jgit.errors.CorruptObjectException;
import com.fr.third.eclipse.jgit.internal.JGitText; import com.fr.third.eclipse.jgit.internal.JGitText;
import com.fr.third.eclipse.jgit.lib.AnyObjectId; import com.fr.third.eclipse.jgit.lib.AnyObjectId;
import com.fr.third.eclipse.jgit.lib.ObjectId; import com.fr.third.eclipse.jgit.lib.ObjectId;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;

2
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexBuilder.java

@ -50,7 +50,7 @@ import com.fr.third.eclipse.jgit.lib.Constants;
import com.fr.third.eclipse.jgit.lib.ObjectId; import com.fr.third.eclipse.jgit.lib.ObjectId;
import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap;
import com.fr.third.eclipse.jgit.util.BlockList; import com.fr.third.eclipse.jgit.util.BlockList;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import com.fr.third.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap; import com.fr.third.eclipse.jgit.internal.storage.file.BitmapIndexImpl.CompressedBitmap;
import com.fr.third.eclipse.jgit.lib.BitmapIndex.Bitmap; import com.fr.third.eclipse.jgit.lib.BitmapIndex.Bitmap;
import com.fr.third.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; import com.fr.third.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;

4
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexRemapper.java

@ -47,8 +47,8 @@ import com.fr.third.eclipse.jgit.lib.AnyObjectId;
import com.fr.third.eclipse.jgit.lib.BitmapIndex; import com.fr.third.eclipse.jgit.lib.BitmapIndex;
import com.fr.third.eclipse.jgit.lib.ObjectId; import com.fr.third.eclipse.jgit.lib.ObjectId;
import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import com.googlecode.javaewah.IntIterator; import com.fr.third.googlecode.javaewah.IntIterator;
import com.fr.third.eclipse.jgit.internal.storage.file.BasePackBitmapIndex.StoredBitmap; import com.fr.third.eclipse.jgit.internal.storage.file.BasePackBitmapIndex.StoredBitmap;
import java.util.Collections; import java.util.Collections;

2
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexV1.java

@ -50,7 +50,7 @@ import com.fr.third.eclipse.jgit.lib.ObjectId;
import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap; import com.fr.third.eclipse.jgit.lib.ObjectIdOwnerMap;
import com.fr.third.eclipse.jgit.util.IO; import com.fr.third.eclipse.jgit.util.IO;
import com.fr.third.eclipse.jgit.util.NB; import com.fr.third.eclipse.jgit.util.NB;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import java.io.DataInput; import java.io.DataInput;
import java.io.IOException; import java.io.IOException;

2
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/file/PackBitmapIndexWriterV1.java

@ -46,7 +46,7 @@ package com.fr.third.eclipse.jgit.internal.storage.file;
import com.fr.third.eclipse.jgit.internal.JGitText; import com.fr.third.eclipse.jgit.internal.JGitText;
import com.fr.third.eclipse.jgit.lib.Constants; import com.fr.third.eclipse.jgit.lib.Constants;
import com.fr.third.eclipse.jgit.util.io.SafeBufferedOutputStream; import com.fr.third.eclipse.jgit.util.io.SafeBufferedOutputStream;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import com.fr.third.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder.StoredEntry; import com.fr.third.eclipse.jgit.internal.storage.file.PackBitmapIndexBuilder.StoredEntry;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;

2
fine-jgit/src/com/fr/third/eclipse/jgit/internal/storage/pack/PackWriterBitmapPreparer.java

@ -60,7 +60,7 @@ import com.fr.third.eclipse.jgit.revwalk.RevCommit;
import com.fr.third.eclipse.jgit.revwalk.RevObject; import com.fr.third.eclipse.jgit.revwalk.RevObject;
import com.fr.third.eclipse.jgit.revwalk.RevWalk; import com.fr.third.eclipse.jgit.revwalk.RevWalk;
import com.fr.third.eclipse.jgit.util.BlockList; import com.fr.third.eclipse.jgit.util.BlockList;
import com.googlecode.javaewah.EWAHCompressedBitmap; import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import com.fr.third.eclipse.jgit.lib.BitmapIndex.BitmapBuilder; import com.fr.third.eclipse.jgit.lib.BitmapIndex.BitmapBuilder;
import java.io.IOException; import java.io.IOException;

106
fine-jgit/src/com/fr/third/googlecode/javaewah/BitCounter.java

@ -0,0 +1,106 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * A {@link BitmapStorage} that stores nothing: every word handed to it is
 * only inspected to keep a running total of set bits.
 *
 * @since 0.4.0
 * @author David McIntosh
 */
public final class BitCounter implements BitmapStorage {
    /** Running total of set bits seen so far. */
    private int oneBits;

    /**
     * Count the set bits of a single word.
     *
     * @param newdata
     *            the word
     */
    @Override
    public void add(final long newdata) {
        oneBits += Long.bitCount(newdata);
    }

    /**
     * Count the set bits of a range of literal words.
     *
     * @param data
     *            the literal words
     * @param start
     *            index of the first word to count
     * @param number
     *            how many words to count
     */
    @Override
    public void addStreamOfLiteralWords(long[] data, int start, int number) {
        final int end = start + number;
        for (int index = start; index < end; ++index) {
            oneBits += Long.bitCount(data[index]);
        }
    }

    /**
     * Account for a run of identical words. A run of zero words adds nothing;
     * a run of one words adds a full word of bits per word.
     *
     * @param v
     *            true for words of ones, false for words of zeros
     * @param number
     *            how many words the run contains
     */
    @Override
    public void addStreamOfEmptyWords(boolean v, long number) {
        if (!v) {
            return;
        }
        this.oneBits += number * EWAHCompressedBitmap.wordinbits;
    }

    /**
     * Count the set bits of the complement of a range of literal words.
     *
     * @param data
     *            the literal words
     * @param start
     *            index of the first word to count
     * @param number
     *            how many words to count
     */
    @Override
    public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) {
        final int end = start + number;
        for (int index = start; index < end; ++index) {
            oneBits += Long.bitCount(~data[index]);
        }
    }

    /**
     * Report how many set bits have been recorded so far.
     *
     * @return number of set bits
     */
    public int getCount() {
        return oneBits;
    }

    /**
     * Required by the interface; this class keeps no size, so the value is
     * ignored.
     *
     * @param bits
     *            number of bits
     */
    @Override
    public void setSizeInBits(int bits) {
        // intentionally empty
    }
}

71
fine-jgit/src/com/fr/third/googlecode/javaewah/BitmapStorage.java

@ -0,0 +1,71 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Low-level, word-oriented write operations on a bitset.
 *
 * @since 0.4.0
 * @author David McIntosh
 */
public interface BitmapStorage {
    /**
     * Append one word to the bitmap (for expert use). Data is added one
     * 64-bit word at a time.
     *
     * @param newdata
     *            the word
     */
    void add(long newdata);

    /**
     * Append several literal words in one call; this may be faster than
     * adding them one by one.
     *
     * @param data
     *            the literal words
     * @param start
     *            the starting point in the array
     * @param number
     *            the number of literal words to add
     */
    void addStreamOfLiteralWords(long[] data, int start, int number);

    /**
     * For experts: append a run of words that are all zeros or all ones.
     *
     * @param v
     *            zeros or ones
     * @param number
     *            how many words to add
     */
    void addStreamOfEmptyWords(boolean v, long number);

    /**
     * Same as {@link #addStreamOfLiteralWords(long[], int, int)}, except that
     * each word is negated before it is added.
     *
     * @param data
     *            the literal words
     * @param start
     *            the starting point in the array
     * @param number
     *            the number of literal words to add
     */
    void addStreamOfNegatedLiteralWords(long[] data, int start, int number);

    /**
     * Directly set the size-in-bits field.
     *
     * @param bits
     *            number of bits
     */
    void setSizeInBits(int bits);
}

151
fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedIterator.java

@ -0,0 +1,151 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Walks over blocks of bitmap data drawn from a sequence of
 * {@link EWAHIterator}s, exposing the current running length word and its
 * literal words.
 *
 * @author Daniel Lemire
 *
 */
public class BufferedIterator implements IteratingRLW {
    /** Mutable copy of the running length word currently being consumed. */
    private BufferedRunningLengthWord brlw;

    /** Word array of the iterator currently in use. */
    private long[] buffer;

    /** Index in {@link #buffer} of the first literal word not yet discarded. */
    private int literalWordStartPosition;

    /** Iterator currently being drained. */
    private EWAHIterator iterator;

    /** Supplies the next {@link EWAHIterator} once the current one is drained. */
    private CloneableIterator<EWAHIterator> masteriterator;

    /**
     * Instantiates a new iterating buffered running length word.
     * If the supplied iterator is empty, no current word is set up.
     *
     * @param iterator iterator
     */
    public BufferedIterator(final CloneableIterator<EWAHIterator> iterator) {
        this.masteriterator = iterator;
        if (!this.masteriterator.hasNext()) {
            return;
        }
        final EWAHIterator first = this.masteriterator.next();
        this.iterator = first;
        this.brlw = new BufferedRunningLengthWord(first.next());
        this.literalWordStartPosition = first.literalWords() + this.brlw.literalwordoffset;
        this.buffer = first.buffer();
    }

    /**
     * Discard first words, iterating to the next running length word if needed.
     *
     * @param x the number of words to be discarded
     */
    @Override
    public void discardFirstWords(long x) {
        while (x > 0) {
            // The run alone covers the request: shorten it and stop.
            if (this.brlw.RunningLength > x) {
                this.brlw.RunningLength -= x;
                return;
            }
            // Consume the whole run, then as many literal words as needed.
            x -= this.brlw.RunningLength;
            this.brlw.RunningLength = 0;
            final long toDiscard = Math.min(x, this.brlw.NumberOfLiteralWords);
            this.literalWordStartPosition += toDiscard;
            this.brlw.NumberOfLiteralWords -= toDiscard;
            x -= toDiscard;
            // Advance when more remains to discard or the current word is
            // used up; stop when there is nothing left to advance to.
            final boolean mustAdvance = (x > 0) || (this.brlw.size() == 0);
            if (mustAdvance && !this.next()) {
                return;
            }
        }
    }

    /**
     * Move to the next RunningLengthWord
     * @return whether the move was possible
     */
    @Override
    public boolean next() {
        if (!this.iterator.hasNext() && !reload()) {
            // Nothing left anywhere: leave an empty current word behind.
            this.brlw.NumberOfLiteralWords = 0;
            this.brlw.RunningLength = 0;
            return false;
        }
        this.brlw.reset(this.iterator.next());
        // reset() zeroes literalwordoffset, so no offset is added here.
        this.literalWordStartPosition = this.iterator.literalWords();
        return true;
    }

    /**
     * Switch to the next iterator supplied by the master iterator.
     *
     * @return false when the master iterator has nothing more to offer
     */
    private boolean reload() {
        if (this.masteriterator.hasNext()) {
            this.iterator = this.masteriterator.next();
            this.buffer = this.iterator.buffer();
            return true;
        }
        return false;
    }

    /**
     * Get the nth literal word for the current running length word
     * @param index zero based index
     * @return the literal word
     */
    @Override
    public long getLiteralWordAt(int index) {
        final int absolute = this.literalWordStartPosition + index;
        return this.buffer[absolute];
    }

    /**
     * Gets the number of literal words for the current running length word.
     *
     * @return the number of literal words
     */
    @Override
    public int getNumberOfLiteralWords() {
        return this.brlw.NumberOfLiteralWords;
    }

    /**
     * Gets the running bit.
     *
     * @return the running bit
     */
    @Override
    public boolean getRunningBit() {
        return this.brlw.RunningBit;
    }

    /**
     * Gets the running length.
     *
     * @return the running length
     */
    @Override
    public long getRunningLength() {
        return this.brlw.RunningLength;
    }

    /**
     * Size in uncompressed words of the current running length word.
     *
     * @return the size
     */
    @Override
    public long size() {
        return this.brlw.size();
    }

    /**
     * Copy this iterator. The current word and both iterators are cloned;
     * the word array is shared with the original.
     */
    @Override
    public BufferedIterator clone() throws CloneNotSupportedException {
        // super.clone() already copies the buffer reference and the literal
        // start position; only the mutable helper objects need their own copy.
        final BufferedIterator copy = (BufferedIterator) super.clone();
        copy.brlw = this.brlw.clone();
        copy.iterator = this.iterator.clone();
        copy.masteriterator = this.masteriterator.clone();
        return copy;
    }
}

175
fine-jgit/src/com/fr/third/googlecode/javaewah/BufferedRunningLengthWord.java

@ -0,0 +1,175 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Mostly for internal use. Similar to RunningLengthWord, but can
* be modified without access to the array, and has faster access.
*
* @author Daniel Lemire
* @since 0.1.0
*
*/
public final class BufferedRunningLengthWord implements Cloneable {
/**
* Instantiates a new buffered running length word.
*
* @param a the word
*/
public BufferedRunningLengthWord(final long a) {
this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits));
this.RunningBit = (a & 1) != 0;
this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount);
}
/**
* Instantiates a new buffered running length word.
*
* @param rlw the rlw
*/
public BufferedRunningLengthWord(final RunningLengthWord rlw) {
this(rlw.parent.buffer[rlw.position]);
}
/**
* Discard first words.
*
* @param x the x
*/
public void discardFirstWords(long x) {
if (this.RunningLength >= x) {
this.RunningLength -= x;
return;
}
x -= this.RunningLength;
this.RunningLength = 0;
this.literalwordoffset += x;
this.NumberOfLiteralWords -= x;
}
/**
* Gets the number of literal words.
*
* @return the number of literal words
*/
public int getNumberOfLiteralWords() {
return this.NumberOfLiteralWords;
}
/**
* Gets the running bit.
*
* @return the running bit
*/
public boolean getRunningBit() {
return this.RunningBit;
}
/**
* Gets the running length.
*
* @return the running length
*/
public long getRunningLength() {
return this.RunningLength;
}
/**
* Reset the values using the provided word.
*
* @param a the word
*/
public void reset(final long a) {
this.NumberOfLiteralWords = (int) (a >>> (1 + RunningLengthWord.runninglengthbits));
this.RunningBit = (a & 1) != 0;
this.RunningLength = (int) ((a >>> 1) & RunningLengthWord.largestrunninglengthcount);
this.literalwordoffset = 0;
}
/**
* Reset the values of this running length word so that it has the same values
* as the other running length word.
*
* @param rlw the other running length word
*/
public void reset(final RunningLengthWord rlw) {
reset(rlw.parent.buffer[rlw.position]);
}
/**
* Sets the number of literal words.
*
* @param number the new number of literal words
*/
public void setNumberOfLiteralWords(final int number) {
this.NumberOfLiteralWords = number;
}
/**
* Sets the running bit.
*
* @param b the new running bit
*/
public void setRunningBit(final boolean b) {
this.RunningBit = b;
}
/**
* Sets the running length.
*
* @param number the new running length
*/
public void setRunningLength(final long number) {
this.RunningLength = number;
}
/**
* Size in uncompressed words.
*
* @return the long
*/
public long size() {
return this.RunningLength + this.NumberOfLiteralWords;
}
/*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "running bit = " + getRunningBit() + " running length = "
+ getRunningLength() + " number of lit. words "
+ getNumberOfLiteralWords();
}
@Override
public BufferedRunningLengthWord clone() throws CloneNotSupportedException {
BufferedRunningLengthWord answer = (BufferedRunningLengthWord) super.clone();
answer.literalwordoffset = this.literalwordoffset;
answer.NumberOfLiteralWords = this.NumberOfLiteralWords;
answer.RunningBit = this.RunningBit;
answer.RunningLength = this.RunningLength;
return answer;
}
/** how many literal words have we read so far? */
public int literalwordoffset = 0;
/** The Number of literal words. */
public int NumberOfLiteralWords;
/** The Running bit. */
public boolean RunningBit;
/** The Running length. */
public long RunningLength;
}

24
fine-jgit/src/com/fr/third/googlecode/javaewah/CloneableIterator.java

@ -0,0 +1,24 @@
package com.fr.third.googlecode.javaewah;
/**
 * An iterator in the style of the standard Java one that can additionally
 * be cloned.
 *
 * @param <E> the data type of the iterator
 */
public interface CloneableIterator<E> extends Cloneable {

    /**
     * @return true while at least one more element is available
     */
    boolean hasNext();

    /**
     * @return the element that follows
     */
    E next();

    /**
     * @return a copy of this iterator
     * @throws CloneNotSupportedException this should never happen in practice
     */
    CloneableIterator<E> clone() throws CloneNotSupportedException;
}

1631
fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHCompressedBitmap.java

File diff suppressed because it is too large Load Diff

98
fine-jgit/src/com/fr/third/googlecode/javaewah/EWAHIterator.java

@ -0,0 +1,98 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * The class EWAHIterator is a specialised iterator that steps through the
 * running length words of a compressed bitmap.
 * It is not meant for end users.
 * @author Daniel Lemire
 * @since 0.1.0
 *
 */
public final class EWAHIterator implements Cloneable {
    /** The pointer represent the location of the current running length
     * word in the array of words (embedded in the rlw attribute). */
    int pointer;

    /** The current running length word. */
    RunningLengthWord rlw;

    /** The size in words. */
    int size;

    /**
     * Instantiates a new EWAH iterator positioned at the first word.
     *
     * @param a the bitmap whose words are iterated
     * @param sizeinwords the number of words that are significant in the array of words
     */
    public EWAHIterator(final EWAHCompressedBitmap a, final int sizeinwords) {
        this.pointer = 0;
        this.size = sizeinwords;
        this.rlw = new RunningLengthWord(a, 0);
    }

    /**
     * Allow expert developers to instantiate an EWAHIterator.
     *
     * @param bitmap we want to iterate over
     * @return an iterator
     */
    public static EWAHIterator getEWAHIterator(EWAHCompressedBitmap bitmap) {
        return bitmap.getEWAHIterator();
    }

    /**
     * Access to the array of words
     *
     * @return the long[]
     */
    public long[] buffer() {
        final EWAHCompressedBitmap owner = this.rlw.parent;
        return owner.buffer;
    }

    /**
     * Position of the literal words represented by this running length word.
     *
     * @return the int
     */
    public int literalWords() {
        final int literalCount = this.rlw.getNumberOfLiteralWords();
        return this.pointer - literalCount;
    }

    /**
     * Checks for next.
     *
     * @return true, if successful
     */
    public boolean hasNext() {
        return this.size > this.pointer;
    }

    /**
     * Next running length word. The same word object is returned on every
     * call; only its position changes.
     *
     * @return the running length word
     */
    public RunningLengthWord next() {
        final RunningLengthWord current = this.rlw;
        // Position the word first: the literal count read below is the one
        // of the word at the new position.
        current.position = this.pointer;
        // Skip the marker word itself plus the literal words that follow it.
        this.pointer = this.pointer + current.getNumberOfLiteralWords() + 1;
        return current;
    }

    @Override
    public EWAHIterator clone() throws CloneNotSupportedException {
        // super.clone() copies size and pointer; the running length word is
        // mutable and therefore needs its own copy.
        final EWAHIterator copy = (EWAHIterator) super.clone();
        copy.rlw = this.rlw.clone();
        return copy;
    }
}

436
fine-jgit/src/com/fr/third/googlecode/javaewah/FastAggregation.java

@ -0,0 +1,436 @@
package com.fr.third.googlecode.javaewah;
import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Fast algorithms to aggregate many bitmaps. These algorithms are just given as
* reference. They may not be faster than the corresponding methods in the
* EWAHCompressedBitmap class.
*
* @author Daniel Lemire
*
*/
public class FastAggregation {
/**
 * Computes the and aggregate of the given bitmaps using a temporary
 * uncompressed bitmap, and returns it as a new bitmap.
 *
 * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap)
 * @param bitmaps the source bitmaps
 * @return the and aggregate
 */
public static EWAHCompressedBitmap bufferedand(final int bufsize,
        final EWAHCompressedBitmap... bitmaps) {
    final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
    bufferedandWithContainer(result, bufsize, bitmaps);
    return result;
}
/**
 * Computes the and aggregate of the given bitmaps using a temporary
 * uncompressed bitmap, appending the resulting words to a container.
 * Processing stops as soon as any one of the inputs has nothing left.
 *
 * @param container where the aggregate is written
 * @param bufsize buffer size used during the computation in 64-bit words (per input bitmap)
 * @param bitmaps the source bitmaps
 */
public static void bufferedandWithContainer(final BitmapStorage container,final int bufsize,
        final EWAHCompressedBitmap... bitmaps) {
    final java.util.LinkedList<IteratingBufferedRunningLengthWord> sources =
            new java.util.LinkedList<IteratingBufferedRunningLengthWord>();
    for (int b = 0; b < bitmaps.length; ++b) {
        sources.add(new IteratingBufferedRunningLengthWord(bitmaps[b]));
    }
    final long[] scratch = new long[bufsize * bitmaps.length];

    // Nothing to do without inputs, or when one input is already exhausted.
    boolean done = sources.isEmpty();
    for (IteratingRLW source : sources) {
        if (source.size() == 0) {
            done = true;
            break;
        }
    }

    while (!done) {
        // all-ones is the neutral element of the and operation
        Arrays.fill(scratch, ~0L);
        // keep the smallest word count reported by the in-place and calls
        long produced = Integer.MAX_VALUE;
        for (IteratingRLW source : sources) {
            produced = Math.min(produced, IteratorAggregation.inplaceand(scratch, source));
        }
        for (int w = 0; w < produced; ++w) {
            container.add(scratch[w]);
        }
        for (IteratingRLW source : sources) {
            if (source.size() == 0) {
                done = true;
                break;
            }
        }
    }
}
/**
 * Computes the or aggregate of the given bitmaps using a temporary
 * uncompressed bitmap, and returns it as a new bitmap.
 *
 * @param bufsize buffer size used during the computation in 64-bit words
 * @param bitmaps the source bitmaps
 * @return the or aggregate
 */
public static EWAHCompressedBitmap bufferedor(final int bufsize,
        final EWAHCompressedBitmap... bitmaps) {
    final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
    bufferedorWithContainer(result, bufsize, bitmaps);
    return result;
}
/**
 * Computes the or aggregate of the given bitmaps using a temporary
 * uncompressed bitmap, appending the resulting words to a container. The
 * container's size in bits is finally set to the largest
 * {@code sizeinbits} found among the inputs (0 when there are none).
 *
 * @param container where the aggregate is written
 * @param bufsize buffer size used during the computation in 64-bit words
 * @param bitmaps the source bitmaps; the array itself is not modified
 */
public static void bufferedorWithContainer(final BitmapStorage container, final int bufsize,
        final EWAHCompressedBitmap... bitmaps) {
    int range = 0;
    EWAHCompressedBitmap[] sbitmaps = bitmaps.clone();
    // Sort by sizeinbits, largest first. The loop below relies on this
    // order: once an iterator is found empty, every later one is ignored.
    Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() {
        @Override
        public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
            // explicit comparison rather than a subtraction, which can overflow
            return a.sizeinbits < b.sizeinbits ? 1
                    : a.sizeinbits == b.sizeinbits ? 0 : -1;
        }
    });
    java.util.ArrayList<IteratingBufferedRunningLengthWord> al =
            new java.util.ArrayList<IteratingBufferedRunningLengthWord>();
    for (EWAHCompressedBitmap bitmap : sbitmaps) {
        if (bitmap.sizeinbits > range) {
            range = bitmap.sizeinbits;
        }
        al.add(new IteratingBufferedRunningLengthWord(bitmap));
    }
    long[] hardbitmap = new long[bufsize];
    // number of leading iterators that still have words to contribute
    int maxr = al.size();
    while (maxr > 0) {
        // largest word count reported by the in-place or calls of this round
        long effective = 0;
        for (int k = 0; k < maxr; ++k) {
            if (al.get(k).size() > 0) {
                int eff = IteratorAggregation.inplaceor(hardbitmap, al.get(k));
                if (eff > effective) {
                    effective = eff;
                }
            } else {
                // shrinking the bound also terminates this inner loop
                maxr = k;
            }
        }
        for (int k = 0; k < effective; ++k) {
            container.add(hardbitmap[k]);
        }
        // all-zeros is the neutral element of the or operation
        Arrays.fill(hardbitmap, 0);
    }
    container.setSizeInBits(range);
}
/**
 * Computes the xor aggregate of the given bitmaps using a temporary
 * uncompressed bitmap, and returns it as a new bitmap.
 *
 * @param bufsize buffer size used during the computation in 64-bit words
 * @param bitmaps the source bitmaps
 * @return the xor aggregate
 */
public static EWAHCompressedBitmap bufferedxor(final int bufsize,
        final EWAHCompressedBitmap... bitmaps) {
    final EWAHCompressedBitmap result = new EWAHCompressedBitmap();
    bufferedxorWithContainer(result, bufsize, bitmaps);
    return result;
}
/**
 * Computes the xor aggregate of the given bitmaps using a temporary
 * uncompressed bitmap, appending the resulting words to a container. The
 * container's size in bits is finally set to the largest
 * {@code sizeinbits} found among the inputs (0 when there are none).
 *
 * @param container where the aggregate is written
 * @param bufsize buffer size used during the computation in 64-bit words
 * @param bitmaps the source bitmaps; the array itself is not modified
 */
public static void bufferedxorWithContainer(final BitmapStorage container, final int bufsize,
        final EWAHCompressedBitmap... bitmaps) {
    int range = 0;
    EWAHCompressedBitmap[] sbitmaps = bitmaps.clone();
    // Sort by sizeinbits, largest first. The loop below relies on this
    // order: once an iterator is found empty, every later one is ignored.
    Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap>() {
        @Override
        public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
            // explicit comparison rather than a subtraction, which can overflow
            return a.sizeinbits < b.sizeinbits ? 1
                    : a.sizeinbits == b.sizeinbits ? 0 : -1;
        }
    });
    java.util.ArrayList<IteratingBufferedRunningLengthWord> al =
            new java.util.ArrayList<IteratingBufferedRunningLengthWord>();
    for (EWAHCompressedBitmap bitmap : sbitmaps) {
        if (bitmap.sizeinbits > range) {
            range = bitmap.sizeinbits;
        }
        al.add(new IteratingBufferedRunningLengthWord(bitmap));
    }
    long[] hardbitmap = new long[bufsize];
    // number of leading iterators that still have words to contribute
    int maxr = al.size();
    while (maxr > 0) {
        // largest word count reported by the in-place xor calls of this round
        long effective = 0;
        for (int k = 0; k < maxr; ++k) {
            if (al.get(k).size() > 0) {
                int eff = IteratorAggregation.inplacexor(hardbitmap, al.get(k));
                if (eff > effective) {
                    effective = eff;
                }
            } else {
                // shrinking the bound also terminates this inner loop
                maxr = k;
            }
        }
        for (int k = 0; k < effective; ++k) {
            container.add(hardbitmap[k]);
        }
        // all-zeros is the neutral element of the xor operation
        Arrays.fill(hardbitmap, 0);
    }
    container.setSizeInBits(range);
}
/**
 * Uses a priority queue to compute the or aggregate: the two smallest
 * elements (by {@code sizeInBytes()}) are repeatedly replaced by their
 * union until a single element remains.
 *
 * @param <T> a class extending LogicalElement (like a compressed bitmap)
 * @param bitmaps bitmaps to be aggregated; when a single bitmap is given
 *        it is returned as is
 * @return the or aggregate
 * @throws IllegalArgumentException if no bitmap is given (the priority
 *         queue rejects an initial capacity of zero)
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static <T extends LogicalElement> T or(T... bitmaps) {
    PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length,
            new Comparator<T>() {
                @Override
                public int compare(T a, T b) {
                    // explicit comparison rather than a subtraction, which can overflow
                    final int sizeA = a.sizeInBytes();
                    final int sizeB = b.sizeInBytes();
                    return sizeA < sizeB ? -1 : sizeA == sizeB ? 0 : 1;
                }
            });
    for (T x : bitmaps) {
        pq.add(x);
    }
    while (pq.size() > 1) {
        T x1 = pq.poll();
        T x2 = pq.poll();
        pq.add((T) x1.or(x2));
    }
    return pq.poll();
}
/**
 * Uses a priority queue to compute the or aggregate: the two smallest
 * bitmaps (by {@code sizeInBytes()}) are repeatedly replaced by their
 * union until two remain; these last two are combined directly into the
 * container.
 *
 * @param container where we write the result
 * @param bitmaps to be aggregated
 * @throws IllegalArgumentException if fewer than two bitmaps are given
 */
public static void orToContainer(final BitmapStorage container,
        final EWAHCompressedBitmap ... bitmaps) {
    if (bitmaps.length < 2) {
        throw new IllegalArgumentException("We need at least two bitmaps");
    }
    PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length,
            new Comparator<EWAHCompressedBitmap>() {
                @Override
                public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
                    // explicit comparison rather than a subtraction, which can overflow
                    final int sizeA = a.sizeInBytes();
                    final int sizeB = b.sizeInBytes();
                    return sizeA < sizeB ? -1 : sizeA == sizeB ? 0 : 1;
                }
            });
    for (EWAHCompressedBitmap x : bitmaps) {
        pq.add(x);
    }
    while (pq.size() > 2) {
        EWAHCompressedBitmap x1 = pq.poll();
        EWAHCompressedBitmap x2 = pq.poll();
        pq.add(x1.or(x2));
    }
    pq.poll().orToContainer(pq.poll(), container);
}
/**
 * Uses a priority queue to compute the xor aggregate: the two smallest
 * elements (by {@code sizeInBytes()}) are repeatedly replaced by their
 * symmetric difference until a single element remains.
 *
 * @param <T> a class extending LogicalElement (like a compressed bitmap)
 * @param bitmaps bitmaps to be aggregated; when a single bitmap is given
 *        it is returned as is
 * @return the xor aggregate
 * @throws IllegalArgumentException if no bitmap is given (the priority
 *         queue rejects an initial capacity of zero)
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static <T extends LogicalElement> T xor(T... bitmaps) {
    PriorityQueue<T> pq = new PriorityQueue<T>(bitmaps.length,
            new Comparator<T>() {
                @Override
                public int compare(T a, T b) {
                    // explicit comparison rather than a subtraction, which can overflow
                    final int sizeA = a.sizeInBytes();
                    final int sizeB = b.sizeInBytes();
                    return sizeA < sizeB ? -1 : sizeA == sizeB ? 0 : 1;
                }
            });
    for (T x : bitmaps) {
        pq.add(x);
    }
    while (pq.size() > 1) {
        T x1 = pq.poll();
        T x2 = pq.poll();
        pq.add((T) x1.xor(x2));
    }
    return pq.poll();
}
/**
 * Uses a priority queue to compute the xor aggregate: the two smallest
 * bitmaps (by {@code sizeInBytes()}) are repeatedly replaced by their
 * symmetric difference until two remain; these last two are combined
 * directly into the container.
 *
 * @param container where we write the result
 * @param bitmaps to be aggregated
 * @throws IllegalArgumentException if fewer than two bitmaps are given
 */
public static void xorToContainer(final BitmapStorage container,
        final EWAHCompressedBitmap ... bitmaps) {
    if (bitmaps.length < 2) {
        throw new IllegalArgumentException("We need at least two bitmaps");
    }
    PriorityQueue<EWAHCompressedBitmap> pq = new PriorityQueue<EWAHCompressedBitmap>(bitmaps.length,
            new Comparator<EWAHCompressedBitmap>() {
                @Override
                public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
                    // explicit comparison rather than a subtraction, which can overflow
                    final int sizeA = a.sizeInBytes();
                    final int sizeB = b.sizeInBytes();
                    return sizeA < sizeB ? -1 : sizeA == sizeB ? 0 : 1;
                }
            });
    for (EWAHCompressedBitmap x : bitmaps) {
        pq.add(x);
    }
    while (pq.size() > 2) {
        EWAHCompressedBitmap x1 = pq.poll();
        EWAHCompressedBitmap x2 = pq.poll();
        pq.add(x1.xor(x2));
    }
    pq.poll().xorToContainer(pq.poll(), container);
}
/**
 * For internal use. Computes the bitwise or of the provided bitmaps and
 * stores the result in the container. (This used to be the default.)
 * <p>
 * With exactly two bitmaps the work is delegated to
 * {@code bitmaps[0].orToContainer(bitmaps[1], container)}. Otherwise the
 * bitmaps are walked in parallel, one running length word at a time, and
 * the container's size in bits is finally set to the largest
 * {@code sizeinbits} among the inputs.
 *
 * @deprecated use EWAHCompressedBitmap.or instead
 * @since 0.4.0
 * @param container where the result is stored
 * @param bitmaps to be aggregated; the array itself is not modified
 */
@Deprecated
public static void legacy_orWithContainer(final BitmapStorage container,
final EWAHCompressedBitmap... bitmaps) {
if (bitmaps.length == 2) {
// should be more efficient
bitmaps[0].orToContainer(bitmaps[1], container);
return;
}
// Sort the bitmaps in descending order by sizeinbits. We will exhaust the
// sorted bitmaps from right to left.
final EWAHCompressedBitmap[] sortedBitmaps = bitmaps.clone();
Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap>() {
@Override
public int compare(EWAHCompressedBitmap a, EWAHCompressedBitmap b) {
return a.sizeinbits < b.sizeinbits ? 1
: a.sizeinbits == b.sizeinbits ? 0 : -1;
}
});
// One buffered word iterator per bitmap that has at least one word;
// maxAvailablePos counts the usable entries at the front of rlws.
final IteratingBufferedRunningLengthWord[] rlws = new IteratingBufferedRunningLengthWord[bitmaps.length];
int maxAvailablePos = 0;
for (EWAHCompressedBitmap bitmap : sortedBitmaps) {
EWAHIterator iterator = bitmap.getEWAHIterator();
if (iterator.hasNext()) {
rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord(
iterator);
}
}
if (maxAvailablePos == 0) { // this never happens...
container.setSizeInBits(0);
return;
}
// the array is sorted in descending order, so the first entry is the largest
int maxSize = sortedBitmaps[0].sizeinbits;
while (true) {
// Survey the current running length word of every live iterator:
// maxOneRl   - longest run of 1-fill words seen
// minZeroRl  - shortest run of 0-fill words seen (0 once any 1-run is seen)
// minSize    - smallest current word size (run plus literal words)
// numEmptyRl - how many iterators currently have a run of length zero
long maxOneRl = 0;
long minZeroRl = Long.MAX_VALUE;
long minSize = Long.MAX_VALUE;
int numEmptyRl = 0;
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord rlw = rlws[i];
long size = rlw.size();
if (size == 0) {
// this iterator is exhausted: it and every later one are dropped
maxAvailablePos = i;
break;
}
minSize = Math.min(minSize, size);
if (rlw.getRunningBit()) {
long rl = rlw.getRunningLength();
maxOneRl = Math.max(maxOneRl, rl);
minZeroRl = 0;
if (rl == 0 && size > 0) {
numEmptyRl++;
}
} else {
long rl = rlw.getRunningLength();
minZeroRl = Math.min(minZeroRl, rl);
if (rl == 0 && size > 0) {
numEmptyRl++;
}
}
}
if (maxAvailablePos == 0) {
break;
} else if (maxAvailablePos == 1) {
// only one bitmap is left so just write the rest of it out
rlws[0].discharge(container);
break;
}
if (maxOneRl > 0) {
// a run of ones dominates the or: emit it and skip that many words everywhere
container.addStreamOfEmptyWords(true, maxOneRl);
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord rlw = rlws[i];
rlw.discardFirstWords(maxOneRl);
}
} else if (minZeroRl > 0) {
// every iterator starts with a run of zeros: emit the part they all share
container.addStreamOfEmptyWords(false, minZeroRl);
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord rlw = rlws[i];
rlw.discardFirstWords(minZeroRl);
}
} else {
// at least one iterator is positioned on literal words
int index = 0;
if (numEmptyRl == 1) {
// if one rlw has literal words to process and the rest have a run of
// 0's we can write them out here
IteratingBufferedRunningLengthWord emptyRl = null;
long minNonEmptyRl = Long.MAX_VALUE;
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord rlw = rlws[i];
long rl = rlw.getRunningLength();
if (rl == 0) {
assert emptyRl == null;
emptyRl = rlw;
} else {
minNonEmptyRl = Math.min(minNonEmptyRl, rl);
}
}
// copy literal words only for as long as all the others are still in their zero run
long wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl;
if (emptyRl != null)
emptyRl.writeLiteralWords((int) wordsToWrite, container);
index += wordsToWrite;
}
// Or the remaining words one by one; an iterator contributes a literal
// word only once index has moved past its run of zeros.
while (index < minSize) {
long word = 0;
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord rlw = rlws[i];
if (rlw.getRunningLength() <= index) {
word |= rlw.getLiteralWordAt(index - (int) rlw.getRunningLength());
}
}
container.add(word);
index++;
}
// everything up to minSize has been written: advance all iterators
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord rlw = rlws[i];
rlw.discardFirstWords(minSize);
}
}
}
container.setSizeInBits(maxSize);
}
}

31
fine-jgit/src/com/fr/third/googlecode/javaewah/IntIterator.java

@ -0,0 +1,31 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Iterator over a stream of primitive {@code int} values.
 *
 * @author Daniel Lemire
 * @since 0.2.0
 */
public interface IntIterator {

    /**
     * Tells whether another integer is available.
     *
     * @return {@code true} if there is more, {@code false} otherwise
     */
    boolean hasNext();

    /**
     * Hands out the next integer of the stream.
     *
     * @return the integer
     */
    int next();
}

87
fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorImpl.java

@ -0,0 +1,87 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2012, Google Inc.
* Licensed under the Apache License, Version 2.0.
*/
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits;
/**
 * 64-bit implementation of {@link IntIterator}: it walks an
 * {@link EWAHIterator} and hands out the positions of the set bits, one
 * integer at a time.
 *
 * @author Colby Ranger
 * @since 0.5.6
 */
final class IntIteratorImpl implements IntIterator {

    private final EWAHIterator ewahIter;
    /** The array of words of the iterated bitmap, as exposed by the EWAH iterator. */
    private final long[] ewahBuffer;
    /** Bit position reached so far. */
    private int position;
    /** Bit position at which the current run ends. */
    private int runningLength;
    /** Literal word being consumed; its bits are cleared as they are reported. */
    private long word;
    /** Index in the buffer of the next literal word to load. */
    private int wordPosition;
    /** Index in the buffer just past the last literal word of the current running length word. */
    private int wordLength;
    /** Bit position of the first bit of the literal word held in {@code word}. */
    private int literalPosition;
    private boolean hasnext;

    IntIteratorImpl(EWAHIterator ewahIter) {
        this.ewahIter = ewahIter;
        this.ewahBuffer = ewahIter.buffer();
        this.hasnext = this.moveToNext();
    }

    /**
     * Advances until a set bit is available, pulling further running length
     * words from the EWAH iterator when the current one is used up.
     *
     * @return whether a set bit is available
     */
    public final boolean moveToNext() {
        for (;;) {
            if (runningHasNext() || literalHasNext()) {
                return true;
            }
            if (!ewahIter.hasNext()) {
                return false;
            }
            setRunningLengthWord(ewahIter.next());
        }
    }

    @Override
    public boolean hasNext() {
        return hasnext;
    }

    /**
     * Reports the position of the next set bit. No check is performed;
     * callers are expected to consult {@link #hasNext()} first.
     */
    @Override
    public final int next() {
        final int result;
        if (!runningHasNext()) {
            // take the lowest set bit of the literal word and flip it off
            final int lowest = Long.numberOfTrailingZeros(word);
            word ^= (1L << lowest);
            result = literalPosition + lowest;
        } else {
            result = position++;
        }
        hasnext = moveToNext();
        return result;
    }

    /** Prepares the state for a freshly read running length word. */
    private final void setRunningLengthWord(RunningLengthWord rlw) {
        runningLength = wordinbits * (int) rlw.getRunningLength() + position;
        final boolean runOfOnes = rlw.getRunningBit();
        if (!runOfOnes) {
            // a run of zeros holds no set bit: jump straight past it
            position = runningLength;
        }
        wordPosition = ewahIter.literalWords();
        wordLength = wordPosition + rlw.getNumberOfLiteralWords();
    }

    /** Whether the current position still lies inside the run. */
    private final boolean runningHasNext() {
        return position < runningLength;
    }

    /** Loads literal words until one with a set bit is held, or none are left. */
    private final boolean literalHasNext() {
        while (word == 0 && wordPosition < wordLength) {
            word = ewahBuffer[wordPosition++];
            literalPosition = position;
            position += wordinbits;
        }
        return word != 0;
    }
}

89
fine-jgit/src/com/fr/third/googlecode/javaewah/IntIteratorOverIteratingRLW.java

@ -0,0 +1,89 @@
package com.fr.third.googlecode.javaewah;
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * An {@link IntIterator} that hands out the positions of the set bits
 * described by an {@link IteratingRLW}.
 */
public class IntIteratorOverIteratingRLW implements IntIterator {

    IteratingRLW parent;
    /** Bit position reached so far. */
    private int position;
    /** Bit position at which the current run ends. */
    private int runningLength;
    /** Literal word being consumed; its bits are cleared as they are reported. */
    private long word;
    /** Index (relative to the current running length word) of the next literal word to load. */
    private int wordPosition;
    /** Number of literal words of the current running length word. */
    private int wordLength;
    /** Bit position of the first bit of the literal word held in {@code word}. */
    private int literalPosition;
    private boolean hasnext;

    /**
     * @param p iterator we wish to iterate over
     */
    public IntIteratorOverIteratingRLW(final IteratingRLW p) {
        this.parent = p;
        this.position = 0;
        setupForCurrentRunningLengthWord();
        this.hasnext = moveToNext();
    }

    /**
     * @return whether we could find another set bit; don't move if there is an unprocessed value
     */
    private final boolean moveToNext() {
        for (;;) {
            if (runningHasNext() || literalHasNext()) {
                return true;
            }
            if (!parent.next()) {
                return false;
            }
            setupForCurrentRunningLengthWord();
        }
    }

    @Override
    public boolean hasNext() {
        return hasnext;
    }

    /**
     * Reports the position of the next set bit. No check is performed;
     * callers are expected to consult {@link #hasNext()} first.
     */
    @Override
    public final int next() {
        final int result;
        if (!runningHasNext()) {
            // take the lowest set bit of the literal word and flip it off
            final int lowest = Long.numberOfTrailingZeros(word);
            word ^= (1L << lowest);
            result = literalPosition + lowest;
        } else {
            result = position++;
        }
        hasnext = moveToNext();
        return result;
    }

    /** Prepares the state for the running length word the parent currently points at. */
    private final void setupForCurrentRunningLengthWord() {
        runningLength = wordinbits * (int) parent.getRunningLength()
                + position;
        final boolean runOfOnes = parent.getRunningBit();
        if (!runOfOnes) {
            // a run of zeros holds no set bit: jump straight past it
            position = runningLength;
        }
        wordPosition = 0;
        wordLength = parent.getNumberOfLiteralWords();
    }

    /** Whether the current position still lies inside the run. */
    private final boolean runningHasNext() {
        return position < runningLength;
    }

    /** Loads literal words until one with a set bit is held, or none are left. */
    private final boolean literalHasNext() {
        while (word == 0 && wordPosition < wordLength) {
            word = parent.getLiteralWordAt(wordPosition++);
            literalPosition = position;
            position += wordinbits;
        }
        return word != 0;
    }
}

276
fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingBufferedRunningLengthWord.java

@ -0,0 +1,276 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Mostly for internal use. Similar to BufferedRunningLengthWord, but automatically
* advances to the next BufferedRunningLengthWord as words are discarded.
*
* @since 0.4.0
* @author David McIntosh
*/
public final class IteratingBufferedRunningLengthWord implements IteratingRLW, Cloneable{
/**
* Instantiates a new iterating buffered running length word.
*
* @param iterator iterator
*/
public IteratingBufferedRunningLengthWord(final EWAHIterator iterator) {
this.iterator = iterator;
this.brlw = new BufferedRunningLengthWord(this.iterator.next());
this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset;
this.buffer = this.iterator.buffer();
}
/**
 * Creates an iterating buffered running length word positioned on the
 * first running length word of the given bitmap.
 *
 * @param bitmap over which we want to iterate
 */
public IteratingBufferedRunningLengthWord(final EWAHCompressedBitmap bitmap) {
    // same initialisation as the iterator-based constructor
    this(EWAHIterator.getEWAHIterator(bitmap));
}
/**
 * Drops the first {@code x} words (fill words first, then literal words),
 * moving on to the following running length words as needed. Stops early
 * when the underlying iterator has no further word.
 *
 * @param x the number of words to be discarded
 */
@Override
public void discardFirstWords(long x) {
    while (x > 0) {
        // the whole request fits inside the current run
        if (this.brlw.RunningLength > x) {
            this.brlw.RunningLength -= x;
            return;
        }
        // consume the run entirely ...
        x -= this.brlw.RunningLength;
        this.brlw.RunningLength = 0;
        // ... then as many literal words as requested and available
        final long dropped = Math.min(x, this.brlw.NumberOfLiteralWords);
        this.literalWordStartPosition += dropped;
        this.brlw.NumberOfLiteralWords -= dropped;
        x -= dropped;
        final boolean needsNextWord = (x > 0) || (this.brlw.size() == 0);
        if (needsNextWord) {
            if (!this.iterator.hasNext()) {
                return;
            }
            this.brlw.reset(this.iterator.next());
            this.literalWordStartPosition = this.iterator.literalWords();
        }
    }
}
/**
 * Moves to the next running length word. When there is none, the current
 * word is emptied (no fill words, no literal words).
 *
 * @return whether the move was possible
 */
@Override
public boolean next() {
    if (this.iterator.hasNext()) {
        this.brlw.reset(this.iterator.next());
        this.literalWordStartPosition = this.iterator.literalWords();
        return true;
    }
    this.brlw.NumberOfLiteralWords = 0;
    this.brlw.RunningLength = 0;
    return false;
}
/**
 * Writes out up to {@code max} words to the container, discarding them
 * from this iterator as it goes.
 *
 * @param container target for writes
 * @param max maximal number of words to write
 * @return how many words were written
 */
public long discharge(BitmapStorage container, long max) {
    long written = 0;
    while ((written < max) && (size() > 0)) {
        // fill words first, capped by what is left of the budget
        final long run = Math.min(getRunningLength(), max - written);
        container.addStreamOfEmptyWords(getRunningBit(), run);
        written += run;
        // then literal words, capped the same way
        final int literals = (int) Math.min(getNumberOfLiteralWords(), max - written);
        writeLiteralWords(literals, container);
        discardFirstWords(run + literals);
        written += literals;
    }
    return written;
}
/**
 * Writes out up to {@code max} words to the container in negated form
 * (running bit flipped, literal words negated), discarding them from this
 * iterator as it goes.
 *
 * @param container target for writes
 * @param max maximal number of words to write
 * @return how many words were written
 */
public long dischargeNegated(BitmapStorage container, long max) {
    long written = 0;
    while ((written < max) && (size() > 0)) {
        // fill words first, capped by what is left of the budget
        final long run = Math.min(getRunningLength(), max - written);
        container.addStreamOfEmptyWords(!getRunningBit(), run);
        written += run;
        // then literal words, capped the same way
        final int literals = (int) Math.min(getNumberOfLiteralWords(), max - written);
        writeNegatedLiteralWords(literals, container);
        discardFirstWords(run + literals);
        written += literals;
    }
    return written;
}
/**
 * Writes out the remaining words as if they were all zeros, discarding
 * them from this iterator.
 *
 * @param container target for writes
 */
public void dischargeAsEmpty(BitmapStorage container) {
    for (long remaining = size(); remaining > 0; remaining = size()) {
        container.addStreamOfEmptyWords(false, remaining);
        discardFirstWords(remaining);
    }
}
/**
 * Writes out all the remaining words.
 *
 * @param container target for writes
 */
public void discharge(BitmapStorage container) {
    // record how many literal words of the current word were already consumed
    final int consumed = this.literalWordStartPosition - this.iterator.literalWords();
    this.brlw.literalwordoffset = consumed;
    discharge(this.brlw, this.iterator, container);
}
/**
 * Reads a literal word of the current running length word. The index is
 * relative to the first literal word that has not been discarded yet.
 *
 * @param index zero based index
 * @return the literal word
 */
@Override
public long getLiteralWordAt(int index) {
    final int absolute = this.literalWordStartPosition + index;
    return this.buffer[absolute];
}
/**
 * Tells how many literal words remain in the current running length word.
 *
 * @return the number of literal words
 */
@Override
public int getNumberOfLiteralWords() {
    return brlw.NumberOfLiteralWords;
}
/**
 * Tells which bit the fill words of the current running length word use.
 *
 * @return the running bit
 */
@Override
public boolean getRunningBit() {
    return brlw.RunningBit;
}
/**
 * Tells how many fill words remain in the current running length word.
 *
 * @return the running length
 */
@Override
public long getRunningLength() {
    return brlw.RunningLength;
}
/**
 * Size, in uncompressed words, of the current running length word, as
 * reported by the underlying buffered word.
 *
 * @return the size
 */
@Override
public long size() {
    return brlw.size();
}
/**
 * Copies the first N remaining literal words to the target bitmap. The
 * words are neither discarded nor is any iteration performed.
 *
 * @param numWords number of words to be written
 * @param container where we write
 */
public void writeLiteralWords(int numWords, BitmapStorage container) {
    final int start = this.literalWordStartPosition;
    container.addStreamOfLiteralWords(this.buffer, start, numWords);
}
/**
 * Copies the first N remaining literal words, negated, to the target
 * bitmap. The words are neither discarded nor is any iteration performed.
 *
 * @param numWords number of words to be written
 * @param container where we write
 */
public void writeNegatedLiteralWords(int numWords, BitmapStorage container) {
    final int start = this.literalWordStartPosition;
    container.addStreamOfNegatedLiteralWords(this.buffer, start, numWords);
}
/**
 * For internal use. Writes the given word, then every word the iterator
 * still has to offer, to the container. (One could use the non-static
 * discharge method instead, but we expect it to be slower.)
 *
 * @param initialWord
 *            the word to start from; its literalwordoffset tells how many
 *            of its literal words are to be skipped
 * @param iterator
 *            the iterator supplying the following words
 * @param container
 *            the container written to
 */
private static void discharge(final BufferedRunningLengthWord initialWord,
        final EWAHIterator iterator, final BitmapStorage container) {
    BufferedRunningLengthWord current = initialWord;
    while (true) {
        // fill words of the current running length word
        final long fillCount = current.getRunningLength();
        container.addStreamOfEmptyWords(current.getRunningBit(), fillCount);
        // followed by its (remaining) literal words
        container.addStreamOfLiteralWords(iterator.buffer(),
                iterator.literalWords() + current.literalwordoffset,
                current.getNumberOfLiteralWords());
        if (!iterator.hasNext()) {
            return;
        }
        current = new BufferedRunningLengthWord(iterator.next());
    }
}
/**
 * Duplicates this object. The copy gets its own buffered word and its own
 * iterator, while the array of words is shared with the original.
 *
 * @return the copy
 * @throws CloneNotSupportedException not expected to happen in practice
 */
@Override
public IteratingBufferedRunningLengthWord clone() throws CloneNotSupportedException {
    // Object.clone() already carries over the buffer reference and the
    // literal word start position; only the mutable helpers need copies.
    final IteratingBufferedRunningLengthWord copy = (IteratingBufferedRunningLengthWord) super.clone();
    copy.brlw = brlw.clone();
    copy.iterator = iterator.clone();
    return copy;
}
/** The current running length word; its counters shrink as words are discarded. */
private BufferedRunningLengthWord brlw;
/** The array of words obtained from the iterator. */
private long[] buffer;
/** Index in {@code buffer} of the first literal word not yet discarded. */
private int literalWordStartPosition;
/** Source of the following running length words. */
private EWAHIterator iterator;
}

49
fine-jgit/src/com/fr/third/googlecode/javaewah/IteratingRLW.java

@ -0,0 +1,49 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * High-level iterator over the running length words of a compressed bitmap.
 */
public interface IteratingRLW {

    /**
     * Moves to the next running length word.
     *
     * @return whether there is more
     */
    boolean next();

    /**
     * Reads one of the literal words of the current running length word.
     *
     * @param index where the literal word is
     * @return the literal word at the given index.
     */
    long getLiteralWordAt(int index);

    /**
     * @return the number of literal (non-fill) words
     */
    int getNumberOfLiteralWords();

    /**
     * @return the bit used for the fill bits
     */
    boolean getRunningBit();

    /**
     * @return sum of getRunningLength() and getNumberOfLiteralWords()
     */
    long size();

    /**
     * @return length of the run of fill words
     */
    long getRunningLength();

    /**
     * Drops words from the front of the iterator.
     *
     * @param x the number of words to discard
     */
    void discardFirstWords(long x);

    /**
     * Duplicates the iterator.
     *
     * @return a copy of the iterator
     * @throws CloneNotSupportedException this should not be thrown in theory
     */
    IteratingRLW clone() throws CloneNotSupportedException;
}

616
fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorAggregation.java

@ -0,0 +1,616 @@
package com.fr.third.googlecode.javaewah;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Set of helper functions to aggregate bitmaps.
*
*/
public class IteratorAggregation {
/**
 * Wraps an iterator so that it reads as its negation: the running bit is
 * flipped and the literal words are complemented. The wrapper is a live
 * view; advancing or discarding on it acts on {@code x} itself. The
 * wrapper does not support cloning.
 *
 * @param x iterator to negate
 * @return negated version of the iterator
 */
public static IteratingRLW not(final IteratingRLW x) {
    return new IteratingRLW() {
        // --- negated views ---
        @Override
        public boolean getRunningBit() {
            return !x.getRunningBit();
        }

        @Override
        public long getLiteralWordAt(int index) {
            final long original = x.getLiteralWordAt(index);
            return ~original;
        }

        // --- plain delegation ---
        @Override
        public boolean next() {
            return x.next();
        }

        @Override
        public void discardFirstWords(long count) {
            x.discardFirstWords(count);
        }

        @Override
        public long size() {
            return x.size();
        }

        @Override
        public long getRunningLength() {
            return x.getRunningLength();
        }

        @Override
        public int getNumberOfLiteralWords() {
            return x.getNumberOfLiteralWords();
        }

        @Override
        public IteratingRLW clone() throws CloneNotSupportedException {
            throw new CloneNotSupportedException();
        }
    };
}
/**
 * Aggregates the iterators with a logical and, using a bitmap buffer of
 * the default size ({@code DEFAULTMAXBUFSIZE}).
 *
 * @param al set of iterators to aggregate
 * @return and aggregate
 */
public static IteratingRLW bufferedand(final IteratingRLW... al) {
    final int bufsize = DEFAULTMAXBUFSIZE;
    return bufferedand(bufsize, al);
}
/**
 * Aggregates the iterators with a logical and, using a bitmap buffer. A
 * single iterator is returned as is.
 *
 * @param bufsize size of the internal buffer used by the iterator in 64-bit words (per input iterator)
 * @param al set of iterators to aggregate
 * @return and aggregate
 * @throws IllegalArgumentException if no iterator is given
 */
public static IteratingRLW bufferedand(final int bufsize, final IteratingRLW... al) {
    switch (al.length) {
    case 0:
        throw new IllegalArgumentException("Need at least one iterator");
    case 1:
        return al[0];
    default:
        final LinkedList<IteratingRLW> inputs = new LinkedList<IteratingRLW>(Arrays.asList(al));
        return new BufferedIterator(new BufferedAndIterator(inputs, bufsize));
    }
}
/**
 * Aggregates the iterators with a logical or, using a bitmap buffer of
 * the default size ({@code DEFAULTMAXBUFSIZE}).
 *
 * @param al set of iterators to aggregate
 * @return or aggregate
 */
public static IteratingRLW bufferedor(final IteratingRLW... al) {
    final int bufsize = DEFAULTMAXBUFSIZE;
    return bufferedor(bufsize, al);
}
/**
 * Aggregates the iterators with a logical or, using a bitmap buffer. A
 * single iterator is returned as is.
 *
 * @param bufsize size of the internal buffer used by the iterator in 64-bit words
 * @param al iterators to aggregate
 * @return or aggregate
 * @throws IllegalArgumentException if no iterator is given
 */
public static IteratingRLW bufferedor(final int bufsize, final IteratingRLW... al) {
    switch (al.length) {
    case 0:
        throw new IllegalArgumentException("Need at least one iterator");
    case 1:
        return al[0];
    default:
        final LinkedList<IteratingRLW> inputs = new LinkedList<IteratingRLW>(Arrays.asList(al));
        return new BufferedIterator(new BufferedORIterator(inputs, bufsize));
    }
}
/**
 * Aggregates the iterators with a logical xor, using a bitmap buffer of
 * the default size ({@code DEFAULTMAXBUFSIZE}).
 *
 * @param al set of iterators to aggregate
 * @return xor aggregate
 */
public static IteratingRLW bufferedxor(final IteratingRLW... al) {
    final int bufsize = DEFAULTMAXBUFSIZE;
    return bufferedxor(bufsize, al);
}
/**
 * Aggregates the iterators with a logical xor, using a bitmap buffer. A
 * single iterator is returned as is.
 *
 * @param bufsize size of the internal buffer used by the iterator in 64-bit words
 * @param al iterators to aggregate
 * @return xor aggregate
 * @throws IllegalArgumentException if no iterator is given
 */
public static IteratingRLW bufferedxor(final int bufsize, final IteratingRLW... al) {
    switch (al.length) {
    case 0:
        throw new IllegalArgumentException("Need at least one iterator");
    case 1:
        return al[0];
    default:
        final LinkedList<IteratingRLW> inputs = new LinkedList<IteratingRLW>(Arrays.asList(al));
        return new BufferedIterator(new BufferedXORIterator(inputs, bufsize));
    }
}
/**
 * Writes out the content of the iterator as if it were all zeros: for
 * every running length word still available, a run of empty words of the
 * same size is appended.
 *
 * @param container
 *            where we write
 * @param i
 *            the iterator, which is advanced as it is read
 */
protected static void dischargeAsEmpty(final BitmapStorage container,
        final IteratingRLW i) {
    for (long words = i.size(); words > 0; words = i.size()) {
        container.addStreamOfEmptyWords(false, words);
        i.next();
    }
}
/**
 * Writes out up to {@code max} words from the iterator, discarding them
 * from the iterator as it goes.
 *
 * @param container target for writes
 * @param i source of data
 * @param max maximal number of words to write
 * @return how many words were written
 */
protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) {
    long written = 0;
    while (i.size() > 0 && written < max) {
        // fill words first, capped by what is left of the budget
        long fill = i.getRunningLength();
        if (fill > 0) {
            if (fill + written > max) {
                fill = max - written;
            }
            container.addStreamOfEmptyWords(i.getRunningBit(), fill);
            written += fill;
        }
        // then literal words, capped the same way
        long literals = i.getNumberOfLiteralWords();
        if (literals + written > max) {
            literals = max - written;
        }
        int k = 0;
        while (k < literals) {
            container.add(i.getLiteralWordAt(k));
            ++k;
        }
        written += literals;
        i.discardFirstWords(literals + fill);
    }
    return written;
}
/**
 * Copies the bitwise complement of the iterator's words to the container
 * until either the iterator reports no more words or {@code max} words
 * have been written. The words written are discarded from the iterator.
 *
 * @param container target for writes
 * @param i source of data
 * @param max maximal number of writes
 * @return how many written
 */
protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) {
    long written = 0;
    while (i.size() > 0 && written < max) {
        // Run part: the run bit is flipped, the length is capped by the budget.
        long runWords = i.getRunningLength();
        if (runWords > 0) {
            if (runWords + written > max) {
                runWords = max - written;
            }
            container.addStreamOfEmptyWords(!i.getRunningBit(), runWords);
            written += runWords;
        }
        // Literal part: each word is complemented before it is written.
        long literalWords = i.getNumberOfLiteralWords();
        if (literalWords + written > max) {
            literalWords = max - written;
        }
        int k = 0;
        while (k < literalWords) {
            container.add(~i.getLiteralWordAt(k));
            ++k;
        }
        written += literalWords;
        i.discardFirstWords(literalWords + runWords);
    }
    return written;
}
/**
 * Writes the first words of the AND of two iterators to a container,
 * consuming both iterators as it goes.
 *
 * @param container where to write
 * @param desiredrlwcount budget that bounds the loop: it is decremented once
 *            per outer pass and by every batch of literal words written
 * @param rlwi first iterator to aggregate
 * @param rlwj second iterator to aggregate
 */
static void andToContainer(final BitmapStorage container,
        int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) {
    while ((rlwi.size() > 0) && (rlwj.size() > 0) && (desiredrlwcount-- > 0)) {
        // Phase 1: as long as either side is inside a run, resolve the runs.
        while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
            // "prey" has the strictly shorter run; on a tie rlwj is the prey.
            final IteratingRLW prey;
            final IteratingRLW predator;
            if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                prey = rlwi;
                predator = rlwj;
            } else {
                prey = rlwj;
                predator = rlwi;
            }
            if (predator.getRunningBit()) {
                // Run of ones: the result is whatever the prey holds there;
                // anything the prey could not supply is written as zeros.
                final long copied = discharge(container, prey, predator.getRunningLength());
                container.addStreamOfEmptyWords(false, predator.getRunningLength()
                        - copied);
                predator.discardFirstWords(predator.getRunningLength());
            } else {
                // Run of zeros: the result is zero over the whole run, so
                // both sides skip that many words.
                container.addStreamOfEmptyWords(false, predator.getRunningLength());
                prey.discardFirstWords(predator.getRunningLength());
                predator.discardFirstWords(predator.getRunningLength());
            }
        }
        // Phase 2: both sides are at literal words; AND them pairwise.
        final int literals = Math.min(rlwi.getNumberOfLiteralWords(),
                rlwj.getNumberOfLiteralWords());
        if (literals <= 0) {
            continue;
        }
        desiredrlwcount -= literals;
        for (int k = 0; k < literals; ++k) {
            container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
        }
        rlwi.discardFirstWords(literals);
        rlwj.discardFirstWords(literals);
    }
}
/**
 * Writes the AND of two iterators to a container, consuming both iterators
 * until at least one of them reports no more words.
 *
 * @param container where to write
 * @param rlwi first iterator to aggregate
 * @param rlwj second iterator to aggregate
 */
static void andToContainer(final BitmapStorage container,
        final IteratingRLW rlwi, IteratingRLW rlwj) {
    while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
        // Phase 1: as long as either side is inside a run, resolve the runs.
        while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
            // "prey" has the strictly shorter run; on a tie rlwj is the prey.
            final IteratingRLW prey;
            final IteratingRLW predator;
            if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                prey = rlwi;
                predator = rlwj;
            } else {
                prey = rlwj;
                predator = rlwi;
            }
            if (predator.getRunningBit()) {
                // Run of ones: the result is whatever the prey holds there;
                // anything the prey could not supply is written as zeros.
                final long copied = discharge(container, prey, predator.getRunningLength());
                container.addStreamOfEmptyWords(false, predator.getRunningLength()
                        - copied);
                predator.discardFirstWords(predator.getRunningLength());
            } else {
                // Run of zeros: the result is zero over the whole run, so
                // both sides skip that many words.
                container.addStreamOfEmptyWords(false, predator.getRunningLength());
                prey.discardFirstWords(predator.getRunningLength());
                predator.discardFirstWords(predator.getRunningLength());
            }
        }
        // Phase 2: both sides are at literal words; AND them pairwise.
        final int literals = Math.min(rlwi.getNumberOfLiteralWords(),
                rlwj.getNumberOfLiteralWords());
        if (literals <= 0) {
            continue;
        }
        for (int k = 0; k < literals; ++k) {
            container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
        }
        rlwi.discardFirstWords(literals);
        rlwj.discardFirstWords(literals);
    }
}
/**
 * Compute the first few words of the XOR aggregate between two iterators.
 * Both iterators are consumed as words are written.
 *
 * @param container where to write
 * @param desiredrlwcount budget that bounds the loop: it is decremented once
 *            per outer pass and by every batch of literal words written
 * @param rlwi first iterator to aggregate
 * @param rlwj second iterator to aggregate
 */
public static void xorToContainer(final BitmapStorage container,
        int desiredrlwcount, final IteratingRLW rlwi, final IteratingRLW rlwj) {
    while ((rlwi.size() > 0) && (rlwj.size() > 0) && (desiredrlwcount-- > 0)) {
        // Phase 1: as long as either side is inside a run, resolve the runs.
        while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
            // "prey" has the strictly shorter run; on a tie rlwj is the prey.
            final IteratingRLW prey;
            final IteratingRLW predator;
            if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                prey = rlwi;
                predator = rlwj;
            } else {
                prey = rlwj;
                predator = rlwi;
            }
            if (predator.getRunningBit()) {
                // XOR with ones: the prey's words are complemented; what the
                // prey could not supply is written as a run of ones.
                final long copied = dischargeNegated(container, prey, predator.getRunningLength());
                container.addStreamOfEmptyWords(true, predator.getRunningLength()
                        - copied);
                predator.discardFirstWords(predator.getRunningLength());
            } else {
                // XOR with zeros: the prey's words pass through unchanged;
                // what the prey could not supply is written as zeros.
                final long copied = discharge(container, prey, predator.getRunningLength());
                container.addStreamOfEmptyWords(false, predator.getRunningLength()
                        - copied);
                predator.discardFirstWords(predator.getRunningLength());
            }
        }
        // Phase 2: both sides are at literal words; XOR them pairwise.
        final int literals = Math.min(rlwi.getNumberOfLiteralWords(),
                rlwj.getNumberOfLiteralWords());
        if (literals <= 0) {
            continue;
        }
        desiredrlwcount -= literals;
        for (int k = 0; k < literals; ++k) {
            container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k));
        }
        rlwi.discardFirstWords(literals);
        rlwj.discardFirstWords(literals);
    }
}
/**
 * ORs the words supplied by an iterator into a buffer, in place, starting
 * at index 0. Stops when the iterator reports no more words or when the
 * buffer has been filled.
 *
 * @param bitmap buffer that is updated in place
 * @param i source of words; the words written to the buffer are consumed
 * @return index just past the last buffer word covered by the iterator's
 *         data ({@code bitmap.length} when the buffer was filled)
 */
protected static int inplaceor(long[] bitmap,
        IteratingRLW i) {
    int pos = 0;
    long s;
    while ((s = i.size()) > 0) {
        if (pos + s < bitmap.length) {
            // The whole chunk (run + literals) fits strictly inside the
            // buffer, so the run length is known to fit in an int here.
            final int L = (int) i.getRunningLength();
            if (i.getRunningBit())
                Arrays.fill(bitmap, pos, pos + L, ~0L);
            pos += L;
            final int LR = i.getNumberOfLiteralWords();
            for (int k = 0; k < LR; ++k)
                bitmap[pos++] |= i.getLiteralWordAt(k);
            if (!i.next()) {
                return pos;
            }
        } else {
            // The chunk reaches or passes the end of the buffer: take only
            // the words that still fit and leave the rest in the iterator.
            final int howmany = bitmap.length - pos;
            // Compare as long. Narrowing to int first would turn a run of
            // 2^31 words or more into a negative length, skip this branch
            // and hand Arrays.fill an invalid range.
            final long runLength = i.getRunningLength();
            if (pos + runLength > bitmap.length) {
                if (i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, bitmap.length, ~0L);
                }
                i.discardFirstWords(howmany);
                return bitmap.length;
            }
            // Safe narrowing: pos + runLength <= bitmap.length.
            final int L = (int) runLength;
            if (i.getRunningBit())
                Arrays.fill(bitmap, pos, pos + L, ~0L);
            pos += L;
            for (int k = 0; pos < bitmap.length; ++k)
                bitmap[pos++] |= i.getLiteralWordAt(k);
            i.discardFirstWords(howmany);
            return pos;
        }
    }
    return pos;
}
/**
 * XORs the words supplied by an iterator into a buffer, in place, starting
 * at index 0. Stops when the iterator reports no more words or when the
 * buffer has been filled.
 *
 * @param bitmap buffer that is updated in place
 * @param i source of words; the words written to the buffer are consumed
 * @return index just past the last buffer word covered by the iterator's
 *         data ({@code bitmap.length} when the buffer was filled)
 */
protected static int inplacexor(long[] bitmap,
        IteratingRLW i) {
    int pos = 0;
    long s;
    while ((s = i.size()) > 0) {
        if (pos + s < bitmap.length) {
            // The whole chunk (run + literals) fits strictly inside the
            // buffer, so the run length is known to fit in an int here.
            final int L = (int) i.getRunningLength();
            if (i.getRunningBit()) {
                // XOR with a run of ones complements the buffer words.
                for (int k = pos; k < pos + L; ++k)
                    bitmap[k] = ~bitmap[k];
            }
            pos += L;
            final int LR = i.getNumberOfLiteralWords();
            for (int k = 0; k < LR; ++k)
                bitmap[pos++] ^= i.getLiteralWordAt(k);
            if (!i.next()) {
                return pos;
            }
        } else {
            // The chunk reaches or passes the end of the buffer: take only
            // the words that still fit and leave the rest in the iterator.
            final int howmany = bitmap.length - pos;
            // Compare as long. Narrowing to int first would turn a run of
            // 2^31 words or more into a negative length, skip this branch
            // and drive pos below zero.
            final long runLength = i.getRunningLength();
            if (pos + runLength > bitmap.length) {
                if (i.getRunningBit()) {
                    for (int k = pos; k < bitmap.length; ++k)
                        bitmap[k] = ~bitmap[k];
                }
                i.discardFirstWords(howmany);
                return bitmap.length;
            }
            // Safe narrowing: pos + runLength <= bitmap.length.
            final int L = (int) runLength;
            if (i.getRunningBit())
                for (int k = pos; k < pos + L; ++k)
                    bitmap[k] = ~bitmap[k];
            pos += L;
            for (int k = 0; pos < bitmap.length; ++k)
                bitmap[pos++] ^= i.getLiteralWordAt(k);
            i.discardFirstWords(howmany);
            return pos;
        }
    }
    return pos;
}
/**
 * ANDs the words supplied by an iterator into a buffer, in place, starting
 * at index 0. Stops when the iterator reports no more words or when the
 * buffer has been filled. Buffer words beyond the returned index are not
 * touched.
 *
 * @param bitmap buffer that is updated in place
 * @param i source of words; the words applied to the buffer are consumed
 * @return index just past the last buffer word covered by the iterator's
 *         data ({@code bitmap.length} when the buffer was filled)
 */
protected static int inplaceand(long[] bitmap,
        IteratingRLW i) {
    int pos = 0;
    long s;
    while ((s = i.size()) > 0) {
        if (pos + s < bitmap.length) {
            // The whole chunk (run + literals) fits strictly inside the
            // buffer, so the run length is known to fit in an int here.
            final int L = (int) i.getRunningLength();
            if (!i.getRunningBit()) {
                // AND with a run of zeros clears the buffer words; a run
                // of ones leaves them unchanged.
                for (int k = pos; k < pos + L; ++k)
                    bitmap[k] = 0;
            }
            pos += L;
            final int LR = i.getNumberOfLiteralWords();
            for (int k = 0; k < LR; ++k)
                bitmap[pos++] &= i.getLiteralWordAt(k);
            if (!i.next()) {
                return pos;
            }
        } else {
            // The chunk reaches or passes the end of the buffer: take only
            // the words that still fit and leave the rest in the iterator.
            final int howmany = bitmap.length - pos;
            // Compare as long. Narrowing to int first would turn a run of
            // 2^31 words or more into a negative length, skip this branch
            // and drive pos below zero.
            final long runLength = i.getRunningLength();
            if (pos + runLength > bitmap.length) {
                if (!i.getRunningBit()) {
                    for (int k = pos; k < bitmap.length; ++k)
                        bitmap[k] = 0;
                }
                i.discardFirstWords(howmany);
                return bitmap.length;
            }
            // Safe narrowing: pos + runLength <= bitmap.length.
            final int L = (int) runLength;
            if (!i.getRunningBit())
                for (int k = pos; k < pos + L; ++k)
                    bitmap[k] = 0;
            pos += L;
            for (int k = 0; pos < bitmap.length; ++k)
                bitmap[pos++] &= i.getLiteralWordAt(k);
            i.discardFirstWords(howmany);
            return pos;
        }
    }
    return pos;
}
/**
 * An optimization option. Larger values may improve speed, but at
 * the expense of memory.
 *
 * This is the buffer size that the buffered aggregates without an explicit
 * size argument (for example {@code bufferedor(IteratingRLW...)}) pass on
 * to their sized overloads, where it is documented as a count of 64-bit
 * words.
 */
public final static int DEFAULTMAXBUFSIZE = 65536;
}
/**
 * Iterator that produces the OR of a list of input iterators one buffer at
 * a time. Each call to {@link #next()} ORs a buffer's worth of words from
 * every input into {@link #hardbitmap} and returns the result as an
 * iterator over a compressed bitmap.
 */
class BufferedORIterator implements CloneableIterator<EWAHIterator> {
    /** Compressed copy of the most recently computed buffer; reused across calls. */
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    /** Uncompressed working buffer the inputs are ORed into. */
    long[] hardbitmap;
    /** Inputs that are still being consumed; inputs that report no words are removed. */
    LinkedList<IteratingRLW> ll;
    /** Not assigned by the constructor; the working size is {@code hardbitmap.length}. */
    int buffersize;

    /**
     * @param basell inputs to aggregate; the list is used directly, not copied
     * @param bufsize size of the working buffer in 64-bit words
     */
    BufferedORIterator(LinkedList<IteratingRLW> basell, int bufsize) {
        this.ll = basell;
        this.hardbitmap = new long[bufsize];
    }

    /**
     * Copies this iterator. The buffers and the list of inputs are copied;
     * the input iterators held in the list are shared with the original.
     *
     * The original declared and cast to {@code BufferedXORIterator}, which
     * made every call fail with a {@code ClassCastException}.
     *
     * @return a copy of this iterator
     * @throws CloneNotSupportedException if cloning is not supported
     */
    @Override
    public BufferedORIterator clone() throws CloneNotSupportedException {
        BufferedORIterator answer = (BufferedORIterator) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardbitmap = this.hardbitmap.clone();
        @SuppressWarnings("unchecked") // LinkedList.clone() returns a list of the same element type
        LinkedList<IteratingRLW> inputsCopy = (LinkedList<IteratingRLW>) this.ll.clone();
        answer.ll = inputsCopy;
        return answer;
    }

    /** @return true while at least one input remains in the list */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Computes the next buffer of the OR aggregate.
     *
     * @return iterator over the words computed by this call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        long effective = 0;
        Iterator<IteratingRLW> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW rlw = i.next();
            if (rlw.size() > 0) {
                int eff = IteratorAggregation.inplaceor(this.hardbitmap, rlw);
                // Track the longest stretch of the buffer any input wrote to.
                if (eff > effective)
                    effective = eff;
            } else
                i.remove();
        }
        for (int k = 0; k < effective; ++k) {
            this.buffer.add(this.hardbitmap[k]);
        }
        // Reset the working buffer so the next call starts from zeros.
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Iterator that produces the XOR of a list of input iterators one buffer at
 * a time. Each call to {@link #next()} XORs a buffer's worth of words from
 * every input into {@link #hardbitmap} and returns the result as an
 * iterator over a compressed bitmap.
 */
class BufferedXORIterator implements CloneableIterator<EWAHIterator> {
    /** Compressed copy of the most recently computed buffer; reused across calls. */
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    /** Uncompressed working buffer the inputs are XORed into. */
    long[] hardbitmap;
    /** Inputs that are still being consumed; inputs that report no words are removed. */
    LinkedList<IteratingRLW> ll;
    /** Not assigned by the constructor. */
    int buffersize;

    /**
     * @param basell inputs to aggregate; the list is used directly, not copied
     * @param bufsize size of the working buffer in 64-bit words
     */
    BufferedXORIterator(LinkedList<IteratingRLW> basell, int bufsize) {
        this.ll = basell;
        this.hardbitmap = new long[bufsize];
    }

    /**
     * Copies this iterator. The buffers and the list of inputs are copied;
     * the input iterators held in the list are shared with the original.
     *
     * @return a copy of this iterator
     * @throws CloneNotSupportedException if cloning is not supported
     */
    @Override
    public BufferedXORIterator clone() throws CloneNotSupportedException {
        final BufferedXORIterator copy = (BufferedXORIterator) super.clone();
        copy.buffer = this.buffer.clone();
        copy.hardbitmap = this.hardbitmap.clone();
        @SuppressWarnings("unchecked") // LinkedList.clone() returns a list of the same element type
        final LinkedList<IteratingRLW> inputsCopy = (LinkedList<IteratingRLW>) this.ll.clone();
        copy.ll = inputsCopy;
        return copy;
    }

    /** @return true while at least one input remains in the list */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Computes the next buffer of the XOR aggregate.
     *
     * @return iterator over the words computed by this call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        int usedWords = 0;
        for (Iterator<IteratingRLW> inputs = this.ll.iterator(); inputs.hasNext();) {
            final IteratingRLW input = inputs.next();
            if (!(input.size() > 0)) {
                // This input has nothing left: drop it from the list.
                inputs.remove();
                continue;
            }
            // Track the longest stretch of the buffer any input wrote to.
            usedWords = Math.max(usedWords,
                    IteratorAggregation.inplacexor(this.hardbitmap, input));
        }
        int k = 0;
        while (k < usedWords) {
            this.buffer.add(this.hardbitmap[k]);
            ++k;
        }
        // Reset the working buffer so the next call starts from zeros.
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Iterator that produces the AND of a list of input iterators in chunks.
 * Each call to {@link #next()} ANDs the first two inputs into a buffer and
 * then folds every further input into that buffer.
 */
class BufferedAndIterator implements CloneableIterator<EWAHIterator> {
    /** Holds the chunk computed by the latest call to {@link #next()}. */
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    /** Inputs being aggregated; emptied as soon as any input reports no words. */
    LinkedList<IteratingRLW> ll;
    /** Per-input word budget; multiplied by the number of inputs on each call. */
    int buffersize;

    /**
     * @param basell inputs to aggregate; the list is used directly, not copied.
     *            {@link #next()} reads the first two entries unconditionally.
     * @param bufsize per-input word budget for each chunk
     */
    public BufferedAndIterator(LinkedList<IteratingRLW> basell, int bufsize) {
        this.ll = basell;
        this.buffersize = bufsize;
    }

    /** @return true while the list of inputs is not empty */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Copies this iterator. The buffer and the list of inputs are copied;
     * the input iterators held in the list are shared with the original.
     *
     * @return a copy of this iterator
     * @throws CloneNotSupportedException if cloning is not supported
     */
    @Override
    public BufferedAndIterator clone() throws CloneNotSupportedException {
        final BufferedAndIterator copy = (BufferedAndIterator) super.clone();
        copy.buffer = this.buffer.clone();
        @SuppressWarnings("unchecked") // LinkedList.clone() returns a list of the same element type
        final LinkedList<IteratingRLW> inputsCopy = (LinkedList<IteratingRLW>) this.ll.clone();
        copy.ll = inputsCopy;
        return copy;
    }

    /**
     * Computes the next chunk of the AND aggregate.
     *
     * @return iterator over the words computed by this call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        // Seed the buffer with the AND of the first two inputs.
        IteratorAggregation.andToContainer(this.buffer, this.buffersize * this.ll.size(),
                this.ll.get(0), this.ll.get(1));
        if (this.ll.size() > 2) {
            // Fold the remaining inputs in, starting at the third one. Stop
            // early once the intermediate result holds no data.
            final Iterator<IteratingRLW> rest = this.ll.listIterator(2);
            final EWAHCompressedBitmap scratch = new EWAHCompressedBitmap();
            while (rest.hasNext() && this.buffer.sizeInBytes() > 0) {
                IteratorAggregation.andToContainer(scratch,
                        this.buffer.getIteratingRLW(), rest.next());
                this.buffer.swap(scratch);
                scratch.clear();
            }
        }
        // Once any input reports no more words, drop all inputs so that
        // hasNext() turns false.
        for (IteratingRLW input : this.ll) {
            if (input.size() == 0) {
                this.ll.clear();
                break;
            }
        }
        return this.buffer.getEWAHIterator();
    }
}

132
fine-jgit/src/com/fr/third/googlecode/javaewah/IteratorUtil.java

@ -0,0 +1,132 @@
package com.fr.third.googlecode.javaewah;
import java.util.Iterator;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Convenience functions for working over iterators.
 */
public class IteratorUtil {
    /**
     * @param i iterator we wish to iterate over
     * @return an iterator over the set bits corresponding to the iterator
     */
    public static IntIterator toSetBitsIntIterator(final IteratingRLW i) {
        return new IntIteratorOverIteratingRLW(i);
    }

    /**
     * Boxed variant of {@link #toSetBitsIntIterator(IteratingRLW)}.
     *
     * @param i iterator we wish to iterate over
     * @return an iterator over the set bits corresponding to the iterator;
     *         its {@code remove()} does nothing
     */
    public static Iterator<Integer> toSetBitsIterator(final IteratingRLW i) {
        return new Iterator<Integer>() {
            @Override
            public boolean hasNext() {
                return this.under.hasNext();
            }

            @Override
            public Integer next() {
                // valueOf replaces the deprecated Integer constructor.
                return Integer.valueOf(this.under.next());
            }

            @Override
            public void remove() {
                // Deliberately a no-op, as in the original.
            }

            final private IntIterator under = toSetBitsIntIterator(i);
        };
    }

    /**
     * Generate a bitmap from an iterator. The iterator is consumed until
     * its {@code next()} returns false.
     *
     * @param i iterator we wish to materialize
     * @param c where we write
     */
    public static void materialize(final IteratingRLW i, final BitmapStorage c) {
        while (true) {
            if (i.getRunningLength() > 0) {
                c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength());
            }
            for (int k = 0; k < i.getNumberOfLiteralWords(); ++k)
                c.add(i.getLiteralWordAt(k));
            if (!i.next())
                break;
        }
    }

    /**
     * Counts the set bits: every word of a run of ones contributes
     * {@code EWAHCompressedBitmap.wordinbits}, every literal word its bit
     * count. The iterator is consumed.
     *
     * @param i iterator we wish to iterate over
     * @return the cardinality (number of set bits) corresponding to the iterator
     */
    public static int cardinality(final IteratingRLW i) {
        int answer = 0;
        while (true) {
            if (i.getRunningBit())
                answer += i.getRunningLength() * EWAHCompressedBitmap.wordinbits;
            for (int k = 0; k < i.getNumberOfLiteralWords(); ++k)
                answer += Long.bitCount(i.getLiteralWordAt(k));
            if (!i.next())
                break;
        }
        return answer;
    }

    /**
     * @param x set of bitmaps
     * @return an array of iterators corresponding to the array of bitmaps
     */
    public static IteratingRLW[] toIterators(final EWAHCompressedBitmap... x) {
        IteratingRLW[] X = new IteratingRLW[x.length];
        for (int k = 0; k < X.length; ++k) {
            X[k] = new IteratingBufferedRunningLengthWord(x[k]);
        }
        return X;
    }

    /**
     * Turn an iterator into a bitmap, writing at most {@code Max} words.
     *
     * The original capped only the run part: literal words were written
     * without limit and were not counted, so more than {@code Max} words
     * could be written and the return value under-reported them. Literal
     * words are now capped and counted as well.
     *
     * When the limit is reached the iterator is left on the chunk that was
     * being processed; it is not advanced past it.
     *
     * @param i iterator we wish to materialize
     * @param c where we write
     * @param Max maximum number of words we wish to materialize
     * @return how many words were actually materialized
     */
    public static long materialize(final IteratingRLW i, final BitmapStorage c, long Max) {
        if (Max <= 0) {
            // Nothing may be written; avoid passing a negative run length on.
            return 0;
        }
        final long origMax = Max;
        while (true) {
            if (i.getRunningLength() > 0) {
                long runWords = i.getRunningLength();
                if (runWords > Max)
                    runWords = Max;
                c.addStreamOfEmptyWords(i.getRunningBit(), runWords);
                Max -= runWords;
            }
            long literalWords = i.getNumberOfLiteralWords();
            if (literalWords > Max)
                literalWords = Max;
            for (int k = 0; k < literalWords; ++k)
                c.add(i.getLiteralWordAt(k));
            Max -= literalWords;
            if (Max > 0) {
                if (!i.next())
                    break;
            } else
                break;
        }
        return origMax - Max;
    }

    /**
     * Turn an iterator into a bitmap.
     *
     * @param i iterator we wish to materialize
     * @return materialized version of the iterator
     */
    public static EWAHCompressedBitmap materialize(final IteratingRLW i) {
        EWAHCompressedBitmap ewah = new EWAHCompressedBitmap();
        materialize(i, ewah);
        return ewah;
    }
}

61
fine-jgit/src/com/fr/third/googlecode/javaewah/LogicalElement.java

@ -0,0 +1,61 @@
package com.fr.third.googlecode.javaewah;
/**
 * A prototypical model for bitmaps. Used by the
 * class FastAggregation. Users should probably not
 * be concerned by this class.
 *
 * @author Daniel Lemire
 * @param <T> the type of element (e.g., a bitmap class)
 */
public interface LogicalElement<T> {
    /**
     * Compute the bitwise logical and.
     *
     * @param le element
     * @return the result of the operation
     */
    T and(T le);

    /**
     * Compute the bitwise logical and not.
     *
     * @param le element
     * @return the result of the operation
     */
    T andNot(T le);

    /**
     * Compute the bitwise logical not (in place).
     */
    void not();

    /**
     * Compute the bitwise logical or.
     *
     * @param le another element
     * @return the result of the operation
     */
    @SuppressWarnings({ "rawtypes", "javadoc" })
    LogicalElement or(T le);

    /**
     * How many logical bits does this element represent?
     *
     * @return the number of bits represented by this element
     */
    int sizeInBits();

    /**
     * Should report the storage requirement.
     *
     * @return How many bytes
     * @since 0.6.2
     */
    int sizeInBytes();

    /**
     * Compute the bitwise logical xor.
     *
     * @param le element
     * @return the results of the operation
     */
    T xor(T le);
}

92
fine-jgit/src/com/fr/third/googlecode/javaewah/NonEmptyVirtualStorage.java

@ -0,0 +1,92 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * This is a BitmapStorage that can be used to determine quickly if the result
 * of an operation is non-trivial... that is, whether there will be at least one
 * set bit. Nothing is stored: each write either does nothing or throws the
 * shared {@link NonEmptyException}.
 *
 * @since 0.4.2
 * @author Daniel Lemire and Veronika Zenz
 */
public class NonEmptyVirtualStorage implements BitmapStorage {
    /** Single preallocated instance thrown by every method of this class. */
    private static final NonEmptyException nonEmptyException = new NonEmptyException();

    /** Signal raised when data that may contain a set bit is written. */
    static class NonEmptyException extends RuntimeException {
        private static final long serialVersionUID = 1L;

        /**
         * Do not fill in the stack trace for this exception
         * for performance reasons.
         *
         * @return this instance
         * @see Throwable#fillInStackTrace()
         */
        @Override
        public synchronized Throwable fillInStackTrace() {
            return this;
        }
    }

    /**
     * If the word to be added is non-zero, a NonEmptyException exception is
     * thrown.
     *
     * @see BitmapStorage#add(long)
     */
    @Override
    public void add(long newdata) {
        if (newdata != 0) {
            throw nonEmptyException;
        }
    }

    /**
     * Throws a NonEmptyException exception when number is greater than 0;
     * the words themselves are not inspected.
     */
    @Override
    public void addStreamOfLiteralWords(long[] data, int start, int number) {
        if (number > 0) {
            throw nonEmptyException;
        }
    }

    /**
     * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception,
     * otherwise, nothing happens.
     *
     * @see BitmapStorage#addStreamOfEmptyWords(boolean, long)
     */
    @Override
    public void addStreamOfEmptyWords(boolean v, long number) {
        if (v && (number > 0)) {
            throw nonEmptyException;
        }
    }

    /**
     * Throws a NonEmptyException exception when number is greater than 0;
     * the words themselves are not inspected.
     */
    @Override
    public void addStreamOfNegatedLiteralWords(long[] data, int start, int number) {
        if (number > 0) {
            throw nonEmptyException;
        }
    }

    /**
     * Does nothing.
     *
     * @see BitmapStorage#setSizeInBits(int)
     */
    @Override
    public void setSizeInBits(int bits) {
    }
}

152
fine-jgit/src/com/fr/third/googlecode/javaewah/RunningLengthWord.java

@ -0,0 +1,152 @@
package com.fr.third.googlecode.javaewah;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Mostly for internal use. A view onto one marker word inside a bitmap's
 * buffer. As the accessors below show, the word packs a running bit
 * (bit 0), a running length (the next {@link #runninglengthbits} bits) and
 * a count of literal words (the remaining high bits).
 *
 * @since 0.1.0
 * @author Daniel Lemire
 */
public final class RunningLengthWord implements Cloneable {
    /** The array of words. */
    public EWAHCompressedBitmap parent;
    /** The position in array. */
    public int position;

    /**
     * number of bits dedicated to marking of the running length of clean
     * words
     */
    public static final int runninglengthbits = 32;
    private static final int literalbits = 64 - 1 - runninglengthbits;
    /** largest number of literal words in a run. */
    public static final int largestliteralcount = (1 << literalbits) - 1;
    /** largest number of clean words in a run */
    public static final long largestrunninglengthcount = (1L << runninglengthbits) - 1;
    /** Mask covering the running bit and the running length field. */
    private static final long runninglengthplusrunningbit = (1L << (runninglengthbits + 1)) - 1;
    /** Mask covering the running length field in place (above the running bit). */
    private static final long shiftedlargestrunninglengthcount = largestrunninglengthcount << 1;
    private static final long notrunninglengthplusrunningbit = ~runninglengthplusrunningbit;
    private static final long notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount;

    /**
     * Instantiates a new running length word.
     *
     * @param a
     *            an array of 64-bit words
     * @param p
     *            position in the array where the running length word is
     *            located.
     */
    RunningLengthWord(final EWAHCompressedBitmap a, final int p) {
        this.parent = a;
        this.position = p;
    }

    /**
     * Gets the number of literal words.
     *
     * @return the number of literal words
     */
    public int getNumberOfLiteralWords() {
        final int shift = 1 + runninglengthbits;
        return (int) (this.parent.buffer[this.position] >>> shift);
    }

    /**
     * Gets the running bit.
     *
     * @return the running bit
     */
    public boolean getRunningBit() {
        final long lowBit = this.parent.buffer[this.position] & 1;
        return lowBit != 0;
    }

    /**
     * Gets the running length.
     *
     * @return the running length
     */
    public long getRunningLength() {
        final long withoutRunningBit = this.parent.buffer[this.position] >>> 1;
        return withoutRunningBit & largestrunninglengthcount;
    }

    /**
     * Sets the number of literal words.
     *
     * @param number
     *            the new number of literal words
     */
    public void setNumberOfLiteralWords(final long number) {
        // Set every bit of the field first, then clear the bits that are
        // not set in the new value; the lower fields pass through the mask.
        final long keepMask = (number << (runninglengthbits + 1))
                | runninglengthplusrunningbit;
        this.parent.buffer[this.position] =
                (this.parent.buffer[this.position] | notrunninglengthplusrunningbit)
                & keepMask;
    }

    /**
     * Sets the running bit.
     *
     * @param b
     *            the new running bit
     */
    public void setRunningBit(final boolean b) {
        if (!b) {
            this.parent.buffer[this.position] &= ~1L;
            return;
        }
        this.parent.buffer[this.position] |= 1L;
    }

    /**
     * Sets the running length.
     *
     * @param number
     *            the new running length
     */
    public void setRunningLength(final long number) {
        // Set every bit of the field first, then clear the bits that are
        // not set in the new value; the other fields pass through the mask.
        final long keepMask = (number << 1)
                | notshiftedlargestrunninglengthcount;
        this.parent.buffer[this.position] =
                (this.parent.buffer[this.position] | shiftedlargestrunninglengthcount)
                & keepMask;
    }

    /**
     * Return the size in uncompressed words represented by this running
     * length word.
     *
     * @return the size
     */
    public long size() {
        final long runWords = getRunningLength();
        return runWords + getNumberOfLiteralWords();
    }

    /*
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append("running bit = ").append(getRunningBit());
        text.append(" running length = ").append(getRunningLength());
        text.append(" number of lit. words ").append(getNumberOfLiteralWords());
        return text.toString();
    }

    /**
     * Copies this view. The copy refers to the same parent bitmap and the
     * same position; {@code Object.clone()} already carries both fields over.
     */
    @Override
    public RunningLengthWord clone() throws CloneNotSupportedException {
        return (RunningLengthWord) super.clone();
    }
}

284
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark.java

@ -0,0 +1,284 @@
package com.fr.third.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.List;
import com.fr.third.googlecode.javaewah.EWAHCompressedBitmap;
import com.fr.third.googlecode.javaewah.FastAggregation;
import com.fr.third.googlecode.javaewah.IntIterator;
import com.fr.third.googlecode.javaewah.IteratingRLW;
import com.fr.third.googlecode.javaewah.IteratorAggregation;
import com.fr.third.googlecode.javaewah.IteratorUtil;
/**
 * This class is used to benchmark the performance of EWAH.
 *
 * For a range of sparsity settings it builds a set of bitmaps from
 * generated data, times several ways of reading them back and of
 * aggregating them (OR and AND), and prints one tab-separated line of
 * timings (in seconds) per setting. The iterator-based aggregates are also
 * checked against the bitmap-based ones before being timed.
 *
 * @author Daniel Lemire
 */
public class Benchmark {
    /**
     * Compute the union between two sorted arrays. Values present in both
     * inputs appear once in the result.
     *
     * @param set1 first sorted array
     * @param set2 second sorted array
     * @return merged array
     */
    static public int[] unite2by2(final int[] set1, final int[] set2) {
        int pos = 0;
        int k1 = 0, k2 = 0;
        // If one side is empty the union is a copy of the other side.
        if (0 == set1.length)
            return Arrays.copyOf(set2, set2.length);
        if (0 == set2.length)
            return Arrays.copyOf(set1, set1.length);
        // Sized for the worst case (no common values); trimmed on return.
        int[] buffer = new int[set1.length + set2.length];
        while (true) {
            if (set1[k1] < set2[k2]) {
                buffer[pos++] = set1[k1];
                ++k1;
                if (k1 >= set1.length) {
                    // set1 is exhausted: copy the rest of set2.
                    for (; k2 < set2.length; ++k2)
                        buffer[pos++] = set2[k2];
                    break;
                }
            } else if (set1[k1] == set2[k2]) {
                // Common value: emit once and advance both sides.
                buffer[pos++] = set1[k1];
                ++k1;
                ++k2;
                if (k1 >= set1.length) {
                    for (; k2 < set2.length; ++k2)
                        buffer[pos++] = set2[k2];
                    break;
                }
                if (k2 >= set2.length) {
                    for (; k1 < set1.length; ++k1)
                        buffer[pos++] = set1[k1];
                    break;
                }
            } else { // here set1[k1] > set2[k2]
                buffer[pos++] = set2[k2];
                ++k2;
                if (k2 >= set2.length) {
                    // set2 is exhausted: copy the rest of set1.
                    for (; k1 < set1.length; ++k1)
                        buffer[pos++] = set1[k1];
                    break;
                }
            }
        }
        return Arrays.copyOf(buffer, pos);
    }

    /**
     * Runs the benchmark with 100 bitmaps, nbr = 16 and one repetition.
     *
     * @param args ignored
     */
    @SuppressWarnings("javadoc")
    public static void main(String args[]) {
        //test(2, 24, 1);
        test(100, 16, 1);
    }

    /**
     * Runs the benchmark for sparsity = 1, 3, 5, ... while sparsity is below
     * {@code 30 - nbr}, printing one result line per setting.
     *
     * @param N number of bitmaps to build
     * @param nbr exponent used for the data size: {@code 1 << nbr} is passed
     *            to the data generator for each bitmap, and
     *            {@code 1 << (nbr + sparsity)} as its second argument
     * @param repeat how many times each timed section is repeated
     */
    @SuppressWarnings("javadoc")
    public static void test(int N, int nbr, int repeat) {
        DecimalFormat df = new DecimalFormat("0.###");
        ClusteredDataGenerator cdg = new ClusteredDataGenerator();
        for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) {
            // Accumulates values derived from the results; printed at the end.
            long bogus = 0;
            String line = "";
            long bef, aft;
            line += sparsity;
            int[][] data = new int[N][];
            int Max = (1 << (nbr + sparsity));
            System.out.println("# generating random data...");
            // A common set merged into every data set below, so that all
            // bitmaps share these values.
            int[] inter = cdg.generateClustered(1 << (nbr/2), Max);
            for (int k = 0; k < N; ++k)
                data[k] = unite2by2(cdg.generateClustered(1 << nbr, Max),inter);
            System.out.println("# generating random data... ok.");
            // building
            bef = System.currentTimeMillis();
            EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
            int size = 0;
            for (int r = 0; r < repeat; ++r) {
                size = 0;
                for (int k = 0; k < N; ++k) {
                    ewah[k] = new EWAHCompressedBitmap();
                    for (int x = 0; x < data[k].length; ++x) {
                        ewah[k].set(data[k][x]);
                    }
                    size += ewah[k].sizeInBytes();
                }
            }
            aft = System.currentTimeMillis();
            // Columns: total size in bytes, then the build time.
            line += "\t" + size;
            line += "\t" + df.format((aft - bef) / 1000.0);
            // uncompressing with toArray()
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    int[] array = ewah[k].toArray();
                    bogus += array.length;
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // uncompressing with the for-each iterator of the bitmap
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    int[] array = new int[ewah[k].cardinality()];
                    int c = 0;
                    for (int x : ewah[k])
                        array[c++] = x;
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // uncompressing with getPositions()
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    List<Integer> L = ewah[k].getPositions();
                    int[] array = new int[L.size()];
                    int c = 0;
                    for (int x : L)
                        array[c++] = x;
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // uncompressing with intIterator()
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    IntIterator iter = ewah[k].intIterator();
                    while (iter.hasNext()) {
                        bogus += iter.next();
                    }
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            line += "\t\t\t";
            // logical or, two bitmaps at a time, over the first k + 1 bitmaps
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap ewahor = ewah[0];
                    for (int j = 1; j < k + 1; ++j) {
                        ewahor = ewahor.or(ewah[j]);
                    }
                    bogus += ewahor.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // fast logical or: EWAHCompressedBitmap.or on the whole group
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahor = EWAHCompressedBitmap
                            .or(ewahcp);
                    bogus += ewahor.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // fast logical or: FastAggregation.or on the whole group
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahor = FastAggregation.or(ewahcp);
                    bogus += ewahor.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // fast logical or: iterator-based (IteratorAggregation.bufferedor)
            // run sanity check (untimed): the iterator-based result must equal
            // the result of EWAHCompressedBitmap.or
            for (int k = 0; k < N; ++k) {
                IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j].getIteratingRLW();
                }
                IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp);
                EWAHCompressedBitmap ewahorp = EWAHCompressedBitmap.or(Arrays.copyOf(ewah, k+1));
                EWAHCompressedBitmap mewahor = IteratorUtil.materialize(ewahor);
                if(!ewahorp.equals(mewahor)) throw new RuntimeException("bug");
            }
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j].getIteratingRLW();
                    }
                    IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp);
                    bogus += IteratorUtil.materialize(ewahor).sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            line += "\t\t\t";
            // logical and, two bitmaps at a time, over the first k + 1 bitmaps
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap ewahand = ewah[0];
                    for (int j = 1; j < k + 1; ++j) {
                        ewahand = ewahand.and(ewah[j]);
                    }
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // fast logical and: EWAHCompressedBitmap.and on the whole group
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j];
                    }
                    EWAHCompressedBitmap ewahand = EWAHCompressedBitmap
                            .and(ewahcp);
                    bogus += ewahand.sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // sanity check (untimed): the iterator-based result must equal
            // the result of EWAHCompressedBitmap.and
            for (int k = 0; k < N; ++k) {
                IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                for (int j = 0; j < k + 1; ++j) {
                    ewahcp[j] = ewah[j].getIteratingRLW();
                }
                IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp);
                EWAHCompressedBitmap ewahandp = EWAHCompressedBitmap.and(Arrays.copyOf(ewah, k+1));
                EWAHCompressedBitmap mewahand = IteratorUtil.materialize(ewahand);
                if(!ewahandp.equals(mewahand)) throw new RuntimeException("bug");
            }
            // fast logical and: iterator-based (IteratorAggregation.bufferedand)
            bef = System.currentTimeMillis();
            for (int r = 0; r < repeat; ++r)
                for (int k = 0; k < N; ++k) {
                    IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
                    for (int j = 0; j < k + 1; ++j) {
                        ewahcp[j] = ewah[j].getIteratingRLW();
                    }
                    IteratingRLW ewahand = IteratorAggregation.bufferedand(ewahcp);
                    bogus += IteratorUtil.materialize(ewahand).sizeInBits();
                }
            aft = System.currentTimeMillis();
            line += "\t" + df.format((aft - bef) / 1000.0);
            // NOTE(review): the header below names fewer columns than the
            // line holds (the sparsity, the size and the getPositions()
            // timing have no label) - confirm before relying on it.
            System.out
                    .println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and");
            System.out.println(line);
            System.out.println("# bogus =" + bogus);
        }
    }
}

212
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/Benchmark32.java

@ -0,0 +1,212 @@
package com.fr.third.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
import java.text.DecimalFormat;
import java.util.List;
import com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32;
import com.fr.third.googlecode.javaewah.FastAggregation;
import com.fr.third.googlecode.javaewah.IntIterator;
import com.fr.third.googlecode.javaewah32.IteratingRLW32;
import com.fr.third.googlecode.javaewah32.IteratorAggregation32;
import com.fr.third.googlecode.javaewah32.IteratorUtil32;
/**
* Benchmarks the 32-bit EWAH bitmap implementation (EWAHCompressedBitmap32):
* building the bitmaps, several ways of decoding them, and logical or / and
* aggregation. One tab-separated result line is printed per sparsity level.
*
* @author Daniel Lemire
*/
public class Benchmark32 {
/**
* Runs {@link #test(int, int, int)} with 100 bitmaps, nbr = 16 and one repetition.
*
* @param args not read
*/
@SuppressWarnings("javadoc")
public static void main(String args[]) {
test(100, 16, 1);
// test(2, 24, 1);
}
/**
* Runs every timed section once per sparsity level (1, 3, 5, ... while
* sparsity &lt; 30 - nbr) and prints the timings in seconds.
*
* @param N number of bitmaps to build
* @param nbr each bitmap is fed 1 &lt;&lt; nbr clustered values, combined with
* 1 &lt;&lt; (nbr/2) values shared by all bitmaps; values are drawn
* with upper bound 1 &lt;&lt; (nbr + sparsity)
* @param repeat how many times each timed section is repeated
*/
@SuppressWarnings("javadoc")
public static void test(int N, int nbr, int repeat) {
DecimalFormat df = new DecimalFormat("0.###");
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 30 - nbr; sparsity += 2) {
// bogus accumulates results of the timed work and is printed at the end;
// presumably this keeps the JIT from discarding the work - TODO confirm
long bogus = 0;
String line = "";
long bef, aft;
line += sparsity;
int[][] data = new int[N][];
int Max = (1 << (nbr + sparsity));
System.out.println("# generating random data...");
// inter is generated once and combined into every data set
int[] inter = cdg.generateClustered(1 << (nbr/2), Max);
for (int k = 0; k < N; ++k)
// NOTE(review): Benchmark.unite2by2 is defined outside this class;
// presumably it returns the sorted union of both arrays - confirm
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter);
System.out.println("# generating random data... ok.");
// building: set every value of data[k] in a fresh bitmap
bef = System.currentTimeMillis();
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N];
int size = 0;
for (int r = 0; r < repeat; ++r) {
// size is reset so that it reports the total of a single pass
size = 0;
for (int k = 0; k < N; ++k) {
ewah[k] = new EWAHCompressedBitmap32();
for (int x = 0; x < data[k].length; ++x) {
ewah[k].set(data[k][x]);
}
size += ewah[k].sizeInBytes();
}
}
aft = System.currentTimeMillis();
line += "\t" + size;
line += "\t" + df.format((aft - bef) / 1000.0);
// uncompressing with toArray()
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
int[] array = ewah[k].toArray();
bogus += array.length;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// uncompressing with the for-each (Java) iterator; the array is filled
// but never added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
int[] array = new int[ewah[k].cardinality()];
int c = 0;
for (int x : ewah[k])
array[c++] = x;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// uncompressing with getPositions(); the array is filled but never
// added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
List<Integer> L = ewah[k].getPositions();
int[] array = new int[L.size()];
int c = 0;
for (int x : L)
array[c++] = x;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// uncompressing with intIterator()
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IntIterator iter = ewah[k].intIterator();
while (iter.hasNext()) {
bogus += iter.next();
}
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
line += "\t\t\t";
// logical or, two bitmaps at a time, over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32 ewahor = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahor = ewahor.or(ewah[j]);
}
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// grouped logical or: EWAHCompressedBitmap32.or over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32
.or(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical or with FastAggregation.or over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = FastAggregation.or(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical or: IteratorAggregation32.bufferedor, then materialize
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j].getIteratingRLW();
}
IteratingRLW32 ewahor = IteratorAggregation32.bufferedor(ewahcp);
bogus += IteratorUtil32.materialize(ewahor).sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
line += "\t\t\t";
// logical and, two bitmaps at a time, over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32 ewahand = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahand = ewahand.and(ewah[j]);
}
bogus += ewahand.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// grouped logical and: EWAHCompressedBitmap32.and over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32
.and(ewahcp);
bogus += ewahand.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical and: IteratorAggregation32.bufferedand, then materialize
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j].getIteratingRLW();
}
IteratingRLW32 ewahand = IteratorAggregation32.bufferedand(ewahcp);
bogus += IteratorUtil32.materialize(ewahand).sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// NOTE(review): line starts with the sparsity and the total size in bytes and
// also holds a getPositions() timing; the header below has no label for these
System.out
.println("time for building, toArray(), Java iterator, intIterator,\t\t\t logical or (2-by-2), logical or (grouped), FastAggregation.or, iterator-based or, \t\t\t (2-by-2) and, logical and (grouped), iterator-based and");
System.out.println(line);
System.out.println("# bogus =" + bogus);
}
}
}

130
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection.java

@ -0,0 +1,130 @@
package com.fr.third.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.fr.third.googlecode.javaewah.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical and (intersection) aggregate of
* EWAHCompressedBitmap: pairwise and, EWAHCompressedBitmap.and,
* FastAggregation.bufferedand and the iterator-based bufferedand.
*/
public class BenchmarkIntersection {
/**
* Runs {@link #test(int, int, int)} with 10 bitmaps, nbr = 18 and one repetition.
*
* @param args not read
*/
@SuppressWarnings("javadoc")
public static void main(String args[]) {
test(10, 18, 1);
}
/**
* For every sparsity in [1, 30 - nbr) runs the whole measurement twice and
* prints one tab-separated line of timings (in seconds) per run.
*
* @param N number of bitmaps; the sanity check reads ewah[1], so N must be at least 2
* @param nbr each bitmap is fed 1 &lt;&lt; nbr clustered values, combined with
* 1 &lt;&lt; (nbr/2) values shared by all bitmaps; values are drawn
* with upper bound 1 &lt;&lt; (nbr + sparsity)
* @param repeat how many times each timed section is repeated
* @throws RuntimeException if the aggregation variants disagree in the sanity check
*/
@SuppressWarnings({ "javadoc"})
public static void test(int N, int nbr, int repeat) {
// bogus accumulates results of the timed work and is printed per sparsity;
// presumably this keeps the JIT from discarding the work - TODO confirm
long bogus = 0;
DecimalFormat df = new DecimalFormat("0.###");
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
for (int times = 0; times < 2; ++times) {
String line = "";
long bef, aft;
line += sparsity;
int[][] data = new int[N][];
int Max = (1 << (nbr + sparsity));
// inter is generated once and combined into every data set
int[] inter = cdg.generateClustered(1 << (nbr/2), Max);
for (int k = 0; k < N; ++k)
// NOTE(review): Benchmark.unite2by2 is defined outside this class;
// presumably it returns the sorted union of both arrays - confirm
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter);
// building (not timed)
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
for (int k = 0; k < N; ++k) {
ewah[k] = new EWAHCompressedBitmap();
for (int x = 0; x < data[k].length; ++x) {
ewah[k].set(data[k][x]);
}
// drop the reference to the raw values once the bitmap is built
data[k] = null;
}
// sanity check: all aggregation variants must agree with the pairwise result
if (true) {
EWAHCompressedBitmap answer = ewah[0].and(ewah[1]);
for (int k = 2; k < ewah.length; ++k)
answer = answer.and(ewah[k]);
EWAHCompressedBitmap ewahand = EWAHCompressedBitmap.and(ewah);
if (!answer.equals(ewahand))
throw new RuntimeException(
"bug EWAHCompressedBitmap.and");
EWAHCompressedBitmap ewahand2 = FastAggregation
.bufferedand(65536,ewah);
if (!ewahand.equals(ewahand2))
throw new RuntimeException(
"bug FastAggregation.bufferedand ");
}
// logical and, two bitmaps at a time, over the first k + 1 bitmaps;
// the result is not added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap ewahor = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahor = ewahor.and(ewah[j]);
}
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// grouped logical and: EWAHCompressedBitmap.and over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap
.and(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical and with FastAggregation.bufferedand (first argument 65536)
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap ewahor = FastAggregation
.bufferedand(65536,ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical and: IteratorAggregation.bufferedand, consumed
// through IteratorUtil.cardinality
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = new IteratingBufferedRunningLengthWord(
ewah[j]);
}
IteratingRLW ewahor = IteratorAggregation.bufferedand(ewahcp);
int wordcounter = IteratorUtil.cardinality(ewahor);
bogus += wordcounter;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// line holds the sparsity followed by the four timings named in the header
System.out
.println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand");
System.out.println(line);
}
System.out.println("# bogus =" + bogus);
}
}
}

130
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkIntersection32.java

@ -0,0 +1,130 @@
package com.fr.third.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.fr.third.googlecode.javaewah32.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical and (intersection) aggregate of
* EWAHCompressedBitmap32: pairwise and, EWAHCompressedBitmap32.and,
* FastAggregation32.bufferedand and the iterator-based bufferedand.
*/
public class BenchmarkIntersection32 {
/**
* Runs {@link #test(int, int, int)} with 10 bitmaps, nbr = 18 and one repetition.
*
* @param args not read
*/
@SuppressWarnings("javadoc")
public static void main(String args[]) {
test(10, 18, 1);
}
/**
* For every sparsity in [1, 30 - nbr) runs the whole measurement twice and
* prints one tab-separated line of timings (in seconds) per run.
*
* @param N number of bitmaps; the sanity check reads ewah[1], so N must be at least 2
* @param nbr each bitmap is fed 1 &lt;&lt; nbr clustered values, combined with
* 1 &lt;&lt; (nbr/2) values shared by all bitmaps; values are drawn
* with upper bound 1 &lt;&lt; (nbr + sparsity)
* @param repeat how many times each timed section is repeated
* @throws RuntimeException if the aggregation variants disagree in the sanity check
*/
@SuppressWarnings({ "javadoc" })
public static void test(int N, int nbr, int repeat) {
// bogus accumulates results of the timed work and is printed per sparsity;
// presumably this keeps the JIT from discarding the work - TODO confirm
long bogus = 0;
DecimalFormat df = new DecimalFormat("0.###");
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
for (int times = 0; times < 2; ++times) {
String line = "";
long bef, aft;
line += sparsity;
int[][] data = new int[N][];
int Max = (1 << (nbr + sparsity));
// inter is generated once and combined into every data set
int[] inter = cdg.generateClustered(1 << (nbr/2), Max);
for (int k = 0; k < N; ++k)
// NOTE(review): Benchmark.unite2by2 is defined outside this class;
// presumably it returns the sorted union of both arrays - confirm
data[k] = Benchmark.unite2by2(cdg.generateClustered(1 << nbr, Max),inter);
// building (not timed)
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N];
for (int k = 0; k < N; ++k) {
ewah[k] = new EWAHCompressedBitmap32();
for (int x = 0; x < data[k].length; ++x) {
ewah[k].set(data[k][x]);
}
// drop the reference to the raw values once the bitmap is built
data[k] = null;
}
// sanity check: all aggregation variants must agree with the pairwise result
if (true) {
EWAHCompressedBitmap32 answer = ewah[0].and(ewah[1]);
for (int k = 2; k < ewah.length; ++k)
answer = answer.and(ewah[k]);
EWAHCompressedBitmap32 ewahand = EWAHCompressedBitmap32.and(ewah);
if (!answer.equals(ewahand))
throw new RuntimeException(
"bug EWAHCompressedBitmap.and");
EWAHCompressedBitmap32 ewahand2 = FastAggregation32
.bufferedand(65536,ewah);
if (!ewahand.equals(ewahand2))
throw new RuntimeException(
"bug FastAggregation.bufferedand ");
}
// logical and, two bitmaps at a time, over the first k + 1 bitmaps;
// the result is not added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32 ewahor = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahor = ewahor.and(ewah[j]);
}
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// grouped logical and: EWAHCompressedBitmap32.and over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32
.and(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical and with FastAggregation32.bufferedand (first argument 65536)
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = FastAggregation32
.bufferedand(65536,ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical and: IteratorAggregation32.bufferedand, consumed
// through IteratorUtil32.cardinality
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = new IteratingBufferedRunningLengthWord32(
ewah[j]);
}
IteratingRLW32 ewahor = IteratorAggregation32.bufferedand(ewahcp);
int wordcounter = IteratorUtil32.cardinality(ewahor);
bogus += wordcounter;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// line holds the sparsity followed by the four timings named in the header
System.out
.println("# times for: 2by2 EWAHCompressedBitmap.and bufferedand iterator-bufferedand");
System.out.println(line);
}
System.out.println("# bogus =" + bogus);
}
}
}

164
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion.java

@ -0,0 +1,164 @@
package com.fr.third.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.fr.third.googlecode.javaewah.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical or (union) aggregate of EWAHCompressedBitmap:
* pairwise or, EWAHCompressedBitmap.or, FastAggregation.or,
* FastAggregation.bufferedor, FastAggregation.legacy_orWithContainer and the
* iterator-based bufferedor.
*/
public class BenchmarkUnion {
/**
* Runs {@link #test(int, int, int)} with 10 bitmaps, nbr = 18 and one repetition.
*
* @param args not read
*/
@SuppressWarnings("javadoc")
public static void main(String args[]) {
test(10, 18, 1);
}
/**
* For every sparsity in [1, 30 - nbr) runs the whole measurement twice and
* prints one tab-separated line of timings (in seconds) per run.
*
* @param N number of bitmaps; the sanity check reads ewah[1], so N must be at least 2
* @param nbr each bitmap is fed 1 &lt;&lt; nbr clustered values drawn with
* upper bound 1 &lt;&lt; (nbr + sparsity)
* @param repeat how many times each timed section is repeated
* @throws RuntimeException if the aggregation variants disagree in the sanity check
*/
@SuppressWarnings({ "javadoc", "deprecation" })
public static void test(int N, int nbr, int repeat) {
// bogus accumulates results of the timed work and is printed per sparsity;
// presumably this keeps the JIT from discarding the work - TODO confirm
long bogus = 0;
DecimalFormat df = new DecimalFormat("0.###");
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
for (int times = 0; times < 2; ++times) {
String line = "";
long bef, aft;
line += sparsity;
int[][] data = new int[N][];
int Max = (1 << (nbr + sparsity));
for (int k = 0; k < N; ++k)
data[k] = cdg.generateClustered(1 << nbr, Max);
// building (not timed)
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
for (int k = 0; k < N; ++k) {
ewah[k] = new EWAHCompressedBitmap();
for (int x = 0; x < data[k].length; ++x) {
ewah[k].set(data[k][x]);
}
// drop the reference to the raw values once the bitmap is built
data[k] = null;
}
// sanity check: all aggregation variants must agree with the pairwise result
if (true) {
EWAHCompressedBitmap answer = ewah[0].or(ewah[1]);
for (int k = 2; k < ewah.length; ++k)
answer = answer.or(ewah[k]);
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap.or(ewah);
if (!answer.equals(ewahor))
throw new RuntimeException(
"bug EWAHCompressedBitmap.or");
EWAHCompressedBitmap ewahor3 = FastAggregation.or(ewah);
if (!ewahor.equals(ewahor3))
throw new RuntimeException("bug FastAggregation.or");
EWAHCompressedBitmap ewahor2 = FastAggregation
.bufferedor(65536,ewah);
if (!ewahor.equals(ewahor2))
throw new RuntimeException(
"bug FastAggregation.bufferedor ");
}
// logical or, two bitmaps at a time, over the first k + 1 bitmaps;
// the result is not added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap ewahor = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahor = ewahor.or(ewah[j]);
}
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// grouped logical or: EWAHCompressedBitmap.or over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap ewahor = EWAHCompressedBitmap
.or(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical or with FastAggregation.or
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap ewahor = FastAggregation
.or(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical or with FastAggregation.bufferedor (first argument 65536)
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap ewahor = FastAggregation
.bufferedor(65536,ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical or with FastAggregation.legacy_orWithContainer, writing into a
// fresh bitmap x
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap x = new EWAHCompressedBitmap();
FastAggregation.legacy_orWithContainer(x, ewahcp);
bogus += x.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical or: IteratorAggregation.bufferedor, consumed
// through IteratorUtil.cardinality
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = new IteratingBufferedRunningLengthWord(
ewah[j]);
}
IteratingRLW ewahor = IteratorAggregation.bufferedor(ewahcp);
int wordcounter = IteratorUtil.cardinality(ewahor);
bogus += wordcounter;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// NOTE(review): the header names seven series but only six timings are
// appended to line above; "experimentalor" has no matching measurement
System.out
.println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor");
System.out.println(line);
}
System.out.println("# bogus =" + bogus);
}
}
}

165
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkUnion32.java

@ -0,0 +1,165 @@
package com.fr.third.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.fr.third.googlecode.javaewah.FastAggregation;
import com.fr.third.googlecode.javaewah32.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical or (union) aggregate of EWAHCompressedBitmap32:
* pairwise or, EWAHCompressedBitmap32.or, FastAggregation.or,
* FastAggregation32.bufferedor, FastAggregation32.legacy_orWithContainer and
* the iterator-based bufferedor.
*/
public class BenchmarkUnion32 {
/**
* Runs {@link #test(int, int, int)} with 10 bitmaps, nbr = 18 and one repetition.
*
* @param args not read
*/
@SuppressWarnings("javadoc")
public static void main(String args[]) {
test(10, 18, 1);
}
/**
* For every sparsity in [1, 30 - nbr) runs the whole measurement twice and
* prints one tab-separated line of timings (in seconds) per run.
*
* @param N number of bitmaps; the sanity check reads ewah[1], so N must be at least 2
* @param nbr each bitmap is fed 1 &lt;&lt; nbr clustered values drawn with
* upper bound 1 &lt;&lt; (nbr + sparsity)
* @param repeat how many times each timed section is repeated
* @throws RuntimeException if the aggregation variants disagree in the sanity check
*/
@SuppressWarnings({ "javadoc", "deprecation" })
public static void test(int N, int nbr, int repeat) {
// bogus accumulates results of the timed work and is printed per sparsity;
// presumably this keeps the JIT from discarding the work - TODO confirm
long bogus = 0;
DecimalFormat df = new DecimalFormat("0.###");
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
for (int times = 0; times < 2; ++times) {
String line = "";
long bef, aft;
line += sparsity;
int[][] data = new int[N][];
int Max = (1 << (nbr + sparsity));
for (int k = 0; k < N; ++k)
data[k] = cdg.generateClustered(1 << nbr, Max);
// building (not timed)
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N];
for (int k = 0; k < N; ++k) {
ewah[k] = new EWAHCompressedBitmap32();
for (int x = 0; x < data[k].length; ++x) {
ewah[k].set(data[k][x]);
}
// drop the reference to the raw values once the bitmap is built
data[k] = null;
}
// sanity check: all aggregation variants must agree with the pairwise result
if(true){
EWAHCompressedBitmap32 answer = ewah[0].or(ewah[1]);
for(int k = 2; k < ewah.length; ++k)
answer = answer.or(ewah[k]);
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32
.or(ewah);
if(!answer.equals(ewahor)) throw new RuntimeException("bug EWAHCompressedBitmap.or");
EWAHCompressedBitmap32 ewahor3 = FastAggregation
.or(ewah);
if(!ewahor.equals(ewahor3)) throw new RuntimeException("bug FastAggregation.or");
EWAHCompressedBitmap32 ewahor2 = FastAggregation32
.bufferedor(65536,ewah);
if(!ewahor.equals(ewahor2)) throw new RuntimeException("bug FastAggregation.bufferedor ");
}
// logical or, two bitmaps at a time, over the first k + 1 bitmaps;
// the result is not added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32 ewahor = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahor = ewahor.or(ewah[j]);
}
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// grouped logical or: EWAHCompressedBitmap32.or over the first k + 1 bitmaps
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = EWAHCompressedBitmap32
.or(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical or with FastAggregation.or
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = FastAggregation
.or(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical or with FastAggregation32.bufferedor (first argument 65536)
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = FastAggregation32
.bufferedor(65536,ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical or with FastAggregation32.legacy_orWithContainer, writing into a
// fresh bitmap x
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 x = new EWAHCompressedBitmap32();
FastAggregation32.legacy_orWithContainer(x, ewahcp);
bogus += x.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical or: IteratorAggregation32.bufferedor, consumed
// through IteratorUtil32.cardinality
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = new IteratingBufferedRunningLengthWord32(ewah[j]);
}
IteratingRLW32 ewahor = IteratorAggregation32
.bufferedor(ewahcp);
int wordcounter = IteratorUtil32.cardinality(ewahor);
bogus += wordcounter;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// NOTE(review): the header names seven series but only six timings are
// appended to line above; "experimentalor" has no matching measurement
System.out
.println("# times for: 2by2 EWAHCompressedBitmap.or FastAggregation.or experimentalor bufferedor legacygroupedor iterator-bufferedor");
System.out.println(line);
}
System.out.println("# bogus =" + bogus);
}
}
}

134
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR.java

@ -0,0 +1,134 @@
package com.fr.third.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.fr.third.googlecode.javaewah.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical xor aggregate of EWAHCompressedBitmap: pairwise
* xor, FastAggregation.xor, FastAggregation.bufferedxor and the
* iterator-based bufferedxor.
*/
public class BenchmarkXOR {
/**
* Runs {@link #test(int, int, int)} with 2 bitmaps, nbr = 22 and one repetition.
*
* @param args not read
*/
@SuppressWarnings("javadoc")
public static void main(String args[]) {
//test(10, 18, 1);
test(2, 22, 1);
}
/**
* For every sparsity in [1, 30 - nbr) runs the whole measurement twice and
* prints one tab-separated line of timings (in seconds) per run.
*
* @param N number of bitmaps; the sanity check reads ewah[1], so N must be at least 2
* @param nbr each bitmap is fed 1 &lt;&lt; nbr clustered values drawn with
* upper bound 1 &lt;&lt; (nbr + sparsity)
* @param repeat how many times each timed section is repeated
* @throws RuntimeException if the aggregation variants disagree in the sanity check
*/
@SuppressWarnings({ "javadoc" })
public static void test(int N, int nbr, int repeat) {
// bogus accumulates results of the timed work and is printed per sparsity;
// presumably this keeps the JIT from discarding the work - TODO confirm
long bogus = 0;
DecimalFormat df = new DecimalFormat("0.###");
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
for (int times = 0; times < 2; ++times) {
String line = "";
long bef, aft;
line += sparsity;
int[][] data = new int[N][];
int Max = (1 << (nbr + sparsity));
for (int k = 0; k < N; ++k)
data[k] = cdg.generateClustered(1 << nbr, Max);
// building (not timed)
EWAHCompressedBitmap[] ewah = new EWAHCompressedBitmap[N];
for (int k = 0; k < N; ++k) {
ewah[k] = new EWAHCompressedBitmap();
for (int x = 0; x < data[k].length; ++x) {
ewah[k].set(data[k][x]);
}
// drop the reference to the raw values once the bitmap is built
data[k] = null;
}
// sanity check: all aggregation variants must agree with the pairwise result
if (true) {
EWAHCompressedBitmap answer = ewah[0].xor(ewah[1]);
for (int k = 2; k < ewah.length; ++k)
answer = answer.xor(ewah[k]);
EWAHCompressedBitmap ewahor3 = FastAggregation.xor(ewah);
if (!answer.equals(ewahor3))
throw new RuntimeException("bug FastAggregation.xor");
EWAHCompressedBitmap ewahor2 = FastAggregation
.bufferedxor(65536,ewah);
if (!answer.equals(ewahor2))
throw new RuntimeException(
"bug FastAggregation.bufferedxor ");
EWAHCompressedBitmap iwah = IteratorUtil.materialize(IteratorAggregation.bufferedxor(IteratorUtil.toIterators(ewah)));
if (!answer.equals(iwah))
throw new RuntimeException(
"bug xor it ");
}
// logical xor, two bitmaps at a time, over the first k + 1 bitmaps;
// the result is not added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap ewahor = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahor = ewahor.xor(ewah[j]);
}
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical xor with FastAggregation.xor
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap ewahor = FastAggregation
.xor(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical xor with FastAggregation.bufferedxor (first argument 65536)
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap[] ewahcp = new EWAHCompressedBitmap[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap ewahor = FastAggregation
.bufferedxor(65536,ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical xor: IteratorAggregation.bufferedxor, consumed
// through IteratorUtil.cardinality
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW[] ewahcp = new IteratingRLW[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = new IteratingBufferedRunningLengthWord(
ewah[j]);
}
IteratingRLW ewahor = IteratorAggregation.bufferedxor(ewahcp);
int wordcounter = IteratorUtil.cardinality(ewahor);
bogus += wordcounter;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// line holds the sparsity followed by the four timings named in the header
System.out
.println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based");
System.out.println(line);
}
System.out.println("# bogus =" + bogus);
}
}
}

137
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/BenchmarkXOR32.java

@ -0,0 +1,137 @@
package com.fr.third.googlecode.javaewah.benchmark;
import java.text.DecimalFormat;
import com.fr.third.googlecode.javaewah.FastAggregation;
import com.fr.third.googlecode.javaewah32.*;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* To benchmark the logical xor aggregate of EWAHCompressedBitmap32: pairwise
* xor, FastAggregation.xor, FastAggregation32.bufferedxor and the
* iterator-based bufferedxor.
*/
public class BenchmarkXOR32 {
/**
* Runs {@link #test(int, int, int)} with 10 bitmaps, nbr = 18 and one repetition.
*
* @param args not read
*/
@SuppressWarnings("javadoc")
public static void main(String args[]) {
test(10, 18, 1);
//test(2, 22, 1);
}
/**
* For every sparsity in [1, 30 - nbr) runs the whole measurement twice and
* prints one tab-separated line of timings (in seconds) per run.
*
* @param N number of bitmaps; the sanity check reads ewah[1], so N must be at least 2
* @param nbr each bitmap is fed 1 &lt;&lt; nbr clustered values drawn with
* upper bound 1 &lt;&lt; (nbr + sparsity)
* @param repeat how many times each timed section is repeated
* @throws RuntimeException if the aggregation variants disagree in the sanity check
*/
@SuppressWarnings({ "javadoc" })
public static void test(int N, int nbr, int repeat) {
// bogus accumulates results of the timed work and is printed per sparsity;
// presumably this keeps the JIT from discarding the work - TODO confirm
long bogus = 0;
DecimalFormat df = new DecimalFormat("0.###");
ClusteredDataGenerator cdg = new ClusteredDataGenerator();
for (int sparsity = 1; sparsity < 30 - nbr; sparsity++) {
for (int times = 0; times < 2; ++times) {
String line = "";
long bef, aft;
line += sparsity;
int[][] data = new int[N][];
int Max = (1 << (nbr + sparsity));
for (int k = 0; k < N; ++k)
data[k] = cdg.generateClustered(1 << nbr, Max);
// building (not timed)
EWAHCompressedBitmap32[] ewah = new EWAHCompressedBitmap32[N];
for (int k = 0; k < N; ++k) {
ewah[k] = new EWAHCompressedBitmap32();
for (int x = 0; x < data[k].length; ++x) {
ewah[k].set(data[k][x]);
}
// drop the reference to the raw values once the bitmap is built
data[k] = null;
}
// sanity check: all aggregation variants must agree with the pairwise result
if (true) {
EWAHCompressedBitmap32 answer = ewah[0].xor(ewah[1]);
for (int k = 2; k < ewah.length; ++k)
answer = answer.xor(ewah[k]);
EWAHCompressedBitmap32 ewahor3 = FastAggregation.xor(ewah);
if (!answer.equals(ewahor3))
throw new RuntimeException("bug FastAggregation.xor");
EWAHCompressedBitmap32 ewahor2 = FastAggregation32
.bufferedxor(65536,ewah);
if (!answer.equals(ewahor2))
throw new RuntimeException(
"bug FastAggregation.bufferedxor ");
EWAHCompressedBitmap32 iwah = IteratorUtil32.materialize(IteratorAggregation32.bufferedxor(IteratorUtil32.toIterators(ewah)));
if (!answer.equals(iwah))
throw new RuntimeException(
"bug xor it ");
}
// logical xor, two bitmaps at a time, over the first k + 1 bitmaps;
// the result is not added to bogus
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32 ewahor = ewah[0];
for (int j = 1; j < k + 1; ++j) {
ewahor = ewahor.xor(ewah[j]);
}
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical xor with FastAggregation.xor
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = FastAggregation
.xor(ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// logical xor with FastAggregation32.bufferedxor (first argument 65536)
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
EWAHCompressedBitmap32[] ewahcp = new EWAHCompressedBitmap32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = ewah[j];
}
EWAHCompressedBitmap32 ewahor = FastAggregation32
.bufferedxor(65536,ewahcp);
bogus += ewahor.sizeInBits();
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// iterator-based logical xor: IteratorAggregation32.bufferedxor, consumed
// through IteratorUtil32.cardinality
bef = System.currentTimeMillis();
for (int r = 0; r < repeat; ++r)
for (int k = 0; k < N; ++k) {
IteratingRLW32[] ewahcp = new IteratingRLW32[k + 1];
for (int j = 0; j < k + 1; ++j) {
ewahcp[j] = new IteratingBufferedRunningLengthWord32(
ewah[j]);
}
IteratingRLW32 ewahor = IteratorAggregation32.bufferedxor(ewahcp);
int wordcounter = IteratorUtil32.cardinality(ewahor);
bogus += wordcounter;
}
aft = System.currentTimeMillis();
line += "\t" + df.format((aft - bef) / 1000.0);
// line holds the sparsity followed by the four timings named in the header
System.out
.println("# times for: 2by2 FastAggregation.xor bufferedxor iterator-based");
System.out.println(line);
}
System.out.println("# bogus =" + bogus);
}
}
}

78
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/ClusteredDataGenerator.java

@ -0,0 +1,78 @@
package com.fr.third.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Produces arrays of random integers whose values tend to bunch together
 * ("clustered" distribution) instead of being spread evenly.
 * Reference:
 * Anh VN, Moffat A. Index compression using 64-bit words. Software: Practice and Experience 2010; 40(2):131-147.
 *
 * @author Daniel Lemire
 */
public class ClusteredDataGenerator {
    /** Supplies the uniform leaf segments and the shared random source. */
    UniformDataGenerator unidg;

    /**
     * Creates a generator backed by an unseeded uniform generator.
     */
    public ClusteredDataGenerator() {
        this.unidg = new UniformDataGenerator();
    }

    /**
     * Creates a generator backed by a seeded uniform generator.
     *
     * @param seed random seed
     */
    public ClusteredDataGenerator(final int seed) {
        this.unidg = new UniformDataGenerator(seed);
    }

    /**
     * Generates N random integers in the range [0, Max) with a clustered
     * distribution.
     *
     * @param N number of integers
     * @param Max exclusive upper bound of the generated values
     * @return a randomly generated array
     */
    public int[] generateClustered(int N, int Max) {
        final int[] result = new int[N];
        fillClustered(result, 0, N, 0, Max);
        return result;
    }

    /**
     * Fills array[offset .. offset + length) with values from [Min, Max).
     * Small segments (at most 10 values) and segments that must use every
     * value of the range are filled uniformly; otherwise the range is cut at
     * a random point and each half is filled either uniformly or, recursively,
     * in a clustered way.
     */
    void fillClustered(int[] array, int offset, int length, int Min, int Max) {
        final int span = Max - Min;
        if (span == length || length <= 10) {
            fillUniform(array, offset, length, Min, Max);
            return;
        }
        final int firstHalf = length / 2;
        final int secondHalf = length - firstHalf;
        // The cut leaves at least firstHalf values on the left; the random
        // part is drawn before the probability below, as the order of draws
        // determines the generated sequence.
        final int slack = span - length - 1;
        final int jitter = (slack > 0) ? this.unidg.rand.nextInt(slack) : 0;
        final int split = Min + firstHalf + jitter;
        final double p = this.unidg.rand.nextDouble();

        // Left half: uniform with probability 0.25, clustered otherwise.
        if (p < 0.25) {
            fillUniform(array, offset, firstHalf, Min, split);
        } else {
            fillClustered(array, offset, firstHalf, Min, split);
        }
        // Right half: uniform only when p lies in [0.25, 0.5).
        if (p >= 0.25 && p < 0.5) {
            fillUniform(array, offset + firstHalf, secondHalf, split, Max);
        } else {
            fillClustered(array, offset + firstHalf, secondHalf, split, Max);
        }
    }

    /**
     * Fills array[offset .. offset + length) with uniformly generated values
     * shifted into [Min, Max).
     */
    void fillUniform(int[] array, int offset, int length, int Min, int Max) {
        final int[] values = this.unidg.generateUniform(length, Max - Min);
        int target = offset;
        for (final int value : values) {
            array[target++] = Min + value;
        }
    }
}

114
fine-jgit/src/com/fr/third/googlecode/javaewah/benchmark/UniformDataGenerator.java

@ -0,0 +1,114 @@
package com.fr.third.googlecode.javaewah.benchmark;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
/**
 * This class will generate "uniform" lists of random integers: sorted arrays
 * of distinct values drawn from [0, Max).
 *
 * @author Daniel Lemire
 */
public class UniformDataGenerator {
    /**
     * Source of randomness. Package-visible because ClusteredDataGenerator
     * draws from it directly. Assigned by both constructors.
     */
    Random rand;

    /**
     * construct generator of random arrays.
     */
    public UniformDataGenerator() {
        this.rand = new Random();
    }

    /**
     * @param seed random seed
     */
    public UniformDataGenerator(final int seed) {
        this.rand = new Random(seed);
    }

    /**
     * generates randomly N distinct integers from 0 (inclusive) to Max
     * (exclusive) by rejection sampling into a hash set; suited to sparse
     * requests.
     *
     * @param N Number of integers to generate
     * @param Max Exclusive upper bound of the integers
     * @return sorted array of N distinct integers
     * @throws IllegalArgumentException if N is larger than Max
     */
    int[] generateUniformHash(int N, int Max) {
        if (N > Max)
            throw new IllegalArgumentException("not possible");
        int[] ans = new int[N];
        HashSet<Integer> s = new HashSet<Integer>();
        while (s.size() < N)
            s.add(Integer.valueOf(this.rand.nextInt(Max)));
        int k = 0;
        for (Integer value : s)
            ans[k++] = value.intValue();
        // the set iterates in no particular order
        Arrays.sort(ans);
        return ans;
    }

    /**
     * output all integers from the range [0,Max) that are not
     * in the array
     *
     * @param x sorted array of distinct values from [0,Max)
     * @param Max exclusive upper bound of the range
     * @return sorted array of the Max - x.length missing values
     */
    static int[] negate(int[] x, int Max) {
        int[] ans = new int[Max - x.length];
        int i = 0;
        int c = 0;
        for (int v : x) {
            // copy the gap before v, then step over v itself
            for (; i < v; ++i)
                ans[c++] = i;
            ++i;
        }
        while (c < ans.length)
            ans[c++] = i++;
        return ans;
    }

    /**
     * generates randomly N distinct integers from 0 (inclusive) to Max
     * (exclusive), choosing a strategy from the requested density.
     *
     * @param N Number of integers to generate
     * @param Max Exclusive upper bound of the integers
     * @return sorted array containing random integers
     * @throws IllegalArgumentException if N is larger than Max
     */
    public int[] generateUniform(int N, int Max) {
        // Without this guard the recursion below would be entered with a
        // negative count and fail with NegativeArraySizeException.
        if (N > Max)
            throw new IllegalArgumentException("not possible");
        // long arithmetic: N * 2 and 2048 * N overflow int for large N
        if (2L * N > Max) {
            // more than half of the range is wanted: pick the values to leave out
            return negate(generateUniform(Max - N, Max), Max);
        }
        if (2048L * N > Max)
            return generateUniformBitmap(N, Max);
        return generateUniformHash(N, Max);
    }

    /**
     * generates randomly N distinct integers from 0 (inclusive) to Max
     * (exclusive) using a bitmap.
     *
     * @param N Number of integers to generate
     * @param Max Exclusive upper bound of the integers
     * @return sorted array containing random integers
     * @throws IllegalArgumentException if N is larger than Max
     */
    int[] generateUniformBitmap(int N, int Max) {
        if (N > Max)
            throw new IllegalArgumentException("not possible");
        int[] ans = new int[N];
        BitSet bs = new BitSet(Max);
        int cardinality = 0;
        while (cardinality < N) {
            int v = this.rand.nextInt(Max);
            if (!bs.get(v)) {
                bs.set(v);
                cardinality++;
            }
        }
        // walking the set bits yields the values in increasing order
        int pos = 0;
        for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
            ans[pos++] = i;
        }
        return ans;
    }
}

102
fine-jgit/src/com/fr/third/googlecode/javaewah32/BitCounter32.java

@ -0,0 +1,102 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
* Licensed under the Apache License, Version 2.0.
*/
/**
 * BitCounter is a fake bitset data structure. Instead of storing the actual data,
 * it only records the number of set bits.
 *
 * @since 0.5.0
 * @author Daniel Lemire and David McIntosh
 */
public final class BitCounter32 implements BitmapStorage32 {
    /** Running total of the set bits seen so far. */
    private int oneBits;

    /**
     * Virtually add a word to the bitmap: only its set bits are counted.
     *
     * @param newdata the word
     */
    @Override
    public void add(final int newdata) {
        this.oneBits += Integer.bitCount(newdata);
    }

    /**
     * Virtually add several literal words.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    @Override
    public void addStreamOfLiteralWords(int[] data, int start, int number) {
        final int end = start + number;
        for (int idx = start; idx < end; ++idx) {
            this.oneBits += Integer.bitCount(data[idx]);
        }
    }

    /**
     * Virtually add a run of identical words. A run of zeroes contributes
     * nothing; a run of ones contributes a full word of bits per word.
     *
     * @param v zeros or ones
     * @param number how many words to add
     */
    @Override
    public void addStreamOfEmptyWords(boolean v, int number) {
        if (!v) {
            return;
        }
        this.oneBits += number * EWAHCompressedBitmap32.wordinbits;
    }

    /**
     * Virtually add several literal words, each bitwise-negated first.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    @Override
    public void addStreamOfNegatedLiteralWords(int[] data, int start,
            int number) {
        final int end = start + number;
        for (int idx = start; idx < end; ++idx) {
            this.oneBits += Integer.bitCount(~data[idx]);
        }
    }

    /**
     * As you act on this class, it records the number of set (true) bits.
     *
     * @return number of set bits
     */
    public int getCount() {
        return this.oneBits;
    }

    /**
     * Required by the storage interface; this class keeps no size, so the
     * call is ignored.
     *
     * @param bits number of bits
     */
    @Override
    public void setSizeInBits(int bits) {
        // intentionally empty: only the bit count is tracked
    }
}

60
fine-jgit/src/com/fr/third/googlecode/javaewah32/BitmapStorage32.java

@ -0,0 +1,60 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Low level bitset writing methods.
 *
 * @since 0.5.0
 * @author Daniel Lemire and David McIntosh
 */
public interface BitmapStorage32 {
    /**
     * Adding words directly to the bitmap (for expert use).
     *
     * This is normally how you add data to the array: one {@code int}
     * word (32 bits) at a time.
     *
     * @param newdata the word
     */
    void add(int newdata);

    /**
     * if you have several literal words to copy over, this might be faster.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    void addStreamOfLiteralWords(int[] data, int start, int number);

    /**
     * For experts: You want to add many
     * zeroes or ones? This is the method you use.
     *
     * @param v zeros or ones
     * @param number how many words to add
     */
    void addStreamOfEmptyWords(boolean v, int number);

    /**
     * Like "addStreamOfLiteralWords" but negates the words being added.
     *
     * @param data the literal words
     * @param start the starting point in the array
     * @param number the number of literal words to add
     */
    void addStreamOfNegatedLiteralWords(int[] data, int start, int number);

    /**
     * directly set the sizeinbits field
     *
     * @param bits number of bits
     */
    void setSizeInBits(int bits);
}

152
fine-jgit/src/com/fr/third/googlecode/javaewah32/BufferedIterator32.java

@ -0,0 +1,152 @@
package com.fr.third.googlecode.javaewah32;
import com.fr.third.googlecode.javaewah.CloneableIterator;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * This class can be used to iterate over blocks of bitmap data.
 *
 * @author Daniel Lemire
 *
 */
public class BufferedIterator32 implements IteratingRLW32, Cloneable {
    /** Buffered copy of the running length word currently being consumed. */
    private BufferedRunningLengthWord32 brlw;
    /** Word array of the bitmap the current inner iterator walks. */
    private int[] buffer;
    /** Index in {@link #buffer} of the first literal word not yet discarded. */
    private int literalWordStartPosition;
    /** Inner iterator currently being drained. */
    private EWAHIterator32 iterator;
    /** Supplier of further inner iterators. */
    private CloneableIterator<EWAHIterator32> masteriterator;

    /**
     * Instantiates a new iterating buffered running length word.
     *
     * @param iterator iterator
     */
    public BufferedIterator32(final CloneableIterator<EWAHIterator32> iterator) {
        this.masteriterator = iterator;
        if (!this.masteriterator.hasNext()) {
            // nothing to iterate over: the remaining fields keep their defaults
            return;
        }
        this.iterator = this.masteriterator.next();
        this.brlw = new BufferedRunningLengthWord32(this.iterator.next());
        this.literalWordStartPosition = this.iterator.literalWords() + this.brlw.literalwordoffset;
        this.buffer = this.iterator.buffer();
    }

    /**
     * Discard first words, iterating to the next running length word if needed.
     *
     * @param x the number of words to be discarded
     */
    @Override
    public void discardFirstWords(int x) {
        while (x > 0) {
            if (this.brlw.RunningLength > x) {
                // the run alone absorbs the whole request
                this.brlw.RunningLength -= x;
                return;
            }
            x -= this.brlw.RunningLength;
            this.brlw.RunningLength = 0;
            final int dropped = Math.min(x, this.brlw.NumberOfLiteralWords);
            this.literalWordStartPosition += dropped;
            this.brlw.NumberOfLiteralWords -= dropped;
            x -= dropped;
            final boolean needNextWord = (x > 0) || (this.brlw.size() == 0);
            if (needNextWord && !this.next()) {
                return;
            }
        }
    }

    /**
     * Move to the next RunningLengthWord
     *
     * @return whether the move was possible
     */
    @Override
    public boolean next() {
        if (!this.iterator.hasNext() && !reload()) {
            // everything is exhausted: present an empty word
            this.brlw.NumberOfLiteralWords = 0;
            this.brlw.RunningLength = 0;
            return false;
        }
        this.brlw.reset(this.iterator.next());
        // a freshly reset word has a literal word offset of zero
        this.literalWordStartPosition = this.iterator.literalWords();
        return true;
    }

    /**
     * Switches to the next inner iterator supplied by the master iterator.
     *
     * @return false when the master iterator has nothing left
     */
    private boolean reload() {
        if (this.masteriterator.hasNext()) {
            this.iterator = this.masteriterator.next();
            this.buffer = this.iterator.buffer();
            return true;
        }
        return false;
    }

    /**
     * Get the nth literal word for the current running length word
     *
     * @param index zero based index
     * @return the literal word
     */
    @Override
    public int getLiteralWordAt(int index) {
        final int absolute = this.literalWordStartPosition + index;
        return this.buffer[absolute];
    }

    /**
     * Gets the number of literal words for the current running length word.
     *
     * @return the number of literal words
     */
    @Override
    public int getNumberOfLiteralWords() {
        return this.brlw.NumberOfLiteralWords;
    }

    /**
     * Gets the running bit.
     *
     * @return the running bit
     */
    @Override
    public boolean getRunningBit() {
        return this.brlw.RunningBit;
    }

    /**
     * Gets the running length.
     *
     * @return the running length
     */
    @Override
    public int getRunningLength() {
        return this.brlw.RunningLength;
    }

    /**
     * Size in uncompressed words of the current running length word.
     *
     * @return the size
     */
    @Override
    public int size() {
        return this.brlw.size();
    }

    /**
     * Copies this iterator. The buffered word and both iterators are cloned;
     * the word array reference is shared with the original.
     */
    @Override
    public BufferedIterator32 clone() throws CloneNotSupportedException {
        // super.clone() already copies buffer and literalWordStartPosition
        final BufferedIterator32 copy = (BufferedIterator32) super.clone();
        copy.brlw = this.brlw.clone();
        copy.iterator = this.iterator.clone();
        copy.masteriterator = this.masteriterator.clone();
        return copy;
    }
}

174
fine-jgit/src/com/fr/third/googlecode/javaewah32/BufferedRunningLengthWord32.java

@ -0,0 +1,174 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Mostly for internal use. Similar to RunningLengthWord, but can
* be modified without access to the array, and has faster access.
*
* @author Daniel Lemire
* @since 0.5.0
*
*/
public final class BufferedRunningLengthWord32 implements Cloneable {
/**
* Instantiates a new buffered running length word.
*
* @param a the word
*/
public BufferedRunningLengthWord32(final int a) {
this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits));
this.RunningBit = (a & 1) != 0;
this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount);
}
/**
* Instantiates a new buffered running length word.
*
* @param rlw the rlw
*/
public BufferedRunningLengthWord32(final RunningLengthWord32 rlw) {
this(rlw.parent.buffer[rlw.position]);
}
/**
* Discard first words.
*
* @param x the number of words to be discarded
*/
public void discardFirstWords(int x) {
if (this.RunningLength >= x) {
this.RunningLength -= x;
return;
}
x -= this.RunningLength;
this.RunningLength = 0;
this.literalwordoffset += x;
this.NumberOfLiteralWords -= x;
}
/**
* Gets the number of literal words.
*
* @return the number of literal words
*/
public int getNumberOfLiteralWords() {
return this.NumberOfLiteralWords;
}
/**
* Gets the running bit.
*
* @return the running bit
*/
public boolean getRunningBit() {
return this.RunningBit;
}
/**
* Gets the running length.
*
* @return the running length
*/
public int getRunningLength() {
return this.RunningLength;
}
/**
* Reset the values using the provided word.
*
* @param a the word
*/
public void reset(final int a) {
this.NumberOfLiteralWords = (a >>> (1 + RunningLengthWord32.runninglengthbits));
this.RunningBit = (a & 1) != 0;
this.RunningLength = ((a >>> 1) & RunningLengthWord32.largestrunninglengthcount);
this.literalwordoffset = 0;
}
/**
* Reset the values of this running length word so that it has the same values
* as the other running length word.
*
* @param rlw the other running length word
*/
public void reset(final RunningLengthWord32 rlw) {
reset(rlw.parent.buffer[rlw.position]);
}
/**
* Sets the number of literal words.
*
* @param number the new number of literal words
*/
public void setNumberOfLiteralWords(final int number) {
this.NumberOfLiteralWords = number;
}
/**
* Sets the running bit.
*
* @param b the new running bit
*/
public void setRunningBit(final boolean b) {
this.RunningBit = b;
}
/**
* Sets the running length.
*
* @param number the new running length
*/
public void setRunningLength(final int number) {
this.RunningLength = number;
}
/**
* Size in uncompressed words.
*
* @return the int
*/
public int size() {
return this.RunningLength + this.NumberOfLiteralWords;
}
/*
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "running bit = " + getRunningBit() + " running length = "
+ getRunningLength() + " number of lit. words "
+ getNumberOfLiteralWords();
}
@Override
public BufferedRunningLengthWord32 clone() throws CloneNotSupportedException {
BufferedRunningLengthWord32 answer = (BufferedRunningLengthWord32) super.clone();
answer.literalwordoffset = this.literalwordoffset;
answer.NumberOfLiteralWords = this.NumberOfLiteralWords;
answer.RunningBit = this.RunningBit;
answer.RunningLength = this.RunningLength;
return answer;
}
/** how many literal words have we read so far? */
public int literalwordoffset = 0;
/** The Number of literal words. */
public int NumberOfLiteralWords;
/** The Running bit. */
public boolean RunningBit;
/** The Running length. */
public int RunningLength;
}

1608
fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHCompressedBitmap32.java

File diff suppressed because it is too large Load Diff

98
fine-jgit/src/com/fr/third/googlecode/javaewah32/EWAHIterator32.java

@ -0,0 +1,98 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * The class EWAHIterator represents a special type of
 * efficient iterator iterating over (uncompressed) words of bits.
 *
 * @author Daniel Lemire
 * @since 0.5.0
 *
 */
public final class EWAHIterator32 implements Cloneable {
    /** The pointer represent the location of the current running length
     * word in the array of words (embedded in the rlw attribute). */
    int pointer;
    /** The current running length word. */
    RunningLengthWord32 rlw;
    /** The size in words. */
    int size;

    /**
     * Instantiates a new eWAH iterator.
     *
     * @param a the array of words
     * @param sizeinwords the number of words that are significant in the array of words
     */
    public EWAHIterator32(final EWAHCompressedBitmap32 a, final int sizeinwords) {
        this.pointer = 0;
        this.size = sizeinwords;
        this.rlw = new RunningLengthWord32(a, 0);
    }

    /**
     * Allow expert developers to instantiate an EWAHIterator.
     *
     * @param bitmap we want to iterate over
     * @return an iterator
     */
    public static EWAHIterator32 getEWAHIterator(EWAHCompressedBitmap32 bitmap) {
        return bitmap.getEWAHIterator();
    }

    /**
     * Access to the array of words
     *
     * @return the int[]
     */
    public int[] buffer() {
        return this.rlw.parent.buffer;
    }

    /**
     * Position of the literal words represented by this running length word.
     *
     * @return the int
     */
    public int literalWords() {
        final int literalCount = this.rlw.getNumberOfLiteralWords();
        return this.pointer - literalCount;
    }

    /**
     * Checks for next.
     *
     * @return true, if successful
     */
    public boolean hasNext() {
        return this.pointer < this.size;
    }

    /**
     * Next running length word.
     *
     * @return the running length word
     */
    public RunningLengthWord32 next() {
        final int current = this.pointer;
        this.rlw.position = current;
        // step over the marker word itself and the literal words behind it
        this.pointer = current + this.rlw.getNumberOfLiteralWords() + 1;
        return this.rlw;
    }

    /**
     * Copies this iterator, cloning the running length word as well.
     */
    @Override
    public EWAHIterator32 clone() throws CloneNotSupportedException {
        // super.clone() already copies the primitive size and pointer fields
        final EWAHIterator32 copy = (EWAHIterator32) super.clone();
        copy.rlw = this.rlw.clone();
        return copy;
    }
}

377
fine-jgit/src/com/fr/third/googlecode/javaewah32/FastAggregation32.java

@ -0,0 +1,377 @@
package com.fr.third.googlecode.javaewah32;
import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;
/**
* Fast algorithms to aggregate many bitmaps. These algorithms are just given as
* reference. They may not be faster than the corresponding methods in the
* EWAHCompressedBitmap class.
*
* @author Daniel Lemire
*
*/
public class FastAggregation32 {
/**
 * Compute the and aggregate using a temporary uncompressed bitmap.
 *
 * @param bufsize buffer size used during the computation, in 32-bit words (per input bitmap)
 * @param bitmaps the source bitmaps
 * @return the and aggregate, as a newly created bitmap
 */
public static EWAHCompressedBitmap32 bufferedand(final int bufsize,
        final EWAHCompressedBitmap32... bitmaps) {
    final EWAHCompressedBitmap32 result = new EWAHCompressedBitmap32();
    bufferedandWithContainer(result, bufsize, bitmaps);
    return result;
}
/**
 * Compute the and aggregate using a temporary uncompressed bitmap.
 *
 * @param container where the aggregate is written
 * @param bufsize buffer size used during the computation, in 32-bit words (per input bitmap)
 * @param bitmaps the source bitmaps
 */
public static void bufferedandWithContainer(final BitmapStorage32 container, final int bufsize,
        final EWAHCompressedBitmap32... bitmaps) {
    final java.util.LinkedList<IteratingBufferedRunningLengthWord32> cursors =
            new java.util.LinkedList<IteratingBufferedRunningLengthWord32>();
    for (int b = 0; b < bitmaps.length; ++b) {
        cursors.add(new IteratingBufferedRunningLengthWord32(bitmaps[b]));
    }
    final int[] scratch = new int[bufsize * bitmaps.length];
    // Processing stops as soon as any single input has nothing left.
    boolean active = !cursors.isEmpty();
    for (IteratingRLW32 cursor : cursors) {
        if (cursor.size() == 0) {
            active = false;
            break;
        }
    }
    while (active) {
        // all-ones is the neutral element of AND
        Arrays.fill(scratch, ~0);
        int produced = Integer.MAX_VALUE;
        for (IteratingRLW32 cursor : cursors) {
            produced = Math.min(produced, IteratorAggregation32.inplaceand(scratch, cursor));
        }
        // only the smallest count reported by the inputs is written out
        for (int k = 0; k < produced; ++k) {
            container.add(scratch[k]);
        }
        for (IteratingRLW32 cursor : cursors) {
            if (cursor.size() == 0) {
                active = false;
                break;
            }
        }
    }
}
/**
 * Compute the or aggregate using a temporary uncompressed bitmap.
 *
 * @param bufsize buffer size used during the computation, in 32-bit words
 * @param bitmaps the source bitmaps
 * @return the or aggregate, as a newly created bitmap
 */
public static EWAHCompressedBitmap32 bufferedor(final int bufsize,
        final EWAHCompressedBitmap32... bitmaps) {
    final EWAHCompressedBitmap32 result = new EWAHCompressedBitmap32();
    bufferedorWithContainer(result, bufsize, bitmaps);
    return result;
}
/**
 * Compute the or aggregate using a temporary uncompressed bitmap.
 *
 * @param container where the aggregate is written
 * @param bufsize buffer size used during the computation, in 32-bit words
 * @param bitmaps the source bitmaps
 */
public static void bufferedorWithContainer(final BitmapStorage32 container, final int bufsize,
        final EWAHCompressedBitmap32... bitmaps) {
    int range = 0;
    EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone();
    // Descending by sizeinbits, so that exhausted inputs collect at the tail.
    Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() {
        @Override
        public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) {
            // explicit comparison instead of b - a: a subtraction can
            // overflow and yield the wrong sign
            return a.sizeinbits < b.sizeinbits ? 1
                    : a.sizeinbits == b.sizeinbits ? 0 : -1;
        }
    });
    java.util.ArrayList<IteratingBufferedRunningLengthWord32> al =
            new java.util.ArrayList<IteratingBufferedRunningLengthWord32>();
    for (EWAHCompressedBitmap32 bitmap : sbitmaps) {
        if (bitmap.sizeinbits > range)
            range = bitmap.sizeinbits;
        al.add(new IteratingBufferedRunningLengthWord32(bitmap));
    }
    int[] hardbitmap = new int[bufsize];
    // maxr: number of leading inputs that may still hold data
    int maxr = al.size();
    while (maxr > 0) {
        int effective = 0;
        for (int k = 0; k < maxr; ++k) {
            final IteratingBufferedRunningLengthWord32 current = al.get(k);
            if (current.size() > 0) {
                int eff = IteratorAggregation32.inplaceor(hardbitmap, current);
                if (eff > effective)
                    effective = eff;
            } else
                // input k is exhausted: drop it and everything after it
                maxr = k;
        }
        for (int k = 0; k < effective; ++k)
            container.add(hardbitmap[k]);
        // all-zeros is the neutral element of OR
        Arrays.fill(hardbitmap, 0);
    }
    container.setSizeInBits(range);
}
/**
 * Compute the xor aggregate using a temporary uncompressed bitmap.
 *
 * @param bufsize buffer size used during the computation, in 32-bit words
 * @param bitmaps the source bitmaps
 * @return the xor aggregate, as a newly created bitmap
 */
public static EWAHCompressedBitmap32 bufferedxor(final int bufsize,
        final EWAHCompressedBitmap32... bitmaps) {
    final EWAHCompressedBitmap32 result = new EWAHCompressedBitmap32();
    bufferedxorWithContainer(result, bufsize, bitmaps);
    return result;
}
/**
 * Compute the xor aggregate using a temporary uncompressed bitmap.
 *
 * @param container where the aggregate is written
 * @param bufsize buffer size used during the computation, in 32-bit words
 * @param bitmaps the source bitmaps
 */
public static void bufferedxorWithContainer(final BitmapStorage32 container, final int bufsize,
        final EWAHCompressedBitmap32... bitmaps) {
    int range = 0;
    EWAHCompressedBitmap32[] sbitmaps = bitmaps.clone();
    // Descending by sizeinbits, so that exhausted inputs collect at the tail.
    Arrays.sort(sbitmaps, new Comparator<EWAHCompressedBitmap32>() {
        @Override
        public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) {
            // explicit comparison instead of b - a: a subtraction can
            // overflow and yield the wrong sign
            return a.sizeinbits < b.sizeinbits ? 1
                    : a.sizeinbits == b.sizeinbits ? 0 : -1;
        }
    });
    java.util.ArrayList<IteratingBufferedRunningLengthWord32> al =
            new java.util.ArrayList<IteratingBufferedRunningLengthWord32>();
    for (EWAHCompressedBitmap32 bitmap : sbitmaps) {
        if (bitmap.sizeinbits > range)
            range = bitmap.sizeinbits;
        al.add(new IteratingBufferedRunningLengthWord32(bitmap));
    }
    int[] hardbitmap = new int[bufsize];
    // maxr: number of leading inputs that may still hold data
    int maxr = al.size();
    while (maxr > 0) {
        int effective = 0;
        for (int k = 0; k < maxr; ++k) {
            final IteratingBufferedRunningLengthWord32 current = al.get(k);
            if (current.size() > 0) {
                int eff = IteratorAggregation32.inplacexor(hardbitmap, current);
                if (eff > effective)
                    effective = eff;
            } else
                // input k is exhausted: drop it and everything after it
                maxr = k;
        }
        for (int k = 0; k < effective; ++k)
            container.add(hardbitmap[k]);
        // all-zeros is the neutral element of XOR
        Arrays.fill(hardbitmap, 0);
    }
    container.setSizeInBits(range);
}
/**
 * Uses a priority queue to compute the or aggregate: the two smallest
 * bitmaps (by size in bytes) are repeatedly merged until two remain, and
 * that final pair is or-ed straight into the container.
 *
 * @param container where we write the result
 * @param bitmaps to be aggregated
 * @throws IllegalArgumentException if fewer than two bitmaps are given
 */
public static void orToContainer(final BitmapStorage32 container,
        final EWAHCompressedBitmap32... bitmaps) {
    if (bitmaps.length < 2) {
        throw new IllegalArgumentException("We need at least two bitmaps");
    }
    final Comparator<EWAHCompressedBitmap32> bySizeInBytes = new Comparator<EWAHCompressedBitmap32>() {
        @Override
        public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) {
            return a.sizeInBytes() - b.sizeInBytes();
        }
    };
    final PriorityQueue<EWAHCompressedBitmap32> pending =
            new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, bySizeInBytes);
    for (int i = 0; i < bitmaps.length; ++i) {
        pending.add(bitmaps[i]);
    }
    while (pending.size() > 2) {
        final EWAHCompressedBitmap32 smallest = pending.poll();
        final EWAHCompressedBitmap32 nextSmallest = pending.poll();
        pending.add(smallest.or(nextSmallest));
    }
    final EWAHCompressedBitmap32 left = pending.poll();
    final EWAHCompressedBitmap32 right = pending.poll();
    left.orToContainer(right, container);
}
/**
 * Uses a priority queue to compute the xor aggregate: the two smallest
 * bitmaps (by size in bytes) are repeatedly merged until two remain, and
 * that final pair is xor-ed straight into the container.
 *
 * @param container where we write the result
 * @param bitmaps to be aggregated
 * @throws IllegalArgumentException if fewer than two bitmaps are given
 */
public static void xorToContainer(final BitmapStorage32 container,
        final EWAHCompressedBitmap32... bitmaps) {
    if (bitmaps.length < 2) {
        throw new IllegalArgumentException("We need at least two bitmaps");
    }
    final Comparator<EWAHCompressedBitmap32> bySizeInBytes = new Comparator<EWAHCompressedBitmap32>() {
        @Override
        public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) {
            return a.sizeInBytes() - b.sizeInBytes();
        }
    };
    final PriorityQueue<EWAHCompressedBitmap32> pending =
            new PriorityQueue<EWAHCompressedBitmap32>(bitmaps.length, bySizeInBytes);
    for (int i = 0; i < bitmaps.length; ++i) {
        pending.add(bitmaps[i]);
    }
    while (pending.size() > 2) {
        final EWAHCompressedBitmap32 smallest = pending.poll();
        final EWAHCompressedBitmap32 nextSmallest = pending.poll();
        pending.add(smallest.xor(nextSmallest));
    }
    final EWAHCompressedBitmap32 left = pending.poll();
    final EWAHCompressedBitmap32 right = pending.poll();
    left.xorToContainer(right, container);
}
/**
* For internal use. Computes the bitwise or of the provided bitmaps and
* stores the result in the container. (This used to be the default.)
*
* All inputs are walked in lock step, one running length word at a time:
* runs of ones and common runs of zeros are emitted as compressed runs, the
* remaining stretch is or-ed word by word. The container's size in bits is
* finally set to the largest sizeinbits among the inputs.
*
* @deprecated use EWAHCompressedBitmap32.or instead
* @since 0.4.0
* @param container where the result is stored
* @param bitmaps to be aggregated
*/
@Deprecated
public static void legacy_orWithContainer(final BitmapStorage32 container,
final EWAHCompressedBitmap32... bitmaps) {
if (bitmaps.length == 2) {
// should be more efficient
bitmaps[0].orToContainer(bitmaps[1], container);
return;
}
// Sort the bitmaps in descending order by sizeinbits. We will exhaust the
// sorted bitmaps from right to left.
final EWAHCompressedBitmap32[] sortedBitmaps = bitmaps.clone();
Arrays.sort(sortedBitmaps, new Comparator<EWAHCompressedBitmap32>() {
@Override
public int compare(EWAHCompressedBitmap32 a, EWAHCompressedBitmap32 b) {
return a.sizeinbits < b.sizeinbits ? 1
: a.sizeinbits == b.sizeinbits ? 0 : -1;
}
});
// One buffered cursor per bitmap that has at least one word; the first
// maxAvailablePos entries of rlws are the cursors still in play.
final IteratingBufferedRunningLengthWord32[] rlws = new IteratingBufferedRunningLengthWord32[bitmaps.length];
int maxAvailablePos = 0;
for (EWAHCompressedBitmap32 bitmap : sortedBitmaps) {
EWAHIterator32 iterator = bitmap.getEWAHIterator();
if (iterator.hasNext()) {
rlws[maxAvailablePos++] = new IteratingBufferedRunningLengthWord32(
iterator);
}
}
// Reached when no bitmap yields a word (for instance an empty bitmaps array).
if (maxAvailablePos == 0) { // this never happens...
container.setSizeInBits(0);
return;
}
// sortedBitmaps is in descending order, so index 0 holds the largest size
int maxSize = sortedBitmaps[0].sizeinbits;
while (true) {
// Survey pass over the active cursors:
// maxOneRl - longest run of ones at the front of any cursor
// minZeroRl - shortest run of zeros (forced to 0 once a ones-cursor is seen)
// minSize - smallest number of words left in any current word
// numEmptyRl - cursors whose current word has no run, only literals
int maxOneRl = 0;
int minZeroRl = Integer.MAX_VALUE;
int minSize = Integer.MAX_VALUE;
int numEmptyRl = 0;
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord32 rlw = rlws[i];
int size = rlw.size();
if (size == 0) {
// cursor i is exhausted: shrink the active prefix so that it ends before i
maxAvailablePos = i;
break;
}
minSize = Math.min(minSize, size);
if (rlw.getRunningBit()) {
int rl = rlw.getRunningLength();
maxOneRl = Math.max(maxOneRl, rl);
minZeroRl = 0;
if (rl == 0 && size > 0) {
numEmptyRl++;
}
} else {
int rl = rlw.getRunningLength();
minZeroRl = Math.min(minZeroRl, rl);
if (rl == 0 && size > 0) {
numEmptyRl++;
}
}
}
if (maxAvailablePos == 0) {
break;
} else if (maxAvailablePos == 1) {
// only one bitmap is left so just write the rest of it out
rlws[0].discharge(container);
break;
}
if (maxOneRl > 0) {
// a run of ones dominates the or: emit it and advance every cursor past it
container.addStreamOfEmptyWords(true, maxOneRl);
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord32 rlw = rlws[i];
rlw.discardFirstWords(maxOneRl);
}
} else if (minZeroRl > 0) {
// every cursor starts with zeros: emit the common zero run
container.addStreamOfEmptyWords(false, minZeroRl);
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord32 rlw = rlws[i];
rlw.discardFirstWords(minZeroRl);
}
} else {
// at least one cursor is at literal words: or the next minSize words
int index = 0;
if (numEmptyRl == 1) {
// if one rlw has literal words to process and the rest have a run of
// 0's we can write them out here
IteratingBufferedRunningLengthWord32 emptyRl = null;
int minNonEmptyRl = Integer.MAX_VALUE;
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord32 rlw = rlws[i];
int rl = rlw.getRunningLength();
if (rl == 0) {
assert emptyRl == null;
emptyRl = rlw;
} else {
minNonEmptyRl = Math.min(minNonEmptyRl, rl);
}
}
int wordsToWrite = minNonEmptyRl > minSize ? minSize : minNonEmptyRl;
if (emptyRl != null)
emptyRl.writeLiteralWords(wordsToWrite, container);
index += wordsToWrite;
}
// word-by-word or of whatever remains of the minSize stretch
while (index < minSize) {
int word = 0;
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord32 rlw = rlws[i];
// a cursor still inside its zero run at this index contributes nothing
if (rlw.getRunningLength() <= index) {
word |= rlw.getLiteralWordAt(index - rlw.getRunningLength());
}
}
container.add(word);
index++;
}
for (int i = 0; i < maxAvailablePos; i++) {
IteratingBufferedRunningLengthWord32 rlw = rlws[i];
rlw.discardFirstWords(minSize);
}
}
}
container.setSizeInBits(maxSize);
}
}

90
fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorImpl32.java

@ -0,0 +1,90 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2012, Google Inc.
* Licensed under the Apache License, Version 2.0.
*/
import static com.fr.third.googlecode.javaewah32.EWAHCompressedBitmap32.wordinbits;
import com.fr.third.googlecode.javaewah.IntIterator;
/**
 * The IntIteratorImpl32 is the 32 bit implementation of the IntIterator
 * interface, which efficiently returns the stream of integers represented by an
 * EWAHIterator32.
 *
 * @author Colby Ranger
 * @since 0.5.6
 */
final class IntIteratorImpl32 implements IntIterator {
    private final EWAHIterator32 ewahIter;
    private final int[] ewahBuffer;
    /** Bit position reached so far. */
    private int position;
    /** Bit position at which the current run ends. */
    private int runningLength;
    /** Literal word being drained; zero once all of its bits were returned. */
    private int word;
    /** Index in the buffer of the next literal word to load. */
    private int wordPosition;
    /** Index in the buffer just past the last literal word of the current group. */
    private int wordLength;
    /** Bit position of the first bit of {@link #word}. */
    private int literalPosition;
    /** Cached answer for {@link #hasNext()}. */
    private boolean hasnext;

    IntIteratorImpl32(EWAHIterator32 ewahIter) {
        this.ewahIter = ewahIter;
        this.ewahBuffer = ewahIter.buffer();
        this.hasnext = this.moveToNext();
    }

    /**
     * Advances to the next set bit unless one is already pending.
     *
     * @return whether a set bit is available
     */
    public final boolean moveToNext() {
        while (!(runningHasNext() || literalHasNext())) {
            if (!this.ewahIter.hasNext()) {
                return false;
            }
            setRunningLengthWord(this.ewahIter.next());
        }
        return true;
    }

    @Override
    public final boolean hasNext() {
        return this.hasnext;
    }

    @Override
    public final int next() {
        final int result;
        if (!runningHasNext()) {
            // take the lowest set bit of the pending literal word and clear it
            final int bit = Long.numberOfTrailingZeros(this.word);
            this.word ^= (1L << bit);
            result = this.literalPosition + bit;
        } else {
            // inside a run of ones every position is a set bit
            result = this.position++;
        }
        this.hasnext = this.moveToNext();
        return result;
    }

    /** Loads the run and literal bounds of the given running length word. */
    private void setRunningLengthWord(RunningLengthWord32 rlw) {
        this.runningLength = wordinbits * rlw.getRunningLength()
                + this.position;
        if (!rlw.getRunningBit()) {
            // a run of zeros holds no set bits: jump straight over it
            this.position = this.runningLength;
        }
        final int firstLiteral = this.ewahIter.literalWords();
        this.wordPosition = firstLiteral;
        this.wordLength = firstLiteral + rlw.getNumberOfLiteralWords();
    }

    private boolean runningHasNext() {
        return this.position < this.runningLength;
    }

    /** Loads literal words until one with a set bit is pending or none are left. */
    private boolean literalHasNext() {
        while (this.word == 0 && this.wordPosition < this.wordLength) {
            this.word = this.ewahBuffer[this.wordPosition++];
            this.literalPosition = this.position;
            this.position += wordinbits;
        }
        return this.word != 0;
    }
}

91
fine-jgit/src/com/fr/third/googlecode/javaewah32/IntIteratorOverIteratingRLW32.java

@ -0,0 +1,91 @@
package com.fr.third.googlecode.javaewah32;
import static com.fr.third.googlecode.javaewah.EWAHCompressedBitmap.wordinbits;
import com.fr.third.googlecode.javaewah.IntIterator;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Implementation of an IntIterator over an IteratingRLW32.
 *
 * Positions are computed with 32 bits per word, matching the {@code int}
 * words handed out by {@link IteratingRLW32#getLiteralWordAt(int)}.
 */
public class IntIteratorOverIteratingRLW32 implements IntIterator {
    /**
     * Number of bits in one word of the 32-bit format. The file-level static
     * import of {@code wordinbits} refers to the 64-bit bitmap class and must
     * not be used here: it would double every reported position.
     */
    private static final int WORD_IN_BITS = Integer.SIZE;

    IteratingRLW32 parent;
    /** Bit position reached so far. */
    private int position;
    /** Bit position at which the current run ends. */
    private int runningLength;
    /** Literal word being drained; zero once all of its bits were returned. */
    private int word;
    /** Index (relative to the current running length word) of the next literal word. */
    private int wordPosition;
    /** Number of literal words of the current running length word. */
    private int wordLength;
    /** Bit position of the first bit of {@link #word}. */
    private int literalPosition;
    /** Cached answer for {@link #hasNext()}. */
    private boolean hasnext;

    /**
     * @param p iterator we wish to iterate over
     */
    public IntIteratorOverIteratingRLW32(final IteratingRLW32 p) {
        this.parent = p;
        this.position = 0;
        setupForCurrentRunningLengthWord();
        this.hasnext = moveToNext();
    }

    /**
     * @return whether we could find another set bit; don't move if there is an unprocessed value
     */
    private boolean moveToNext() {
        while (!runningHasNext() && !literalHasNext()) {
            if (this.parent.next())
                setupForCurrentRunningLengthWord();
            else
                return false;
        }
        return true;
    }

    @Override
    public boolean hasNext() {
        return this.hasnext;
    }

    @Override
    public final int next() {
        final int answer;
        if (runningHasNext()) {
            // inside a run of ones every position is a set bit
            answer = this.position++;
        } else {
            // take the lowest set bit of the pending literal word and clear it
            final int bit = Integer.numberOfTrailingZeros(this.word);
            this.word ^= (1 << bit);
            answer = this.literalPosition + bit;
        }
        this.hasnext = this.moveToNext();
        return answer;
    }

    /** Loads the run and literal bounds of the parent's current running length word. */
    private void setupForCurrentRunningLengthWord() {
        this.runningLength = WORD_IN_BITS * this.parent.getRunningLength()
                + this.position;
        if (!this.parent.getRunningBit()) {
            // a run of zeros holds no set bits: jump straight over it
            this.position = this.runningLength;
        }
        this.wordPosition = 0;
        this.wordLength = this.parent.getNumberOfLiteralWords();
    }

    private boolean runningHasNext() {
        return this.position < this.runningLength;
    }

    /** Loads literal words until one with a set bit is pending or none are left. */
    private boolean literalHasNext() {
        while (this.word == 0 && this.wordPosition < this.wordLength) {
            this.word = this.parent.getLiteralWordAt(this.wordPosition++);
            this.literalPosition = this.position;
            this.position += WORD_IN_BITS;
        }
        return this.word != 0;
    }
}

274
fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingBufferedRunningLengthWord32.java

@ -0,0 +1,274 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
* Mostly for internal use. Similar to BufferedRunningLengthWord32, but automatically
* advances to the next BufferedRunningLengthWord32 as words are discarded.
*
* @since 0.5.0
* @author Daniel Lemire and David McIntosh
*/
public final class IteratingBufferedRunningLengthWord32 implements IteratingRLW32, Cloneable {
/**
 * Instantiates a new iterating buffered running length word, positioned on
 * the first running length word the iterator delivers.
 *
 * @param iterator iterator
 */
public IteratingBufferedRunningLengthWord32(final EWAHIterator32 iterator) {
    this.iterator = iterator;
    this.buffer = iterator.buffer();
    this.brlw = new BufferedRunningLengthWord32(iterator.next());
    // literalWords() must be read after next(): it depends on the advanced pointer
    this.literalWordStartPosition = iterator.literalWords() + this.brlw.literalwordoffset;
}
/**
* Instantiates a new iterating buffered running length word.
* @param bitmap over which we want to iterate
*
*/
public IteratingBufferedRunningLengthWord32(final EWAHCompressedBitmap32 bitmap) {
this(EWAHIterator32.getEWAHIterator(bitmap));
}
/**
* Discard first words, iterating to the next running length word if needed.
*
* @param x the x
*/
@Override
public void discardFirstWords(int x) {
while (x > 0) {
if (this.brlw.RunningLength > x) {
this.brlw.RunningLength -= x;
return;
}
x -= this.brlw.RunningLength;
this.brlw.RunningLength = 0;
int toDiscard = x > this.brlw.NumberOfLiteralWords ? this.brlw.NumberOfLiteralWords : x;
this.literalWordStartPosition += toDiscard;
this.brlw.NumberOfLiteralWords -= toDiscard;
x -= toDiscard;
if ((x > 0) || (this.brlw.size() == 0)) {
if (!this.iterator.hasNext()) {
break;
}
this.brlw.reset(this.iterator.next());
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset == 0;
}
}
}
/**
* Write out up to max words, returns how many were written
* @param container target for writes
* @param max maximal number of writes
* @return how many written
*/
public int discharge(BitmapStorage32 container, int max) {
int index = 0;
while ((index < max) && (size() > 0)) {
// first run
int pl = getRunningLength();
if (index + pl > max) {
pl = max - index;
}
container.addStreamOfEmptyWords(getRunningBit(), pl);
index += pl;
int pd = getNumberOfLiteralWords();
if (pd + index > max) {
pd = max - index;
}
writeLiteralWords(pd, container);
discardFirstWords(pl+pd);
index += pd;
}
return index;
}
/**
* Write out up to max words (negated), returns how many were written
* @param container target for writes
* @param max maximal number of writes
* @return how many written
*/
public int dischargeNegated(BitmapStorage32 container, int max) {
int index = 0;
while ((index < max) && (size() > 0)) {
// first run
int pl = getRunningLength();
if (index + pl > max) {
pl = max - index;
}
container.addStreamOfEmptyWords(!getRunningBit(), pl);
index += pl;
int pd = getNumberOfLiteralWords();
if (pd + index > max) {
pd = max - index;
}
writeNegatedLiteralWords(pd, container);
discardFirstWords(pl+pd);
index += pd;
}
return index;
}
/**
* Move to the next RunningLengthWord
* @return whether the move was possible
*/
@Override
public boolean next() {
if (!this.iterator.hasNext()) {
this.brlw.NumberOfLiteralWords = 0;
this.brlw.RunningLength = 0;
return false;
}
this.brlw.reset(this.iterator.next());
this.literalWordStartPosition = this.iterator.literalWords(); // + this.brlw.literalwordoffset ==0
return true;
}
/**
* Write out the remain words, transforming them to zeroes.
* @param container target for writes
*/
public void dischargeAsEmpty(BitmapStorage32 container) {
while(size()>0) {
container.addStreamOfEmptyWords(false, size());
discardFirstWords(size());
}
}
/**
* Write out the remaining words
* @param container target for writes
*/
public void discharge(BitmapStorage32 container) {
// fix the offset
this.brlw.literalwordoffset = this.literalWordStartPosition - this.iterator.literalWords();
discharge(this.brlw, this.iterator, container);
}
/**
* Get the nth literal word for the current running length word
* @param index zero based index
* @return the literal word
*/
@Override
public int getLiteralWordAt(int index) {
return this.buffer[this.literalWordStartPosition + index];
}
/**
* Gets the number of literal words for the current running length word.
*
* @return the number of literal words
*/
@Override
public int getNumberOfLiteralWords() {
return this.brlw.NumberOfLiteralWords;
}
/**
* Gets the running bit.
*
* @return the running bit
*/
@Override
public boolean getRunningBit() {
return this.brlw.RunningBit;
}
/**
* Gets the running length.
*
* @return the running length
*/
@Override
public int getRunningLength() {
return this.brlw.RunningLength;
}
/**
* Size in uncompressed words of the current running length word.
*
* @return the int
*/
@Override
public int size() {
return this.brlw.size();
}
/**
* write the first N literal words to the target bitmap. Does not discard the words or perform iteration.
* @param numWords number of words to be written
* @param container where we write the data
*/
public void writeLiteralWords(int numWords, BitmapStorage32 container) {
container.addStreamOfLiteralWords(this.buffer, this.literalWordStartPosition, numWords);
}
/**
* write the first N literal words (negated) to the target bitmap. Does not discard the words or perform iteration.
* @param numWords number of words to be written
* @param container where we write the data
*/
public void writeNegatedLiteralWords(int numWords, BitmapStorage32 container) {
container.addStreamOfNegatedLiteralWords(this.buffer, this.literalWordStartPosition, numWords);
}
/**
* For internal use. (One could use the non-static discharge method instead,
* but we expect them to be slower.)
*
* @param initialWord
* the initial word
* @param iterator
* the iterator
* @param container
* the container
*/
protected static void discharge(
final BufferedRunningLengthWord32 initialWord,
final EWAHIterator32 iterator, final BitmapStorage32 container) {
BufferedRunningLengthWord32 runningLengthWord = initialWord;
for (;;) {
final int runningLength = runningLengthWord.getRunningLength();
container.addStreamOfEmptyWords(runningLengthWord.getRunningBit(),
runningLength);
container.addStreamOfLiteralWords(iterator.buffer(), iterator.literalWords()
+ runningLengthWord.literalwordoffset,
runningLengthWord.getNumberOfLiteralWords());
if (!iterator.hasNext())
break;
runningLengthWord = new BufferedRunningLengthWord32(iterator.next());
}
}
@Override
public IteratingBufferedRunningLengthWord32 clone() throws CloneNotSupportedException {
IteratingBufferedRunningLengthWord32 answer = (IteratingBufferedRunningLengthWord32) super.clone();
answer.brlw = this.brlw.clone();
answer.buffer = this.buffer;
answer.iterator = this.iterator.clone();
answer.literalWordStartPosition = this.literalWordStartPosition;
return answer;
}
private BufferedRunningLengthWord32 brlw;
private int[] buffer;
private int literalWordStartPosition;
private EWAHIterator32 iterator;
}

42
fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratingRLW32.java

@ -0,0 +1,42 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * High-level iterator over a compressed bitmap: it exposes one running
 * length word at a time, as a run of fill words followed by literal words.
 */
public interface IteratingRLW32 {
    /**
     * Moves to the next running length word.
     *
     * @return whether there is more
     */
    boolean next();

    /**
     * @param index where the literal word is
     * @return the literal word at the given index.
     */
    int getLiteralWordAt(int index);

    /**
     * @return the number of literal (non-fill) words
     */
    int getNumberOfLiteralWords();

    /**
     * @return the bit used for the fill bits
     */
    boolean getRunningBit();

    /**
     * @return sum of getRunningLength() and getNumberOfLiteralWords()
     */
    int size();

    /**
     * @return length of the run of fill words
     */
    int getRunningLength();

    /**
     * @param x the number of words to discard
     */
    void discardFirstWords(int x);
}

601
fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorAggregation32.java

@ -0,0 +1,601 @@
package com.fr.third.googlecode.javaewah32;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import com.fr.third.googlecode.javaewah.CloneableIterator;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Set of helper functions to aggregate bitmaps that are exposed through
 * {@link IteratingRLW32} iterators.
 */
public class IteratorAggregation32 {
    /**
     * Wraps an iterator so that it presents the complement of the wrapped
     * stream: the running bit and every literal word are inverted, all other
     * calls are forwarded unchanged (so advancing the wrapper advances x).
     *
     * @param x iterator to negate
     * @return negated version of the iterator
     */
    public static IteratingRLW32 not(final IteratingRLW32 x) {
        return new IteratingRLW32() {
            @Override
            public boolean next() {
                return x.next();
            }

            @Override
            public int getLiteralWordAt(int index) {
                return ~x.getLiteralWordAt(index);
            }

            @Override
            public int getNumberOfLiteralWords() {
                return x.getNumberOfLiteralWords();
            }

            @Override
            public boolean getRunningBit() {
                return !x.getRunningBit();
            }

            @Override
            public int size() {
                return x.size();
            }

            @Override
            public int getRunningLength() {
                return x.getRunningLength();
            }

            @Override
            public void discardFirstWords(int y) {
                x.discardFirstWords(y);
            }
        };
    }

    /**
     * Aggregate the iterators using a bitmap buffer of
     * {@link #DEFAULTMAXBUFSIZE} words.
     *
     * @param al iterators to aggregate
     * @return and aggregate
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW32 bufferedand(final IteratingRLW32... al) {
        return bufferedand(DEFAULTMAXBUFSIZE, al);
    }

    /**
     * Aggregate the iterators using a bitmap buffer. A single iterator is
     * returned as is.
     *
     * @param al iterators to aggregate
     * @param bufsize size parameter handed to the internal AND iterator; it
     *        bounds how many words are produced per aggregation step
     * @return and aggregate
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW32 bufferedand(final int bufsize, final IteratingRLW32... al) {
        if (al.length == 0) {
            throw new IllegalArgumentException("Need at least one iterator");
        }
        if (al.length == 1) {
            return al[0];
        }
        return new BufferedIterator32(new AndIt(toLinkedList(al), bufsize));
    }

    /**
     * Aggregate the iterators using a bitmap buffer of
     * {@link #DEFAULTMAXBUFSIZE} words.
     *
     * @param al iterators to aggregate
     * @return or aggregate
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW32 bufferedor(final IteratingRLW32... al) {
        return bufferedor(DEFAULTMAXBUFSIZE, al);
    }

    /**
     * Aggregate the iterators using a bitmap buffer. A single iterator is
     * returned as is.
     *
     * @param al iterators to aggregate
     * @param bufsize size of the internal buffer used by the iterator in 32-bit words
     * @return or aggregate
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW32 bufferedor(final int bufsize, final IteratingRLW32... al) {
        if (al.length == 0) {
            throw new IllegalArgumentException("Need at least one iterator");
        }
        if (al.length == 1) {
            return al[0];
        }
        return new BufferedIterator32(new ORIt(toLinkedList(al), bufsize));
    }

    /**
     * Aggregate the iterators using a bitmap buffer of
     * {@link #DEFAULTMAXBUFSIZE} words.
     *
     * @param al iterators to aggregate
     * @return xor aggregate
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW32 bufferedxor(final IteratingRLW32... al) {
        return bufferedxor(DEFAULTMAXBUFSIZE, al);
    }

    /**
     * Aggregate the iterators using a bitmap buffer. A single iterator is
     * returned as is.
     *
     * @param al iterators to aggregate
     * @param bufsize size of the internal buffer used by the iterator in 32-bit words
     * @return xor aggregate
     * @throws IllegalArgumentException if no iterator is given
     */
    public static IteratingRLW32 bufferedxor(final int bufsize, final IteratingRLW32... al) {
        if (al.length == 0) {
            throw new IllegalArgumentException("Need at least one iterator");
        }
        if (al.length == 1) {
            return al[0];
        }
        return new BufferedIterator32(new XORIt(toLinkedList(al), bufsize));
    }

    /** Copies the iterators, in order, into a fresh mutable linked list. */
    private static LinkedList<IteratingRLW32> toLinkedList(final IteratingRLW32[] al) {
        return new LinkedList<IteratingRLW32>(Arrays.asList(al));
    }

    /**
     * Write out the content of the iterator, but as if it were all zeros.
     *
     * @param container
     *            where we write
     * @param i
     *            the iterator
     */
    protected static void dischargeAsEmpty(final BitmapStorage32 container,
            final IteratingRLW32 i) {
        while (i.size() > 0) {
            container.addStreamOfEmptyWords(false, i.size());
            i.next();
        }
    }

    /**
     * Write out up to max words, returns how many were written. The words
     * written are discarded from the iterator.
     *
     * @param container target for writes
     * @param i source of data
     * @param max maximal number of writes
     * @return how many written
     */
    protected static int discharge(final BitmapStorage32 container, IteratingRLW32 i, int max) {
        int counter = 0;
        while (i.size() > 0 && counter < max) {
            int L1 = i.getRunningLength();
            if (L1 > 0) {
                if (L1 + counter > max) {
                    L1 = max - counter;
                }
                container.addStreamOfEmptyWords(i.getRunningBit(), L1);
                counter += L1;
            }
            int L = i.getNumberOfLiteralWords();
            if (L + counter > max) {
                L = max - counter;
            }
            for (int k = 0; k < L; ++k) {
                container.add(i.getLiteralWordAt(k));
            }
            counter += L;
            i.discardFirstWords(L + L1);
        }
        return counter;
    }

    /**
     * Write out up to max negated words, returns how many were written. The
     * words written are discarded from the iterator.
     *
     * @param container target for writes
     * @param i source of data
     * @param max maximal number of writes
     * @return how many written
     */
    protected static int dischargeNegated(final BitmapStorage32 container, IteratingRLW32 i, int max) {
        int counter = 0;
        while (i.size() > 0 && counter < max) {
            int L1 = i.getRunningLength();
            if (L1 > 0) {
                if (L1 + counter > max) {
                    L1 = max - counter;
                }
                // negated run: a run of zeros is written as a run of ones and vice versa
                container.addStreamOfEmptyWords(!i.getRunningBit(), L1);
                counter += L1;
            }
            int L = i.getNumberOfLiteralWords();
            if (L + counter > max) {
                L = max - counter;
            }
            for (int k = 0; k < L; ++k) {
                // negated literal: every bit of the word is flipped
                container.add(~i.getLiteralWordAt(k));
            }
            counter += L;
            i.discardFirstWords(L + L1);
        }
        return counter;
    }

    /**
     * Compute the first few words of the AND aggregate between two iterators.
     *
     * @param container where to write
     * @param desiredrlwcount budget that bounds the amount of work done; it is
     *        decremented once per outer pass and by the number of literal
     *        words combined
     * @param rlwi first iterator to aggregate
     * @param rlwj second iterator to aggregate
     */
    static void andToContainer(final BitmapStorage32 container,
            int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0) && (desiredrlwcount-- > 0)) {
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // the iterator with the longer run (predator) swallows words of the other (prey)
                final boolean i_is_prey = rlwi.getRunningLength() < rlwj
                        .getRunningLength();
                final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj;
                final IteratingRLW32 predator = i_is_prey ? rlwj
                        : rlwi;
                if (predator.getRunningBit() == false) {
                    // AND with a run of zeros gives zeros
                    container.addStreamOfEmptyWords(false, predator.getRunningLength());
                    prey.discardFirstWords(predator.getRunningLength());
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // AND with a run of ones copies the prey; pad with zeros if the prey ran out
                    final int index = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
                    rlwj.getNumberOfLiteralWords());
            if (nbre_literal > 0) {
                desiredrlwcount -= nbre_literal;
                for (int k = 0; k < nbre_literal; ++k) {
                    container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
                }
                rlwi.discardFirstWords(nbre_literal);
                rlwj.discardFirstWords(nbre_literal);
            }
        }
    }

    /**
     * Compute the AND aggregate between two iterators until one of them
     * reports a size of zero.
     *
     * @param container where to write
     * @param rlwi first iterator to aggregate
     * @param rlwj second iterator to aggregate
     */
    static void andToContainer(final BitmapStorage32 container,
            final IteratingRLW32 rlwi, IteratingRLW32 rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // the iterator with the longer run (predator) swallows words of the other (prey)
                final boolean i_is_prey = rlwi.getRunningLength() < rlwj
                        .getRunningLength();
                final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj;
                final IteratingRLW32 predator = i_is_prey ? rlwj
                        : rlwi;
                if (predator.getRunningBit() == false) {
                    // AND with a run of zeros gives zeros
                    container.addStreamOfEmptyWords(false, predator.getRunningLength());
                    prey.discardFirstWords(predator.getRunningLength());
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // AND with a run of ones copies the prey; pad with zeros if the prey ran out
                    final int index = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
                    rlwj.getNumberOfLiteralWords());
            if (nbre_literal > 0) {
                for (int k = 0; k < nbre_literal; ++k) {
                    container.add(rlwi.getLiteralWordAt(k) & rlwj.getLiteralWordAt(k));
                }
                rlwi.discardFirstWords(nbre_literal);
                rlwj.discardFirstWords(nbre_literal);
            }
        }
    }

    /**
     * Compute the first few words of the XOR aggregate between two iterators.
     *
     * @param container where to write
     * @param desiredrlwcount number of words to be written (max)
     * @param rlwi first iterator to aggregate
     * @param rlwj second iterator to aggregate
     */
    public static void xorToContainer(final BitmapStorage32 container,
            int desiredrlwcount, final IteratingRLW32 rlwi, IteratingRLW32 rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0) && (desiredrlwcount-- > 0)) {
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // the iterator with the longer run (predator) swallows words of the other (prey)
                final boolean i_is_prey = rlwi.getRunningLength() < rlwj
                        .getRunningLength();
                final IteratingRLW32 prey = i_is_prey ? rlwi : rlwj;
                final IteratingRLW32 predator = i_is_prey ? rlwj
                        : rlwi;
                if (predator.getRunningBit() == false) {
                    // XOR with a run of zeros copies the prey; pad with zeros if the prey ran out
                    int index = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // XOR with a run of ones flips the prey; pad with ones if the prey ran out
                    int index = dischargeNegated(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(true, predator.getRunningLength()
                            - index);
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int nbre_literal = Math.min(rlwi.getNumberOfLiteralWords(),
                    rlwj.getNumberOfLiteralWords());
            if (nbre_literal > 0) {
                desiredrlwcount -= nbre_literal;
                for (int k = 0; k < nbre_literal; ++k) {
                    container.add(rlwi.getLiteralWordAt(k) ^ rlwj.getLiteralWordAt(k));
                }
                rlwi.discardFirstWords(nbre_literal);
                rlwj.discardFirstWords(nbre_literal);
            }
        }
    }

    /**
     * ORs the words delivered by the iterator into the given array, starting
     * at index 0, and consumes the iterator accordingly. Stops when the
     * iterator is exhausted or the array is full.
     *
     * @param bitmap uncompressed words to update in place
     * @param i source of data
     * @return index one past the last word of bitmap that was covered
     */
    protected static int inplaceor(int[] bitmap,
            IteratingRLW32 i) {
        int pos = 0;
        int s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // the whole current running length word fits: apply it and move on
                final int L = i.getRunningLength();
                if (i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + L, ~0);
                }
                pos += L;
                final int LR = i.getNumberOfLiteralWords();
                for (int k = 0; k < LR; ++k) {
                    bitmap[pos++] |= i.getLiteralWordAt(k);
                }
                if (!i.next()) {
                    return pos;
                }
            } else {
                // the current running length word reaches the end of the array
                int howmany = bitmap.length - pos;
                int L = i.getRunningLength();
                if (pos + L > bitmap.length) {
                    // the run alone overflows the array: apply what fits
                    if (i.getRunningBit()) {
                        Arrays.fill(bitmap, pos, bitmap.length, ~0);
                    }
                    i.discardFirstWords(howmany);
                    return bitmap.length;
                }
                if (i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + L, ~0);
                }
                pos += L;
                for (int k = 0; pos < bitmap.length; ++k) {
                    bitmap[pos++] |= i.getLiteralWordAt(k);
                }
                i.discardFirstWords(howmany);
                return pos;
            }
        }
        return pos;
    }

    /**
     * XORs the words delivered by the iterator into the given array, starting
     * at index 0, and consumes the iterator accordingly. Stops when the
     * iterator is exhausted or the array is full.
     *
     * @param bitmap uncompressed words to update in place
     * @param i source of data
     * @return index one past the last word of bitmap that was covered
     */
    protected static int inplacexor(int[] bitmap,
            IteratingRLW32 i) {
        int pos = 0;
        int s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // the whole current running length word fits: apply it and move on
                final int L = i.getRunningLength();
                if (i.getRunningBit()) {
                    for (int k = pos; k < pos + L; ++k) {
                        bitmap[k] = ~bitmap[k];
                    }
                }
                pos += L;
                final int LR = i.getNumberOfLiteralWords();
                for (int k = 0; k < LR; ++k) {
                    bitmap[pos++] ^= i.getLiteralWordAt(k);
                }
                if (!i.next()) {
                    return pos;
                }
            } else {
                // the current running length word reaches the end of the array
                int howmany = bitmap.length - pos;
                int L = i.getRunningLength();
                if (pos + L > bitmap.length) {
                    // the run alone overflows the array: apply what fits
                    if (i.getRunningBit()) {
                        for (int k = pos; k < bitmap.length; ++k) {
                            bitmap[k] = ~bitmap[k];
                        }
                    }
                    i.discardFirstWords(howmany);
                    return bitmap.length;
                }
                if (i.getRunningBit()) {
                    for (int k = pos; k < pos + L; ++k) {
                        bitmap[k] = ~bitmap[k];
                    }
                }
                pos += L;
                for (int k = 0; pos < bitmap.length; ++k) {
                    bitmap[pos++] ^= i.getLiteralWordAt(k);
                }
                i.discardFirstWords(howmany);
                return pos;
            }
        }
        return pos;
    }

    /**
     * ANDs the words delivered by the iterator into the given array, starting
     * at index 0, and consumes the iterator accordingly. Stops when the
     * iterator is exhausted or the array is full.
     *
     * @param bitmap uncompressed words to update in place
     * @param i source of data
     * @return index one past the last word of bitmap that was covered
     */
    protected static int inplaceand(int[] bitmap,
            IteratingRLW32 i) {
        int pos = 0;
        int s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // the whole current running length word fits: apply it and move on
                final int L = i.getRunningLength();
                if (!i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + L, 0);
                }
                pos += L;
                final int LR = i.getNumberOfLiteralWords();
                for (int k = 0; k < LR; ++k) {
                    bitmap[pos++] &= i.getLiteralWordAt(k);
                }
                if (!i.next()) {
                    return pos;
                }
            } else {
                // the current running length word reaches the end of the array
                int howmany = bitmap.length - pos;
                int L = i.getRunningLength();
                if (pos + L > bitmap.length) {
                    // the run alone overflows the array: apply what fits
                    if (!i.getRunningBit()) {
                        Arrays.fill(bitmap, pos, bitmap.length, 0);
                    }
                    i.discardFirstWords(howmany);
                    return bitmap.length;
                }
                if (!i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + L, 0);
                }
                pos += L;
                for (int k = 0; pos < bitmap.length; ++k) {
                    bitmap[pos++] &= i.getLiteralWordAt(k);
                }
                i.discardFirstWords(howmany);
                return pos;
            }
        }
        return pos;
    }

    /**
     * An optimization option. Larger values may improve speed, but at
     * the expense of memory.
     */
    public final static int DEFAULTMAXBUFSIZE = 65536;
}
/**
 * Iterator producing the OR aggregate of a list of iterators, one buffer
 * worth of words at a time. Exhausted sources are removed from the list.
 */
class ORIt implements CloneableIterator<EWAHIterator32> {
    /** Compressed bitmap that is refilled by every call to next(). */
    EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32();
    /** Uncompressed scratch area into which the sources are ORed. */
    int[] hardbitmap;
    /** Sources that may still deliver words. */
    LinkedList<IteratingRLW32> ll;

    /**
     * @param basell iterators to aggregate (the list is used as is, not copied)
     * @param bufsize size of the scratch area in 32-bit words
     */
    ORIt(LinkedList<IteratingRLW32> basell, final int bufsize) {
        this.ll = basell;
        this.hardbitmap = new int[bufsize];
    }

    /**
     * Copies this iterator. The compressed buffer, the scratch area and the
     * list are copied; the list copy is shallow, so the source iterators
     * themselves are shared with the original.
     *
     * @return the copy, an ORIt (the previous XORIt cast could never succeed)
     */
    @Override
    public ORIt clone() throws CloneNotSupportedException {
        ORIt answer = (ORIt) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardbitmap = this.hardbitmap.clone();
        // LinkedList.clone() returns Object; the element type is unchanged
        @SuppressWarnings("unchecked")
        final LinkedList<IteratingRLW32> listCopy = (LinkedList<IteratingRLW32>) this.ll.clone();
        answer.ll = listCopy;
        return answer;
    }

    /**
     * @return true while at least one source is still in the list
     */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * ORs the next chunk of every remaining source into the scratch area,
     * then packs the covered words into the compressed buffer.
     *
     * @return an iterator over the refilled buffer
     */
    @Override
    public EWAHIterator32 next() {
        this.buffer.clear();
        int effective = 0;
        Iterator<IteratingRLW32> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW32 rlw = i.next();
            if (rlw.size() > 0) {
                int eff = IteratorAggregation32.inplaceor(this.hardbitmap, rlw);
                if (eff > effective) {
                    effective = eff;
                }
            } else {
                // nothing left in this source
                i.remove();
            }
        }
        for (int k = 0; k < effective; ++k) {
            this.buffer.add(this.hardbitmap[k]);
        }
        // reset the scratch area for the next round
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Iterator producing the XOR aggregate of a list of iterators, one buffer
 * worth of words at a time. Exhausted sources are removed from the list.
 */
class XORIt implements CloneableIterator<EWAHIterator32> {
    /** Compressed bitmap that is refilled by every call to next(). */
    EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32();
    /** Uncompressed scratch area into which the sources are XORed. */
    int[] hardbitmap;
    /** Sources that may still deliver words. */
    LinkedList<IteratingRLW32> ll;

    /**
     * @param basell iterators to aggregate (the list is used as is, not copied)
     * @param bufsize size of the scratch area in 32-bit words
     */
    XORIt(LinkedList<IteratingRLW32> basell, final int bufsize) {
        this.hardbitmap = new int[bufsize];
        this.ll = basell;
    }

    /**
     * Copies this iterator; the list copy is shallow, so the source
     * iterators themselves are shared with the original.
     */
    @Override
    public XORIt clone() throws CloneNotSupportedException {
        final XORIt copy = (XORIt) super.clone();
        copy.buffer = this.buffer.clone();
        copy.hardbitmap = this.hardbitmap.clone();
        copy.ll = (LinkedList<IteratingRLW32>) this.ll.clone();
        return copy;
    }

    /**
     * @return true while at least one source is still in the list
     */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * XORs the next chunk of every remaining source into the scratch area,
     * then packs the covered words into the compressed buffer.
     *
     * @return an iterator over the refilled buffer
     */
    @Override
    public EWAHIterator32 next() {
        this.buffer.clear();
        int covered = 0;
        for (Iterator<IteratingRLW32> it = this.ll.iterator(); it.hasNext();) {
            final IteratingRLW32 source = it.next();
            if (source.size() <= 0) {
                // nothing left in this source
                it.remove();
                continue;
            }
            covered = Math.max(covered,
                    IteratorAggregation32.inplacexor(this.hardbitmap, source));
        }
        for (int k = 0; k < covered; ++k) {
            this.buffer.add(this.hardbitmap[k]);
        }
        // reset the scratch area for the next round
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Iterator producing the AND aggregate of a list of iterators chunk by
 * chunk. next() reads the first two list entries unconditionally, so the
 * list must hold at least two iterators.
 */
class AndIt implements CloneableIterator<EWAHIterator32> {
    /** Compressed bitmap that is refilled by every call to next(). */
    EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32();
    /** Sources being aggregated; cleared once any of them is exhausted. */
    LinkedList<IteratingRLW32> ll;
    /** Size parameter used to derive the per-call work budget. */
    int buffersize;

    /**
     * @param basell iterators to aggregate (the list is used as is, not copied)
     * @param bufsize size parameter used to derive the per-call work budget
     */
    public AndIt(LinkedList<IteratingRLW32> basell, final int bufsize) {
        this.buffersize = bufsize;
        this.ll = basell;
    }

    /**
     * @return true while the list of sources is not empty
     */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Copies this iterator; the list copy is shallow, so the source
     * iterators themselves are shared with the original.
     */
    @Override
    public AndIt clone() throws CloneNotSupportedException {
        final AndIt copy = (AndIt) super.clone();
        copy.buffer = this.buffer.clone();
        copy.ll = (LinkedList<IteratingRLW32>) this.ll.clone();
        return copy;
    }

    /**
     * ANDs the next chunk of the first two sources into the buffer, then
     * folds every further source into that result.
     *
     * @return an iterator over the refilled buffer
     */
    @Override
    public EWAHIterator32 next() {
        this.buffer.clear();
        final int budget = this.buffersize * this.ll.size();
        IteratorAggregation32.andToContainer(this.buffer, budget,
                this.ll.get(0), this.ll.get(1));
        if (this.ll.size() > 2) {
            // start at the third source; the first two are already combined
            final Iterator<IteratingRLW32> rest = this.ll.listIterator(2);
            final EWAHCompressedBitmap32 scratch = new EWAHCompressedBitmap32();
            while (rest.hasNext() && this.buffer.sizeInBytes() > 0) {
                IteratorAggregation32.andToContainer(scratch,
                        this.buffer.getIteratingRLW(), rest.next());
                this.buffer.swap(scratch);
                scratch.clear();
            }
        }
        // once any source is exhausted, drop all of them so hasNext() turns false
        for (IteratingRLW32 source : this.ll) {
            if (source.size() == 0) {
                this.ll.clear();
                break;
            }
        }
        return this.buffer.getEWAHIterator();
    }
}

135
fine-jgit/src/com/fr/third/googlecode/javaewah32/IteratorUtil32.java

@ -0,0 +1,135 @@
package com.fr.third.googlecode.javaewah32;
import java.util.Iterator;
import com.fr.third.googlecode.javaewah.IntIterator;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Convenience functions for working over iterators
 *
 */
public class IteratorUtil32 {
    /**
     * @param i iterator we wish to iterate over
     * @return an iterator over the set bits corresponding to the iterator
     */
    public static IntIterator toSetBitsIntIterator(final IteratingRLW32 i) {
        return new IntIteratorOverIteratingRLW32(i);
    }

    /**
     * Boxed variant of {@link #toSetBitsIntIterator(IteratingRLW32)}.
     *
     * @param i iterator we wish to iterate over
     * @return an iterator over the set bits corresponding to the iterator
     */
    public static Iterator<Integer> toSetBitsIterator(final IteratingRLW32 i) {
        return new Iterator<Integer>() {
            final private IntIterator under = toSetBitsIntIterator(i);

            @Override
            public boolean hasNext() {
                return this.under.hasNext();
            }

            @Override
            public Integer next() {
                // valueOf may reuse cached instances instead of always allocating
                return Integer.valueOf(this.under.next());
            }

            /**
             * Removal is not supported; the call is silently ignored
             * (kept as a no-op so existing callers are not broken).
             */
            @Override
            public void remove() {
            }
        };
    }

    /**
     * Turn an iterator into a bitmap
     *
     * @param i iterator we wish to materialize
     * @param c where we write
     */
    public static void materialize(final IteratingRLW32 i, final BitmapStorage32 c) {
        while (true) {
            if (i.getRunningLength() > 0) {
                c.addStreamOfEmptyWords(i.getRunningBit(), i.getRunningLength());
            }
            for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) {
                c.add(i.getLiteralWordAt(k));
            }
            if (!i.next()) {
                break;
            }
        }
    }

    /**
     * Counts the set bits delivered by the iterator; the iterator is
     * consumed in the process.
     *
     * @param i iterator we wish to iterate over
     * @return the cardinality (number of set bits) corresponding to the iterator
     */
    public static int cardinality(final IteratingRLW32 i) {
        int answer = 0;
        while (true) {
            if (i.getRunningBit()) {
                answer += i.getRunningLength() * EWAHCompressedBitmap32.wordinbits;
            }
            for (int k = 0; k < i.getNumberOfLiteralWords(); ++k) {
                // literal words are 32-bit: Long.bitCount would sign-extend the
                // int and count 32 extra bits whenever the top bit is set
                answer += Integer.bitCount(i.getLiteralWordAt(k));
            }
            if (!i.next()) {
                break;
            }
        }
        return answer;
    }

    /**
     *
     * @param x set of bitmaps we wish to iterate over
     * @return an array of iterators corresponding to the array of bitmaps
     */
    public static IteratingRLW32[] toIterators(final EWAHCompressedBitmap32... x) {
        IteratingRLW32[] X = new IteratingRLW32[x.length];
        for (int k = 0; k < X.length; ++k) {
            X[k] = new IteratingBufferedRunningLengthWord32(x[k]);
        }
        return X;
    }

    /**
     * Turn an iterator into a bitmap
     *
     * @param i iterator we wish to materialize
     * @param c where we write
     * @param Max maximum number of words to materialize
     * @return how many words were actually materialized
     */
    public static long materialize(final IteratingRLW32 i, final BitmapStorage32 c, int Max) {
        // NOTE(review): only the run (fill) words are clipped to and counted
        // against Max; literal words are always written in full and are not
        // reflected in the return value -- confirm this is what callers expect.
        final int origMax = Max;
        while (true) {
            if (i.getRunningLength() > 0) {
                int run = i.getRunningLength();
                if (run > Max) {
                    run = Max;
                }
                c.addStreamOfEmptyWords(i.getRunningBit(), run);
                Max -= run;
            }
            final long literals = i.getNumberOfLiteralWords();
            for (int k = 0; k < literals; ++k) {
                c.add(i.getLiteralWordAt(k));
            }
            if (Max > 0) {
                if (!i.next()) {
                    break;
                }
            } else {
                break;
            }
        }
        return origMax - Max;
    }

    /**
     * Turn an iterator into a bitmap
     *
     * @param i iterator we wish to materialize
     * @return materialized version of the iterator
     */
    public static EWAHCompressedBitmap32 materialize(final IteratingRLW32 i) {
        EWAHCompressedBitmap32 ewah = new EWAHCompressedBitmap32();
        materialize(i, ewah);
        return ewah;
    }
}

87
fine-jgit/src/com/fr/third/googlecode/javaewah32/NonEmptyVirtualStorage32.java

@ -0,0 +1,87 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * This is a BitmapStorage that can be used to determine quickly
 * if the result of an operation is non-trivial... that is, whether
 * there will be at least one set bit. Nothing is stored: the first write
 * that could contain a set bit aborts the operation with an exception.
 *
 * @since 0.5.0
 * @author Daniel Lemire and Veronika Zenz
 *
 */
public class NonEmptyVirtualStorage32 implements BitmapStorage32 {
    /** Signals that a word which may contain a set bit was written. */
    static class NonEmptyException extends RuntimeException {
        private static final long serialVersionUID = 1L;

        /**
         * Do not fill in the stack trace for this exception
         * for performance reasons.
         *
         * @return this instance
         * @see Throwable#fillInStackTrace()
         */
        @Override
        public synchronized Throwable fillInStackTrace() {
            return this;
        }
    }

    /** Single shared instance, thrown by every write method below. */
    private static final NonEmptyException NON_EMPTY = new NonEmptyException();

    /**
     * If the word to be added is non-zero, a NonEmptyException exception is thrown.
     */
    @Override
    public void add(int newdata) {
        if (newdata == 0) {
            return;
        }
        throw NON_EMPTY;
    }

    /**
     * throws a NonEmptyException exception when number is greater than 0
     * (the words themselves are not inspected)
     */
    @Override
    public void addStreamOfLiteralWords(int[] data, int start, int number) {
        if (number <= 0) {
            return;
        }
        throw NON_EMPTY;
    }

    /**
     * If the boolean value is true and number is greater than 0, then it throws a NonEmptyException exception,
     * otherwise, nothing happens.
     */
    @Override
    public void addStreamOfEmptyWords(boolean v, int number) {
        final boolean addsOnes = v && (number > 0);
        if (addsOnes) {
            throw NON_EMPTY;
        }
    }

    /**
     * throws a NonEmptyException exception when number is greater than 0
     * (the words themselves are not inspected)
     */
    @Override
    public void addStreamOfNegatedLiteralWords(int[] data, int start, int number) {
        if (number <= 0) {
            return;
        }
        throw NON_EMPTY;
    }

    /**
     * Does nothing.
     *
     * @see BitmapStorage32#setSizeInBits(int)
     */
    @Override
    public void setSizeInBits(int bits) {
        // intentionally empty: no bits are ever stored
    }
}

152
fine-jgit/src/com/fr/third/googlecode/javaewah32/RunningLengthWord32.java

@ -0,0 +1,152 @@
package com.fr.third.googlecode.javaewah32;
/*
* Copyright 2009-2013, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz and Owen Kaser
* Licensed under the Apache License, Version 2.0.
*/
/**
 * Mostly for internal use. A view on one marker word of a compressed
 * bitmap: bit 0 holds the running bit, the next {@link #runninglengthbits}
 * bits hold the running length and the remaining high bits hold the number
 * of literal words.
 *
 * @since 0.5.0
 * @author Daniel Lemire
 */
public final class RunningLengthWord32 implements Cloneable {
    /**
     * number of bits dedicated to marking of the running length of clean
     * words
     */
    public static final int runninglengthbits = 16;
    private static final int literalbits = 32 - 1 - runninglengthbits;
    /** largest number of literal words in a run. */
    public static final int largestliteralcount = (1 << literalbits) - 1;
    /** largest number of clean words in a run */
    public static final int largestrunninglengthcount = (1 << runninglengthbits) - 1;
    /** mask covering the running bit and the running length */
    private static final int runninglengthplusrunningbit = (1 << (runninglengthbits + 1)) - 1;
    /** mask covering the running length only */
    private static final int shiftedlargestrunninglengthcount = largestrunninglengthcount << 1;
    private static final int notrunninglengthplusrunningbit = ~runninglengthplusrunningbit;
    private static final int notshiftedlargestrunninglengthcount = ~shiftedlargestrunninglengthcount;

    /** The array of words. */
    public EWAHCompressedBitmap32 parent;
    /** The position in array. */
    public int position;

    /**
     * Instantiates a new running length word.
     *
     * @param a
     *            an array of 32-bit words
     * @param p
     *            position in the array where the running length word is
     *            located.
     */
    RunningLengthWord32(final EWAHCompressedBitmap32 a, final int p) {
        this.position = p;
        this.parent = a;
    }

    /**
     * Gets the number of literal words.
     *
     * @return the number of literal words
     */
    public int getNumberOfLiteralWords() {
        return this.parent.buffer[this.position] >>> (runninglengthbits + 1);
    }

    /**
     * Gets the running bit.
     *
     * @return the running bit
     */
    public boolean getRunningBit() {
        return 0 != (this.parent.buffer[this.position] & 1);
    }

    /**
     * Gets the running length.
     *
     * @return the running length
     */
    public int getRunningLength() {
        return largestrunninglengthcount
                & (this.parent.buffer[this.position] >>> 1);
    }

    /**
     * Sets the number of literal words.
     *
     * @param number
     *            the new number of literal words
     */
    public void setNumberOfLiteralWords(final int number) {
        // set every literal-count bit, then clear those absent from number
        this.parent.buffer[this.position] =
                (this.parent.buffer[this.position] | notrunninglengthplusrunningbit)
                & ((number << (runninglengthbits + 1)) | runninglengthplusrunningbit);
    }

    /**
     * Sets the running bit.
     *
     * @param b
     *            the new running bit
     */
    public void setRunningBit(final boolean b) {
        if (!b) {
            this.parent.buffer[this.position] &= ~1;
        } else {
            this.parent.buffer[this.position] |= 1;
        }
    }

    /**
     * Sets the running length.
     *
     * @param number
     *            the new running length
     */
    public void setRunningLength(final int number) {
        // set every running-length bit, then clear those absent from number
        this.parent.buffer[this.position] =
                (this.parent.buffer[this.position] | shiftedlargestrunninglengthcount)
                & ((number << 1) | notshiftedlargestrunninglengthcount);
    }

    /**
     * Return the size in uncompressed words represented by this running
     * length word.
     *
     * @return running length plus number of literal words
     */
    public int size() {
        return getRunningLength() + getNumberOfLiteralWords();
    }

    /*
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return "running bit = " + getRunningBit()
                + " running length = " + getRunningLength()
                + " number of lit. words " + getNumberOfLiteralWords();
    }

    /**
     * Copies this view; the copy refers to the same parent bitmap and the
     * same position.
     */
    @Override
    public RunningLengthWord32 clone() throws CloneNotSupportedException {
        final RunningLengthWord32 copy = (RunningLengthWord32) super.clone();
        copy.parent = this.parent;
        copy.position = this.position;
        return copy;
    }
}
Loading…
Cancel
Save