From 83276186fcc2de5497791464deb42ed52230f2fa Mon Sep 17 00:00:00 2001 From: dwivedi Date: Tue, 1 Nov 2016 16:17:08 -0700 Subject: [PATCH 1/5] Migrating extendedset and bytebuffercollections from Metamarkets. --- NOTICE | 16 + .../druid/benchmark/BoundFilterBenchmark.java | 13 +- .../benchmark/ConciseComplementBenchmark.java | 2 +- .../DimensionPredicateFilterBenchmark.java | 10 +- ...tions.bitmap.RangeBitmapBenchmarkTest.html | 145 + ...ions.bitmap.RangeBitmapBenchmarkTest.jsonp | 22 + ...ons.bitmap.UniformBitmapBenchmarkTest.html | 145 + ...ns.bitmap.UniformBitmapBenchmarkTest.jsonp | 23 + bytebuffer-collections/pom.xml | 139 + .../java/io/druid/collections/IntegerSet.java | 204 ++ .../bitmap/BitSetBitmapFactory.java | 98 + .../collections/bitmap/BitmapFactory.java | 83 + .../bitmap/ConciseBitmapFactory.java | 139 + .../collections/bitmap/ImmutableBitmap.java | 84 + .../collections/bitmap/MutableBitmap.java | 111 + .../bitmap/RoaringBitmapFactory.java | 179 + .../bitmap/WrappedBitSetBitmap.java | 149 + .../bitmap/WrappedConciseBitmap.java | 208 ++ .../bitmap/WrappedConciseIntIterator.java | 53 + .../bitmap/WrappedImmutableBitSetBitmap.java | 144 + .../bitmap/WrappedImmutableConciseBitmap.java | 146 + .../bitmap/WrappedImmutableRoaringBitmap.java | 129 + .../bitmap/WrappedRoaringBitmap.java | 265 ++ .../collections/spatial/ImmutableNode.java | 226 ++ .../collections/spatial/ImmutablePoint.java | 64 + .../collections/spatial/ImmutableRTree.java | 144 + .../io/druid/collections/spatial/Node.java | 235 ++ .../io/druid/collections/spatial/Point.java | 113 + .../io/druid/collections/spatial/RTree.java | 245 ++ .../druid/collections/spatial/RTreeUtils.java | 252 ++ .../collections/spatial/search/Bound.java | 48 + .../spatial/search/GutmanSearchStrategy.java | 210 ++ .../spatial/search/PolygonBound.java | 176 + .../spatial/search/RadiusBound.java | 132 + .../spatial/search/RectangularBound.java | 155 + .../spatial/search/SearchStrategy.java | 32 + 
.../spatial/split/GutmanSplitStrategy.java | 135 + .../split/LinearGutmanSplitStrategy.java | 119 + .../split/QuadraticGutmanSplitStrategy.java | 83 + .../spatial/split/SplitStrategy.java | 31 + .../druid/collections/IntSetTestUtility.java | 114 + .../io/druid/collections/TestIntegerSet.java | 242 ++ .../collections/bitmap/BitmapBenchmark.java | 197 + .../bitmap/ConciseBitmapFactoryTest.java | 94 + .../bitmap/RangeBitmapBenchmarkTest.java | 83 + .../bitmap/RoaringBitmapFactoryTest.java | 95 + .../bitmap/UniformBitmapBenchmarkTest.java | 80 + .../bitmap/WrappedBitSetBitmapBitSetTest.java | 174 + .../bitmap/WrappedRoaringBitmapTest.java | 86 + .../spatial/ImmutableRTreeTest.java | 651 ++++ .../druid/collections/spatial/RTreeTest.java | 116 + .../spatial/search/PolygonBoundTest.java | 49 + .../spatial/search/RadiusBoundTest.java | 51 + .../spatial/search/RectangularBoundTest.java | 49 + .../split/LinearGutmanSplitStrategyTest.java | 118 + .../io/druid/test/annotation/Benchmark.java | 24 + .../java/io/druid/test/annotation/Dummy.java | 24 + extendedset/pom.xml | 66 + .../extendedset/AbstractExtendedSet.java | 1432 ++++++++ .../io/druid/extendedset/ExtendedSet.java | 592 +++ .../extendedset/intset/AbstractIntSet.java | 744 ++++ .../io/druid/extendedset/intset/ArraySet.java | 1157 ++++++ .../druid/extendedset/intset/ConciseSet.java | 3178 +++++++++++++++++ .../extendedset/intset/ConciseSetUtils.java | 563 +++ .../io/druid/extendedset/intset/FastSet.java | 1403 ++++++++ .../druid/extendedset/intset/HashIntSet.java | 1012 ++++++ .../intset/ImmutableConciseSet.java | 1157 ++++++ .../io/druid/extendedset/intset/IntSet.java | 662 ++++ .../druid/extendedset/utilities/ArrayMap.java | 299 ++ .../druid/extendedset/utilities/BitCount.java | 350 ++ .../extendedset/utilities/CollectionMap.java | 317 ++ .../extendedset/utilities/IntHashCode.java | 103 + .../druid/extendedset/utilities/IntList.java | 115 + .../utilities/IntSetStatistics.java | 689 ++++ 
.../utilities/random/MersenneTwister.java | 869 +++++ .../utilities/random/MersenneTwisterFast.java | 1470 ++++++++ .../wrappers/GenericExtendedSet.java | 885 +++++ .../extendedset/wrappers/IndexedSet.java | 741 ++++ .../extendedset/wrappers/IntegerSet.java | 580 +++ .../druid/extendedset/wrappers/LongSet.java | 1692 +++++++++ .../wrappers/matrix/BinaryMatrix.java | 2052 +++++++++++ .../extendedset/wrappers/matrix/Pair.java | 106 + .../extendedset/wrappers/matrix/PairMap.java | 448 +++ .../extendedset/wrappers/matrix/PairSet.java | 1403 ++++++++ .../test/java/io/druid/extendedset/Debug.java | 1858 ++++++++++ .../io/druid/extendedset/Performance.java | 496 +++ .../io/druid/extendedset/RandomNumbers.java | 242 ++ .../intset/ImmutableConciseSetTest.java | 1972 ++++++++++ .../distinctcount/BitMapFactory.java | 2 +- .../distinctcount/ConciseBitMapFactory.java | 6 +- .../DistinctCountAggregator.java | 2 +- .../DistinctCountBufferAggregator.java | 4 +- .../distinctcount/JavaBitMapFactory.java | 6 +- .../distinctcount/RoaringBitMapFactory.java | 6 +- pom.xml | 2 + processing/pom.xml | 3 +- .../query/filter/BitmapIndexSelector.java | 6 +- .../java/io/druid/query/filter/Filter.java | 2 +- .../query/filter/RowOffsetMatcherFactory.java | 2 +- .../druid/query/filter/SpatialDimFilter.java | 2 +- .../druid/query/search/SearchQueryRunner.java | 6 +- .../java/io/druid/segment/BitmapOffset.java | 8 +- .../ColumnSelectorBitmapIndexSelector.java | 6 +- .../io/druid/segment/DimensionIndexer.java | 4 +- .../main/java/io/druid/segment/IndexIO.java | 10 +- .../java/io/druid/segment/MMappedIndex.java | 4 +- .../java/io/druid/segment/QueryableIndex.java | 2 +- .../segment/QueryableIndexStorageAdapter.java | 3 +- .../druid/segment/SimpleQueryableIndex.java | 2 +- .../druid/segment/StringDimensionIndexer.java | 5 +- .../segment/StringDimensionMergerLegacy.java | 8 +- .../segment/StringDimensionMergerV9.java | 12 +- .../io/druid/segment/column/BitmapIndex.java | 4 +- 
.../io/druid/segment/column/SpatialIndex.java | 2 +- .../data/BitmapCompressedIndexedInts.java | 2 +- .../segment/data/BitmapSerdeFactory.java | 4 +- .../data/ConciseBitmapSerdeFactory.java | 10 +- .../io/druid/segment/data/IndexedRTree.java | 4 +- .../data/RoaringBitmapSerdeFactory.java | 8 +- .../io/druid/segment/filter/AndFilter.java | 2 +- .../io/druid/segment/filter/BoundFilter.java | 2 +- .../filter/DimensionPredicateFilter.java | 2 +- .../java/io/druid/segment/filter/Filters.java | 2 +- .../io/druid/segment/filter/InFilter.java | 2 +- .../segment/filter/JavaScriptFilter.java | 2 +- .../io/druid/segment/filter/NotFilter.java | 2 +- .../io/druid/segment/filter/OrFilter.java | 2 +- .../druid/segment/filter/SelectorFilter.java | 2 +- .../druid/segment/filter/SpatialFilter.java | 4 +- .../incremental/IncrementalIndexAdapter.java | 4 +- .../serde/BitmapIndexColumnPartSupplier.java | 4 +- .../DictionaryEncodedColumnPartSerde.java | 4 +- .../serde/SpatialIndexColumnPartSupplier.java | 2 +- .../io/druid/segment/BitmapOffsetTest.java | 10 +- .../java/io/druid/segment/EmptyIndexTest.java | 2 +- .../io/druid/segment/IndexMergerTest.java | 3 +- .../IndexMergerV9WithSpatialIndexTest.java | 4 +- .../segment/data/BitmapCreationBenchmark.java | 6 +- .../filter/ExtractionDimFilterTest.java | 12 +- .../filter/SpatialFilterBonusTest.java | 4 +- .../segment/filter/SpatialFilterTest.java | 4 +- .../main/java/io/druid/cli/DumpSegment.java | 8 +- 142 files changed, 35916 insertions(+), 127 deletions(-) create mode 100755 bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.html create mode 100755 bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.jsonp create mode 100755 bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.html create mode 100755 bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.jsonp create mode 100755 
bytebuffer-collections/pom.xml create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/IntegerSet.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitSetBitmapFactory.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedBitSetBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableBitSetBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedRoaringBitmap.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutablePoint.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableRTree.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/Node.java create mode 100755 
bytebuffer-collections/src/main/java/io/druid/collections/spatial/Point.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTreeUtils.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/Bound.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/GutmanSearchStrategy.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RadiusBound.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RectangularBound.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/SearchStrategy.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/QuadraticGutmanSplitStrategy.java create mode 100755 bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/SplitStrategy.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/IntSetTestUtility.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/TestIntegerSet.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java create mode 100755 
bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RoaringBitmapFactoryTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedBitSetBitmapBitSetTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedRoaringBitmapTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/spatial/ImmutableRTreeTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/spatial/RTreeTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/PolygonBoundTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RadiusBoundTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RectangularBoundTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategyTest.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/test/annotation/Benchmark.java create mode 100755 bytebuffer-collections/src/test/java/io/druid/test/annotation/Dummy.java create mode 100755 extendedset/pom.xml create mode 100755 extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/FastSet.java create mode 100755 
extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java create mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java create mode 100755 extendedset/src/test/java/io/druid/extendedset/Debug.java create mode 100755 extendedset/src/test/java/io/druid/extendedset/Performance.java create mode 100755 
extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java create mode 100755 extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java diff --git a/NOTICE b/NOTICE index 2c1e672e8e2e..867fd9ee586a 100644 --- a/NOTICE +++ b/NOTICE @@ -36,3 +36,19 @@ This product contains a modified version of TestNG 6.8.7 * http://testng.org/license/ (Apache License, Version 2.0) * HOMEPAGE: * http://testng.org/ + +This product contains a modified version of Metamarkets extendedset library + * LICENSE: + * https://github.com/metamx/extendedset/blob/master/LICENSE (Apache License, Version 2.0) + * HOMEPAGE: + * https://github.com/metamx/extendedset + * COMMIT TAG: + * https://github.com/metamx/extendedset/commit/c9d647d + +This product contains a modified version of Metamarkets bytebuffer-collections library + * LICENSE: + * https://github.com/metamx/bytebuffer-collections/blob/master/LICENSE (Apache License, Version 2.0) + * HOMEPAGE: + * https://github.com/metamx/bytebuffer-collections + * COMMIT TAG: + * https://github.com/metamx/bytebuffer-collections/commit/3d1e7c8 diff --git a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java index fef3d6a8b7ab..3ae4ff03d73e 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java @@ -22,11 +22,13 @@ import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.FluentIterable; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import 
io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.spatial.ImmutableRTree; + +import io.druid.extendedset.intset.ConciseSetUtils; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; import io.druid.query.ordering.StringComparators; @@ -37,7 +39,6 @@ import io.druid.segment.data.RoaringBitmapSerdeFactory; import io.druid.segment.filter.BoundFilter; import io.druid.segment.serde.BitmapIndexColumnPartSupplier; -import it.uniroma3.mat.extendedset.intset.ConciseSetUtils; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; diff --git a/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java index dc69035acf96..a31a3f713f39 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java @@ -20,7 +20,7 @@ package io.druid.benchmark; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Mode; diff --git a/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java index 424c003e3691..48799cd01676 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/DimensionPredicateFilterBenchmark.java @@ -23,11 +23,11 @@ import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.FluentIterable; -import 
com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.DruidLongPredicate; import io.druid.query.filter.DruidPredicateFactory; diff --git a/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.html b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.html new file mode 100755 index 000000000000..eba65d3d3c18 --- /dev/null +++ b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.html @@ -0,0 +1,145 @@ + + + + + + + Benchmark results for methods in class RangeBitmapBenchmarkTest + + + + + + + + + + +
+

Benchmark results for methods in class RangeBitmapBenchmarkTest

+ +
+
+ +
+ + +
+
+ + + + diff --git a/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.jsonp b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.jsonp new file mode 100755 index 000000000000..17306131437b --- /dev/null +++ b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.RangeBitmapBenchmarkTest.jsonp @@ -0,0 +1,22 @@ +receiveJsonpData({ +"cols": [ +{"label": "Run", "type": "string"}, +{"label": "Custom key", "type": "string"}, +{"label": "Timestamp", "type": "string"}, +{"label": "timeConciseUnion", "type": "string"} , +{"label": "timeGenericConciseIntersection", "type": "string"} , +{"label": "timeGenericConciseUnion", "type": "string"} , +{"label": "timeGenericRoaringIntersection", "type": "string"} , +{"label": "timeGenericRoaringUnion", "type": "string"} , +{"label": "timeImmutableRoaringUnion", "type": "string"} , +{"label": "timeOffheapConciseUnion", "type": "string"} , +{"label": "timeOffheapRoaringUnion", "type": "string"} , +{"label": "timeRoaringUnion", "type": "string"} ], +"rows": [ +{"c": [{"v": "5"}, {"v": "0.00001"}, {"v": "2014-11-04 14:03:04.268"}, {"v": 80.304}, {"v": 79.758}, {"v": 65.896}, {"v": 0.008}, {"v": 0.278}, {"v": 0.596}, {"v": 70.89}, {"v": 0.275}, {"v": 0.202}]}, +{"c": [{"v": "1"}, {"v": "0.0001"}, {"v": "2014-11-04 13:32:21.752"}, {"v": 30.843}, {"v": 30.863}, {"v": 32.306}, {"v": 0.012}, {"v": 0.272}, {"v": 0.546}, {"v": 32.727}, {"v": 0.327}, {"v": 0.158}]}, +{"c": [{"v": "2"}, {"v": "0.0010"}, {"v": "2014-11-04 13:41:55.608"}, {"v": 3.801}, {"v": 3.441}, {"v": 3.421}, {"v": 0.019}, {"v": 0.272}, {"v": 0.524}, {"v": 3.76}, {"v": 0.271}, {"v": 0.171}]}, +{"c": [{"v": "3"}, {"v": "0.0100"}, {"v": "2014-11-04 13:45:36.077"}, {"v": 0.341}, {"v": 0.541}, {"v": 0.628}, {"v": 0.025}, {"v": 0.276}, {"v": 0.576}, {"v": 0.352}, {"v": 0.276}, {"v": 0.263}]}, +{"c": [{"v": "4"}, {"v": "0.1000"}, {"v": "2014-11-04 13:49:27.509"}, {"v": 0.051}, {"v": 0.062}, {"v": 0.046}, 
{"v": 0.039}, {"v": 0.295}, {"v": 0.47}, {"v": 0.045}, {"v": 0.299}, {"v": 0.181}]}, +{"c": [{"v": "38"}, {"v": "0.25000"}, {"v": "2014-11-04 15:22:42.446"}, {"v": 0.036}, {"v": 0.041}, {"v": 0.04}, {"v": 0.052}, {"v": 0.179}, {"v": 0.391}, {"v": 0.042}, {"v": 0.18}, {"v": 0.105}]} +]}); diff --git a/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.html b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.html new file mode 100755 index 000000000000..a6dc08ad64fd --- /dev/null +++ b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.html @@ -0,0 +1,145 @@ + + + + + + + Benchmark results for methods in class UniformBitmapBenchmarkTest + + + + + + + + + + +
+

Benchmark results for methods in class UniformBitmapBenchmarkTest

+ +
+
+ +
+ + +
+
+ + + + diff --git a/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.jsonp b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.jsonp new file mode 100755 index 000000000000..1194215e2289 --- /dev/null +++ b/bytebuffer-collections/benchmarks/io.druid.collections.bitmap.UniformBitmapBenchmarkTest.jsonp @@ -0,0 +1,23 @@ +receiveJsonpData({ +"cols": [ +{"label": "Run", "type": "string"}, +{"label": "Custom key", "type": "string"}, +{"label": "Timestamp", "type": "string"}, +{"label": "timeConciseUnion", "type": "string"} , +{"label": "timeGenericConciseIntersection", "type": "string"} , +{"label": "timeGenericConciseUnion", "type": "string"} , +{"label": "timeGenericRoaringIntersection", "type": "string"} , +{"label": "timeGenericRoaringUnion", "type": "string"} , +{"label": "timeImmutableRoaringUnion", "type": "string"} , +{"label": "timeOffheapConciseUnion", "type": "string"} , +{"label": "timeOffheapRoaringUnion", "type": "string"} , +{"label": "timeRoaringUnion", "type": "string"} ], +"rows": [ +{"c": [{"v": "6"}, {"v": "0.0001"}, {"v": "2014-11-04 11:41:24.142"}, {"v": 1.099}, {"v": 0.401}, {"v": 1.391}, {"v": 0.02}, {"v": 0.131}, {"v": 0.104}, {"v": 1.171}, {"v": 0.132}, {"v": 0.091}]}, +{"c": [{"v": "5"}, {"v": "0.0010"}, {"v": "2014-11-04 11:37:12.305"}, {"v": 6.989}, {"v": 0.595}, {"v": 7.4}, {"v": 0.026}, {"v": 0.144}, {"v": 0.098}, {"v": 7.95}, {"v": 0.139}, {"v": 0.066}]}, +{"c": [{"v": "4"}, {"v": "0.0100"}, {"v": "2014-11-04 11:23:42.26"}, {"v": 50.259}, {"v": 4.768}, {"v": 51.716}, {"v": 0.053}, {"v": 0.563}, {"v": 0.223}, {"v": 54.117}, {"v": 0.59}, {"v": 0.175}]}, +{"c": [{"v": "8"}, {"v": "0.1000"}, {"v": "2014-11-04 12:19:56.926"}, {"v": 64.505}, {"v": 23.741}, {"v": 63.565}, {"v": 0.031}, {"v": 0.353}, {"v": 0.528}, {"v": 59.636}, {"v": 0.352}, {"v": 0.155}]}, +{"c": [{"v": "7"}, {"v": "0.2500"}, {"v": "2014-11-04 11:52:42.488"}, {"v": 67.57}, {"v": 64.276}, {"v": 
60.747}, {"v": 0.021}, {"v": 0.275}, {"v": 0.523}, {"v": 69.835}, {"v": 0.251}, {"v": 0.178}]}, +{"c": [{"v": "2"}, {"v": "0.5000"}, {"v": "2014-11-04 10:19:33.921"}, {"v": 66.058}, {"v": 67.714}, {"v": 64.162}, {"v": 0.026}, {"v": 0.264}, {"v": 0.541}, {"v": 66.445}, {"v": 0.281}, {"v": 0.168}]}, +{"c": [{"v": "3"}, {"v": "0.7500"}, {"v": "2014-11-04 10:44:45.546"}, {"v": 65.028}, {"v": 70.115}, {"v": 63.475}, {"v": 0.027}, {"v": 0.284}, {"v": 0.574}, {"v": 68.909}, {"v": 0.295}, {"v": 0.195}]} +]}); diff --git a/bytebuffer-collections/pom.xml b/bytebuffer-collections/pom.xml new file mode 100755 index 000000000000..668692de8087 --- /dev/null +++ b/bytebuffer-collections/pom.xml @@ -0,0 +1,139 @@ + + + + + 4.0.0 + + + io.druid + druid + 0.9.3-SNAPSHOT + + + bytebuffer-collections + bytebuffer-collections + ByteBuffer Collections + + + + io.druid + extendedset + ${project.parent.version} + + + com.google.guava + guava + 16.0.1 + + + com.fasterxml.jackson.core + jackson-annotations + 2.4.6 + + + com.fasterxml.jackson.core + jackson-core + 2.4.6 + + + com.fasterxml.jackson.core + jackson-databind + 2.4.6 + + + org.roaringbitmap + RoaringBitmap + 0.5.18 + + + + + junit + junit + test + + + org.easymock + easymock + 3.0 + test + + + com.carrotsearch + junit-benchmarks + 0.7.2 + test + + + com.h2database + h2 + 1.4.182 + test + + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + test-jar + + + + + + org.apache.maven.plugins + maven-release-plugin + + + org.apache.maven.plugins + maven-surefire-plugin + 2.18.1 + + io.druid.test.annotation.Benchmark + + + + + + + + benchmark + + + + maven-surefire-plugin + + -server -Xms3G -Xmx3G -Djub.consumers=CONSOLE,H2 -Djub.db.file=benchmarks/benchmarks + io.druid.test.annotation.Benchmark + io.druid.test.annotation.Dummy + + + + + + + diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/IntegerSet.java b/bytebuffer-collections/src/main/java/io/druid/collections/IntegerSet.java new file mode 100755 
index 000000000000..0509f30fdc64 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/IntegerSet.java @@ -0,0 +1,204 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections; + +import com.google.common.collect.Sets; +import io.druid.collections.bitmap.MutableBitmap; +import org.roaringbitmap.IntIterator; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Set; + +/** + * Set of Integer values stored in a wrapped MutableBitmap (see wrap()); add() rejects null and negative values, remove() throws ClassCastException for non-Integer arguments, and retainAll() always throws UnsupportedOperationException. NOTE(review): BitSetIterator.remove() hands its initial null 'prior' to the bitmap when it is called before next() - confirm whether that is intended. + */ +public class IntegerSet implements Set +{ + private final MutableBitmap mutableBitmap; + + private IntegerSet(MutableBitmap mutableBitmap) + { + this.mutableBitmap = mutableBitmap; + } + + public static IntegerSet wrap(MutableBitmap mutableBitmap) + { + return new IntegerSet(mutableBitmap); + } + + @Override + public int size() + { + return this.mutableBitmap.size(); + } + + @Override + public boolean isEmpty() + { + return this.mutableBitmap.isEmpty(); + } + + @Override + public boolean contains(Object o) + { + if (o instanceof Integer) { + return mutableBitmap.get((Integer) o); + } else if (o instanceof Long) { + return this.contains(((Long) o).intValue()); + } + return false; + } + + @Override + public Iterator iterator() + { + return new 
BitSetIterator(mutableBitmap); + } + + @Override + public Object[] toArray() + { + Integer[] retval = new Integer[mutableBitmap.size()]; + int pos = 0; + for (Integer i : this) { + retval[pos++] = i; + } + return retval; + } + + @Override + public T[] toArray(T[] a) + { + return Sets.newHashSet(this).toArray(a); + } + + @Override + public boolean add(Integer integer) + { + if (null == integer) { + throw new NullPointerException("BitSet cannot contain null values"); + } + if (integer < 0) { + throw new IllegalArgumentException("Only positive integers or zero can be added"); + } + boolean isSet = mutableBitmap.get(integer); + mutableBitmap.add(integer.intValue()); + return !isSet; + } + + @Override + public boolean remove(Object o) + { + if (o == null) { + throw new NullPointerException("BitSet cannot contain null values"); + } + if (o instanceof Integer) { + Integer integer = (Integer) o; + boolean isSet = mutableBitmap.get(integer); + mutableBitmap.remove(integer); + return isSet; + } else { + throw new ClassCastException("Cannot remove non Integer from integer BitSet"); + } + } + + @Override + public boolean containsAll(Collection c) + { + Iterator it = c.iterator(); + while (it.hasNext()) { + if (!this.contains(it.next())) { + return false; + } + } + return true; + } + + @Override + public boolean addAll(Collection c) + { + boolean setChanged = false; + for (Integer i : c) { + if (!this.contains(i)) { + setChanged = true; + this.add(i); + } + } + return setChanged; + } + + @Override + public boolean retainAll(Collection c) + { + // Stub: retainAll is not implemented for this set, so it always throws + throw new UnsupportedOperationException("Cannot retainAll ona an IntegerSet"); + } + + @Override + public boolean removeAll(Collection c) + { + Iterator it = c.iterator(); + boolean changed = false; + while (it.hasNext()) { + Integer val = (Integer) it.next(); + changed = remove(val) || changed; + } + return changed; + } + + @Override + public void clear() + { + mutableBitmap.clear(); + } + + public static class BitSetIterator 
implements Iterator + { + private final IntIterator intIt; + private final MutableBitmap bitSet; + private Integer prior = null; + + public BitSetIterator(MutableBitmap bitSet) + { + this.intIt = bitSet.iterator(); + this.bitSet = bitSet; + } + + @Override + public boolean hasNext() + { + return intIt.hasNext(); + } + + @Override + public Integer next() + { + prior = intIt.next(); + return prior; + } + + @Override + public void remove() + { + bitSet.remove(prior); + } + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitSetBitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitSetBitmapFactory.java new file mode 100755 index 000000000000..acb59511109e --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitSetBitmapFactory.java @@ -0,0 +1,98 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.bitmap; + +import java.nio.ByteBuffer; +import java.util.BitSet; + +/** + * BitSetBitmapFactory implements BitmapFactory as a wrapper for java.util.BitSet + */ +public class BitSetBitmapFactory implements BitmapFactory +{ + @Override + public MutableBitmap makeEmptyMutableBitmap() + { + return new WrappedBitSetBitmap(); + } + + @Override + public ImmutableBitmap makeEmptyImmutableBitmap() + { + return makeEmptyMutableBitmap(); + } + + @Override + public ImmutableBitmap makeImmutableBitmap(MutableBitmap mutableBitmap) + { + return mutableBitmap; + } + + @Override + public ImmutableBitmap mapImmutableBitmap(ByteBuffer b) + { + return new WrappedBitSetBitmap(BitSet.valueOf(b.array())); + } + + @Override + public ImmutableBitmap union(Iterable b) + { + WrappedBitSetBitmap newSet = null; + for (ImmutableBitmap bm : b) { + if (null == newSet) { + newSet = new WrappedBitSetBitmap(((WrappedBitSetBitmap) bm).cloneBitSet()); + } else { + newSet.union(bm); + } + } + return newSet; + } + + @Override + public ImmutableBitmap intersection(Iterable b) + { + + WrappedBitSetBitmap newSet = null; + for (ImmutableBitmap bm : b) { + if (null == newSet) { + newSet = new WrappedBitSetBitmap(((WrappedBitSetBitmap) bm).cloneBitSet()); + } else { + newSet.intersection(bm); + } + } + return newSet; + } + + @Override + public ImmutableBitmap complement(ImmutableBitmap b) + { + BitSet bitSet = ((WrappedBitSetBitmap) b).cloneBitSet(); + bitSet.flip(0, bitSet.size()); + return new WrappedBitSetBitmap(bitSet); + } + + @Override + public ImmutableBitmap complement( + ImmutableBitmap b, int length + ) + { + return null; + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java new file mode 100755 index 000000000000..4abcdaf5d4ca --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java 
@@ -0,0 +1,113 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import java.nio.ByteBuffer;
+
+public interface BitmapFactory
+{
+  /**
+   * Create a new empty bitmap
+   *
+   * @return the new bitmap
+   */
+  public MutableBitmap makeEmptyMutableBitmap();
+
+  /**
+   * Create an empty bitmap that is exposed only through the ImmutableBitmap interface.
+   *
+   * @return an empty bitmap
+   */
+  public ImmutableBitmap makeEmptyImmutableBitmap();
+
+  /**
+   * Obtain an ImmutableBitmap holding the contents of a mutable bitmap.
+   *
+   * @param mutableBitmap the bitmap to convert; presumably it must have been created by this same
+   *                      factory (the Concise and Roaring factories reject other wrapper classes)
+   *
+   * @return the immutable bitmap
+   */
+  public ImmutableBitmap makeImmutableBitmap(MutableBitmap mutableBitmap);
+
+  /**
+   * Given a ByteBuffer pointing at a serialized version of a bitmap,
+   * instantiate an immutable mapped bitmap.
+   * <p>
+   * When using RoaringBitmap (with the RoaringBitmapFactory class), it is not
+   * necessary for b.limit() to indicate the end of the serialized content
+   * whereas it is critical to set b.limit() appropriately with ConciseSet (with
+   * the ConciseBitmapFactory).
+   *
+   * @param b the input byte buffer
+   *
+   * @return the new bitmap
+   */
+  public ImmutableBitmap mapImmutableBitmap(ByteBuffer b);
+
+  /**
+   * Compute the union (bitwise-OR) of a set of bitmaps. They are assumed to be
+   * instances of the wrapper class used by the implementing factory, otherwise a ClassCastException
+   * is thrown.
+   *
+   * @param b input ImmutableBitmap objects
+   *
+   * @return the union.
+   *
+   * @throws ClassCastException if one of the ImmutableBitmap objects is not an instance
+   *                            of the wrapper class expected by the implementing factory
+   */
+  public ImmutableBitmap union(Iterable b);
+
+  /**
+   * Compute the intersection (bitwise-AND) of a set of bitmaps. They are assumed to be
+   * instances of the wrapper class used by the implementing factory, otherwise a ClassCastException
+   * is thrown.
+   *
+   * @param b input ImmutableBitmap objects
+   *
+   * @return the intersection.
+   *
+   * @throws ClassCastException if one of the ImmutableBitmap objects is not an instance
+   *                            of the wrapper class expected by the implementing factory
+   */
+  public ImmutableBitmap intersection(Iterable b);
+
+  /**
+   * Compute the complement of a bitmap. How many positions are covered is decided by the
+   * implementation; use the two-argument form to state the length explicitly.
+   *
+   * @param b the bitmap to complement
+   *
+   * @return the complement
+   */
+  public ImmutableBitmap complement(ImmutableBitmap b);
+
+  /**
+   * Compute the complement of a bitmap for a given length.
+   *
+   * @param b      the bitmap to complement
+   * @param length presumably the number of positions, counted from 0, that the complement covers
+   *               (confirm against each implementation)
+   *
+   * @return the complement
+   */
+  public ImmutableBitmap complement(ImmutableBitmap b, int length);
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java
new file mode 100755
index 000000000000..d9d9324d61ea
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+
+import io.druid.extendedset.intset.ImmutableConciseSet;
+
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+
+/**
+ * Factory for the Concise-backed wrappers: mutable bitmaps are WrappedConciseBitmap instances and
+ * immutable ones are WrappedImmutableConciseBitmap instances.
+ */
+public class ConciseBitmapFactory implements BitmapFactory
+{
+  private static final ImmutableConciseSet EMPTY_IMMUTABLE_BITMAP = new ImmutableConciseSet();
+  private static final WrappedImmutableConciseBitmap WRAPPED_IMMUTABLE_CONCISE_BITMAP =
+      new WrappedImmutableConciseBitmap(EMPTY_IMMUTABLE_BITMAP);
+
+  /**
+   * View an Iterable of wrapped bitmaps as an Iterable of the ImmutableConciseSets inside them.
+   * Elements are unwrapped one at a time as the result is iterated.
+   *
+   * @param b wrapped bitmaps; null elements are mapped to the shared empty set
+   *
+   * @return an Iterable whose iterators yield ImmutableConciseSet objects
+   */
+  private static Iterable unwrap(final Iterable b)
+  {
+    return new Iterable()
+    {
+      @Override
+      public Iterator iterator()
+      {
+        return new UnwrappingIterator(b.iterator());
+      }
+    };
+  }
+
+  /**
+   * Read-only iterator that unwraps each WrappedImmutableConciseBitmap produced by another iterator.
+   */
+  private static final class UnwrappingIterator implements Iterator
+  {
+    private final Iterator source;
+
+    UnwrappingIterator(Iterator source)
+    {
+      this.source = source;
+    }
+
+    @Override
+    public boolean hasNext()
+    {
+      return source.hasNext();
+    }
+
+    @Override
+    public ImmutableConciseSet next()
+    {
+      // The cast fails with ClassCastException for any non-null element of another class.
+      final WrappedImmutableConciseBitmap wrapped = (WrappedImmutableConciseBitmap) source.next();
+      return wrapped == null ? EMPTY_IMMUTABLE_BITMAP : wrapped.getBitmap();
+    }
+
+    @Override
+    public void remove()
+    {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  @Override
+  public MutableBitmap makeEmptyMutableBitmap()
+  {
+    return new WrappedConciseBitmap();
+  }
+
+  /**
+   * @return the shared wrapper around the empty ImmutableConciseSet; no new object is created
+   */
+  @Override
+  public ImmutableBitmap makeEmptyImmutableBitmap()
+  {
+    return WRAPPED_IMMUTABLE_CONCISE_BITMAP;
+  }
+
+  /**
+   * Convert a WrappedConciseBitmap into a WrappedImmutableConciseBitmap.
+   *
+   * @param mutableBitmap the bitmap to convert
+   *
+   * @return a wrapper around the ImmutableConciseSet built from the mutable set
+   *
+   * @throws IllegalStateException if mutableBitmap is not a WrappedConciseBitmap
+   */
+  @Override
+  public ImmutableBitmap makeImmutableBitmap(MutableBitmap mutableBitmap)
+  {
+    if (mutableBitmap instanceof WrappedConciseBitmap) {
+      final WrappedConciseBitmap concise = (WrappedConciseBitmap) mutableBitmap;
+      final ImmutableConciseSet frozen = ImmutableConciseSet.newImmutableFromMutable(concise.getBitmap());
+      return new WrappedImmutableConciseBitmap(frozen);
+    }
+    throw new IllegalStateException(String.format("Cannot convert [%s]", mutableBitmap.getClass()));
+  }
+
+  @Override
+  public ImmutableBitmap mapImmutableBitmap(ByteBuffer b)
+  {
+    return new WrappedImmutableConciseBitmap(b);
+  }
+
+  @Override
+  public ImmutableBitmap union(Iterable b)
+      throws ClassCastException
+  {
+    final ImmutableConciseSet combined = ImmutableConciseSet.union(unwrap(b));
+    return new WrappedImmutableConciseBitmap(combined);
+  }
+
+  @Override
+  public ImmutableBitmap intersection(Iterable b)
+      throws ClassCastException
+  {
+    final ImmutableConciseSet combined = ImmutableConciseSet.intersection(unwrap(b));
+    return new WrappedImmutableConciseBitmap(combined);
+  }
+
+  @Override
+  public ImmutableBitmap complement(ImmutableBitmap b)
+  {
+    final ImmutableConciseSet unwrapped = ((WrappedImmutableConciseBitmap) b).getBitmap();
+    return new WrappedImmutableConciseBitmap(ImmutableConciseSet.complement(unwrapped));
+  }
+
+  @Override
+  public ImmutableBitmap complement(ImmutableBitmap b, int length)
+  {
+    final ImmutableConciseSet unwrapped = ((WrappedImmutableConciseBitmap) b).getBitmap();
+    return new WrappedImmutableConciseBitmap(ImmutableConciseSet.complement(unwrapped, length));
+  }
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java
new file mode 100755
index 000000000000..11cfe057f57c
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import org.roaringbitmap.IntIterator;
+
+/**
+ * This class is meant to represent a simple wrapper around an immutable bitmap
+ * class.
+ */
+public interface ImmutableBitmap
+{
+  /**
+   * @return an iterator over the set bits of this bitmap
+   */
+  public IntIterator iterator();
+
+  /**
+   * @return The number of bits set to true in this bitmap
+   */
+  public int size();
+
+  /**
+   * @return this bitmap as a byte array; the layout is specific to the implementing wrapper class
+   */
+  public byte[] toBytes();
+
+  /**
+   * Compare this bitmap with another one. The ordering is defined by the implementing class.
+   *
+   * @param other bitmap to compare against; the implementations in this package cast it to their own class
+   *
+   * @return an int describing the order, presumably following the Comparable convention (confirm)
+   */
+  public int compareTo(ImmutableBitmap other);
+
+  /**
+   * @return True if this bitmap is empty (contains no set bit)
+   */
+  public boolean isEmpty();
+
+  /**
+   * Returns true if the bit at position value is set
+   *
+   * @param value the position to check
+   *
+   * @return true if bit is set
+   */
+  public boolean get(int value);
+
+  /**
+   * Compute the bitwise-or of this bitmap with another bitmap. A new bitmap is generated.
+   * <p>
+   * Note that the other bitmap should be an instance of the same class.
+   *
+   * @param otherBitmap other bitmap
+   */
+  public ImmutableBitmap union(ImmutableBitmap otherBitmap);
+
+  /**
+   * Compute the bitwise-and of this bitmap with another bitmap. A new bitmap is generated.
+   * <p>
+   * Note that the other bitmap should be an instance of the same class.
+   *
+   * @param otherBitmap other bitmap
+   */
+  public ImmutableBitmap intersection(ImmutableBitmap otherBitmap);
+
+  /**
+   * Compute the bitwise-andNot of this bitmap with another bitmap. A new bitmap is generated.
+   * <p>
+   * Note that the other bitmap should be an instance of the same class.
+   *
+   * @param otherBitmap other bitmap
+   */
+  public ImmutableBitmap difference(ImmutableBitmap otherBitmap);
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java
new file mode 100755
index 000000000000..bb5e8054cebe
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import java.nio.ByteBuffer;
+
+/**
+ * This class is meant to represent a simple wrapper around a bitmap class.
+ */
+public interface MutableBitmap extends ImmutableBitmap
+{
+  /**
+   * Empties the content of this bitmap.
+   */
+  public void clear();
+
+  /**
+   * Compute the bitwise-or of this bitmap with another bitmap. The current
+   * bitmap is modified whereas the other bitmap is left intact.
+   * <p>
+   * Note that the other bitmap should be an instance of the same class.
+   *
+   * @param mutableBitmap other bitmap
+   */
+  public void or(MutableBitmap mutableBitmap);
+
+  /**
+   * Compute the bitwise-and of this bitmap with another bitmap. The current
+   * bitmap is modified whereas the other bitmap is left intact.
+   * <p>
+   * Note that the other bitmap should be an instance of the same class.
+   *
+   * @param mutableBitmap other bitmap
+   */
+  public void and(MutableBitmap mutableBitmap);
+
+
+  /**
+   * Compute the bitwise-xor of this bitmap with another bitmap. The current
+   * bitmap is modified whereas the other bitmap is left intact.
+   * <p>
+   * Note that the other bitmap should be an instance of the same class.
+   *
+   * @param mutableBitmap other bitmap
+   */
+  public void xor(MutableBitmap mutableBitmap);
+
+  /**
+   * Compute the bitwise-andNot of this bitmap with another bitmap. The current
+   * bitmap is modified whereas the other bitmap is left intact.
+   * <p>
+   * Note that the other bitmap should be an instance of the same class.
+   *
+   * @param mutableBitmap other bitmap
+   */
+  public void andNot(MutableBitmap mutableBitmap);
+
+  /**
+   * Return the size in bytes for the purpose of serialization to a ByteBuffer.
+   * Note that this is distinct from the memory usage.
+   *
+   * @return the total size in bytes
+   */
+  public int getSizeInBytes();
+
+  /**
+   * Add the specified integer to the bitmap. This is equivalent to setting the
+   * ith bit to the value 1.
+   *
+   * @param entry integer to be added
+   */
+  public void add(int entry);
+
+  /**
+   * Remove the specified integer from the bitmap. This is equivalent to setting the
+   * ith bit to the value 0.
+   *
+   * @param entry integer to be removed
+   */
+  public void remove(int entry);
+
+  /**
+   * Write out a serialized (Immutable) version of the bitmap to the ByteBuffer. We prepend
+   * the serialized bitmap with a 4-byte int indicating the size in bytes. Thus
+   * getSizeInBytes() + 4 bytes are written.
+   * <p>
+   * (These 4 bytes are required by ConciseSet but not by RoaringBitmap.
+   * Nevertheless, we always write them for the sake of simplicity, even if it
+   * wastes 4 bytes in some instances.)
+   * <p>
+   * NOTE(review): WrappedBitSetBitmap and WrappedConciseBitmap write only the bytes of toBytes(),
+   * without a 4-byte prefix; confirm which behavior is the intended contract.
+   *
+   * @param buffer where we write
+   */
+  public void serialize(ByteBuffer buffer);
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java
new file mode 100755
index 000000000000..0aa86e05c362
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/RoaringBitmapFactory.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import com.google.common.base.Throwables;
+import org.roaringbitmap.RoaringBitmap;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+
+/**
+ * As the name suggests, this class instantiates bitmaps of the types
+ * WrappedRoaringBitmap and WrappedImmutableRoaringBitmap.
+ */
+public class RoaringBitmapFactory implements BitmapFactory
+{
+  static final boolean DEFAULT_COMPRESS_RUN_ON_SERIALIZATION = false;
+  private static final ImmutableRoaringBitmap EMPTY_IMMUTABLE_BITMAP = serializedEmptyBitmap();
+  private static final WrappedImmutableRoaringBitmap WRAPPED_IMMUTABLE_ROARING_BITMAP =
+      new WrappedImmutableRoaringBitmap(EMPTY_IMMUTABLE_BITMAP);
+
+  private final boolean compressRunOnSerialization;
+
+  public RoaringBitmapFactory()
+  {
+    this(DEFAULT_COMPRESS_RUN_ON_SERIALIZATION);
+  }
+
+  /**
+   * @param compressRunOnSerialization flag handed to every WrappedRoaringBitmap this factory creates
+   */
+  public RoaringBitmapFactory(boolean compressRunOnSerialization)
+  {
+    this.compressRunOnSerialization = compressRunOnSerialization;
+  }
+
+  /**
+   * Build the shared empty bitmap by serializing an empty RoaringBitmap and mapping the bytes.
+   * Runs once, while the class is initialized; any failure is rethrown through Throwables.propagate.
+   */
+  private static ImmutableRoaringBitmap serializedEmptyBitmap()
+  {
+    try {
+      final ByteArrayOutputStream sink = new ByteArrayOutputStream();
+      new RoaringBitmap().serialize(new DataOutputStream(sink));
+      return new ImmutableRoaringBitmap(ByteBuffer.wrap(sink.toByteArray()));
+    }
+    catch (Exception e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  /**
+   * View an Iterable of wrapped bitmaps as an Iterable of the ImmutableRoaringBitmaps inside them.
+   * Elements are unwrapped one at a time as the result is iterated.
+   *
+   * @param b wrapped bitmaps; null elements are mapped to the shared empty bitmap
+   *
+   * @return an Iterable whose iterators yield ImmutableRoaringBitmap objects
+   */
+  private static Iterable unwrap(final Iterable b)
+  {
+    return new Iterable()
+    {
+      @Override
+      public Iterator iterator()
+      {
+        return new UnwrappingIterator(b.iterator());
+      }
+    };
+  }
+
+  /**
+   * Read-only iterator that unwraps each WrappedImmutableRoaringBitmap produced by another iterator.
+   */
+  private static final class UnwrappingIterator implements Iterator
+  {
+    private final Iterator source;
+
+    UnwrappingIterator(Iterator source)
+    {
+      this.source = source;
+    }
+
+    @Override
+    public boolean hasNext()
+    {
+      return source.hasNext();
+    }
+
+    @Override
+    public ImmutableRoaringBitmap next()
+    {
+      // The cast fails with ClassCastException for any non-null element of another class.
+      final WrappedImmutableRoaringBitmap wrapped = (WrappedImmutableRoaringBitmap) source.next();
+      return wrapped == null ? EMPTY_IMMUTABLE_BITMAP : wrapped.getBitmap();
+    }
+
+    @Override
+    public void remove()
+    {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  @Override
+  public MutableBitmap makeEmptyMutableBitmap()
+  {
+    return new WrappedRoaringBitmap(compressRunOnSerialization);
+  }
+
+  /**
+   * @return the shared wrapper around the empty ImmutableRoaringBitmap; no new object is created
+   */
+  @Override
+  public ImmutableBitmap makeEmptyImmutableBitmap()
+  {
+    return WRAPPED_IMMUTABLE_ROARING_BITMAP;
+  }
+
+  /**
+   * Convert a WrappedRoaringBitmap through its toImmutableBitmap() method.
+   *
+   * @param mutableBitmap the bitmap to convert
+   *
+   * @return the immutable bitmap
+   *
+   * @throws IllegalStateException if mutableBitmap is not a WrappedRoaringBitmap
+   */
+  @Override
+  public ImmutableBitmap makeImmutableBitmap(MutableBitmap mutableBitmap)
+  {
+    final boolean convertible = mutableBitmap instanceof WrappedRoaringBitmap;
+    if (!convertible) {
+      throw new IllegalStateException(String.format("Cannot convert [%s]", mutableBitmap.getClass()));
+    }
+    final WrappedRoaringBitmap roaring = (WrappedRoaringBitmap) mutableBitmap;
+    try {
+      return roaring.toImmutableBitmap();
+    }
+    catch (Exception e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  @Override
+  public ImmutableBitmap mapImmutableBitmap(ByteBuffer b)
+  {
+    return new WrappedImmutableRoaringBitmap(b);
+  }
+
+  @Override
+  public ImmutableBitmap union(Iterable b)
+  {
+    final Iterator operands = unwrap(b).iterator();
+    return new WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap.or(operands));
+  }
+
+  @Override
+  public ImmutableBitmap intersection(Iterable b)
+  {
+    final Iterator operands = unwrap(b).iterator();
+    return new WrappedImmutableRoaringBitmap(BufferFastAggregation.and(operands));
+  }
+
+  /**
+   * Flip the positions in [0, b.size()) of the given bitmap.
+   * NOTE(review): ImmutableBitmap.size() is documented as the number of set bits, so this range is
+   * not the span of the bitmap; confirm that this is the intended behavior.
+   */
+  @Override
+  public ImmutableBitmap complement(ImmutableBitmap b)
+  {
+    final ImmutableRoaringBitmap unwrapped = ((WrappedImmutableRoaringBitmap) b).getBitmap();
+    return new WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap.flip(unwrapped, 0, b.size()));
+  }
+
+  /**
+   * Flip the positions in [0, length) of the given bitmap.
+   */
+  @Override
+  public ImmutableBitmap complement(
+      ImmutableBitmap b, int length
+  )
+  {
+    final ImmutableRoaringBitmap unwrapped = ((WrappedImmutableRoaringBitmap) b).getBitmap();
+    return new WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap.flip(unwrapped, 0, length));
+  }
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedBitSetBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedBitSetBitmap.java
new file mode 100755
index 000000000000..3781f335aafa
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedBitSetBitmap.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import java.nio.ByteBuffer;
+import java.util.BitSet;
+
+/**
+ * WrappedBitSetBitmap implements MutableBitmap for java.util.BitSet
+ */
+public class WrappedBitSetBitmap extends WrappedImmutableBitSetBitmap implements MutableBitmap
+{
+
+  public WrappedBitSetBitmap()
+  {
+    super();
+  }
+
+  public WrappedBitSetBitmap(BitSet bitSet)
+  {
+    super(bitSet);
+  }
+
+  public WrappedBitSetBitmap(ByteBuffer byteBuffer)
+  {
+    super(byteBuffer);
+  }
+
+  /**
+   * @return an independent copy of the wrapped BitSet
+   */
+  protected BitSet cloneBitSet()
+  {
+    return (BitSet) bitmap.clone();
+  }
+
+  /**
+   * Check that the argument is a WrappedBitSetBitmap and return the BitSet it wraps.
+   *
+   * @param mutableBitmap the operand of or/and/xor/andNot
+   *
+   * @return the operand's BitSet (not a copy)
+   *
+   * @throws IllegalArgumentException if the argument is null or of another class
+   */
+  private static BitSet unwrap(MutableBitmap mutableBitmap)
+  {
+    if (mutableBitmap instanceof WrappedBitSetBitmap) {
+      return ((WrappedBitSetBitmap) mutableBitmap).bitmap;
+    }
+    throw new IllegalArgumentException(
+        String.format(
+            "Unknown class type: %s expected %s",
+            // null fails the instanceof test too; guard it so that building the message cannot throw
+            mutableBitmap == null ? null : mutableBitmap.getClass().getCanonicalName(),
+            WrappedBitSetBitmap.class.getCanonicalName()
+        )
+    );
+  }
+
+  @Override
+  public void clear()
+  {
+    bitmap.clear();
+  }
+
+  /**
+   * @throws IllegalArgumentException if mutableBitmap is not a WrappedBitSetBitmap
+   */
+  @Override
+  public void or(MutableBitmap mutableBitmap)
+  {
+    this.bitmap.or(unwrap(mutableBitmap));
+  }
+
+  /**
+   * @throws IllegalArgumentException if mutableBitmap is not a WrappedBitSetBitmap
+   */
+  @Override
+  public void and(MutableBitmap mutableBitmap)
+  {
+    this.bitmap.and(unwrap(mutableBitmap));
+  }
+
+  /**
+   * @throws IllegalArgumentException if mutableBitmap is not a WrappedBitSetBitmap
+   */
+  @Override
+  public void xor(MutableBitmap mutableBitmap)
+  {
+    this.bitmap.xor(unwrap(mutableBitmap));
+  }
+
+  /**
+   * @throws IllegalArgumentException if mutableBitmap is not a WrappedBitSetBitmap
+   */
+  @Override
+  public void andNot(MutableBitmap mutableBitmap)
+  {
+    this.bitmap.andNot(unwrap(mutableBitmap));
+  }
+
+  /**
+   * @return the number of bytes that serialize() writes, i.e. the length of BitSet.toByteArray()
+   */
+  @Override
+  public int getSizeInBytes()
+  {
+    // BitSet.length() is the highest set bit + 1; toByteArray() has (length() + 7) / 8 bytes.
+    // BitSet.size() would report allocated storage instead, which over-states the serialized size.
+    return (this.bitmap.length() + Byte.SIZE - 1) / Byte.SIZE;
+  }
+
+  @Override
+  public void add(int entry)
+  {
+    this.bitmap.set(entry);
+  }
+
+  @Override
+  public void remove(int entry)
+  {
+    this.bitmap.clear(entry);
+  }
+
+  /**
+   * Write the bytes of BitSet.toByteArray() to the buffer; no length prefix is written.
+   *
+   * @param buffer where we write
+   */
+  @Override
+  public void serialize(ByteBuffer buffer)
+  {
+    buffer.put(this.bitmap.toByteArray());
+  }
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java
new file mode 100755
index 000000000000..a4651ae4db91
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import com.google.common.primitives.Ints;
+import io.druid.extendedset.intset.ConciseSet;
+import io.druid.extendedset.intset.ImmutableConciseSet;
+import io.druid.extendedset.intset.IntSet;
+import org.roaringbitmap.IntIterator;
+
+import java.nio.ByteBuffer;
+
+public class WrappedConciseBitmap implements MutableBitmap
+{
+  /**
+   * Underlying bitmap.
+   */
+  private ConciseSet bitmap;
+
+  /**
+   * Create a new WrappedConciseBitmap wrapping an empty ConciseSet
+   */
+  public WrappedConciseBitmap()
+  {
+    this(new ConciseSet());
+  }
+
+  /**
+   * Create a bitmap wrapping the given bitmap. The set is stored as is, without copying.
+   *
+   * @param conciseSet bitmap to be wrapped
+   */
+  public WrappedConciseBitmap(ConciseSet conciseSet)
+  {
+    this.bitmap = conciseSet;
+  }
+
+  /**
+   * Return the ConciseSet inside another bitmap, which must be a WrappedConciseBitmap.
+   *
+   * @throws ClassCastException if the argument is of another class
+   */
+  private static ConciseSet unwrap(ImmutableBitmap wrapped)
+  {
+    return ((WrappedConciseBitmap) wrapped).bitmap;
+  }
+
+  ConciseSet getBitmap()
+  {
+    return bitmap;
+  }
+
+  /**
+   * @return the bytes of an ImmutableConciseSet built from the current contents
+   */
+  @Override
+  public byte[] toBytes()
+  {
+    final ImmutableConciseSet snapshot = ImmutableConciseSet.newImmutableFromMutable(bitmap);
+    return snapshot.toBytes();
+  }
+
+  @Override
+  public int compareTo(ImmutableBitmap other)
+  {
+    return bitmap.compareTo(unwrap(other));
+  }
+
+  @Override
+  public void clear()
+  {
+    bitmap.clear();
+  }
+
+  @Override
+  public void or(MutableBitmap mutableBitmap)
+  {
+    // The only one of the four operators that updates the wrapped set in place.
+    bitmap.addAll(unwrap(mutableBitmap));
+  }
+
+  @Override
+  public void and(MutableBitmap mutableBitmap)
+  {
+    // The wrapped set is replaced by the result set.
+    bitmap = bitmap.intersection(unwrap(mutableBitmap));
+  }
+
+  @Override
+  public void xor(MutableBitmap mutableBitmap)
+  {
+    bitmap = bitmap.symmetricDifference(unwrap(mutableBitmap));
+  }
+
+  @Override
+  public void andNot(MutableBitmap mutableBitmap)
+  {
+    bitmap = bitmap.difference(unwrap(mutableBitmap));
+  }
+
+  /**
+   * @return the length of the ConciseSet's word array multiplied by the size of an int in bytes
+   */
+  @Override
+  public int getSizeInBytes()
+  {
+    final int wordCount = bitmap.getWords().length;
+    return wordCount * Ints.BYTES;
+  }
+
+  @Override
+  public void add(int entry)
+  {
+    bitmap.add(entry);
+  }
+
+  @Override
+  public int size()
+  {
+    return bitmap.size();
+  }
+
+  /**
+   * Write the result of toBytes() to the buffer.
+   *
+   * @param buffer where we write
+   */
+  @Override
+  public void serialize(ByteBuffer buffer)
+  {
+    final byte[] serialized = toBytes();
+    buffer.put(serialized);
+  }
+
+  @Override
+  public String toString()
+  {
+    return getClass().getSimpleName() + bitmap.toString();
+  }
+
+  @Override
+  public void remove(int entry)
+  {
+    bitmap.remove(entry);
+  }
+
+  /**
+   * @return an iterator over the set bits; its clone() is not supported
+   */
+  @Override
+  public IntIterator iterator()
+  {
+    final IntSet.IntIterator source = bitmap.iterator();
+    return new IntIterator()
+    {
+      @Override
+      public boolean hasNext()
+      {
+        return source.hasNext();
+      }
+
+      @Override
+      public int next()
+      {
+        return source.next();
+      }
+
+      @Override
+      public IntIterator clone()
+      {
+        throw new UnsupportedOperationException("clone is not supported on ConciseSet iterator");
+      }
+    };
+  }
+
+  @Override
+  public boolean isEmpty()
+  {
+    return bitmap.size() == 0;
+  }
+
+  /**
+   * @return a new WrappedConciseBitmap; the operation is applied to a clone of this bitmap's set
+   */
+  @Override
+  public ImmutableBitmap union(ImmutableBitmap otherBitmap)
+  {
+    final ConciseSet rhs = unwrap(otherBitmap);
+    return new WrappedConciseBitmap(bitmap.clone().union(rhs));
+  }
+
+  /**
+   * @return a new WrappedConciseBitmap; the operation is applied to a clone of this bitmap's set
+   */
+  @Override
+  public ImmutableBitmap intersection(ImmutableBitmap otherBitmap)
+  {
+    final ConciseSet rhs = unwrap(otherBitmap);
+    return new WrappedConciseBitmap(bitmap.clone().intersection(rhs));
+  }
+
+  /**
+   * @return a new WrappedConciseBitmap; the operation is applied to a clone of this bitmap's set
+   */
+  @Override
+  public ImmutableBitmap difference(ImmutableBitmap otherBitmap)
+  {
+    final ConciseSet rhs = unwrap(otherBitmap);
+    return new WrappedConciseBitmap(bitmap.clone().difference(rhs));
+  }
+
+  @Override
+  public boolean get(int value)
+  {
+    return bitmap.contains(value);
+  }
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java
new file mode 100755
index 000000000000..e4a21d9adfd9
--- /dev/null
+++
b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import io.druid.extendedset.intset.IntSet;
+import org.roaringbitmap.IntIterator;
+
+/**
+ * Adapts an IntSet.IntIterator to the org.roaringbitmap IntIterator interface by forwarding every call.
+ */
+public class WrappedConciseIntIterator implements IntIterator
+{
+  private final IntSet.IntIterator delegate;
+
+  /**
+   * @param itr the iterator that every call is forwarded to
+   */
+  public WrappedConciseIntIterator(IntSet.IntIterator itr)
+  {
+    this.delegate = itr;
+  }
+
+  @Override
+  public boolean hasNext()
+  {
+    return delegate.hasNext();
+  }
+
+  @Override
+  public int next()
+  {
+    return delegate.next();
+  }
+
+  /**
+   * @return a new wrapper around a clone of the underlying iterator
+   */
+  @Override
+  public IntIterator clone()
+  {
+    final IntSet.IntIterator copy = delegate.clone();
+    return new WrappedConciseIntIterator(copy);
+  }
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableBitSetBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableBitSetBitmap.java
new file mode 100755
index 000000000000..633a1de063ab
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableBitSetBitmap.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to Metamarkets Group Inc.
(Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+import org.roaringbitmap.IntIterator;
+
+import java.nio.ByteBuffer;
+import java.util.BitSet;
+
+/**
+ * WrappedImmutableBitSetBitmap implements ImmutableBitmap for java.util.BitSet
+ */
+public class WrappedImmutableBitSetBitmap implements ImmutableBitmap
+{
+  protected final BitSet bitmap;
+
+  /**
+   * @param bitmap the BitSet to wrap; it is stored as is, without copying
+   */
+  public WrappedImmutableBitSetBitmap(BitSet bitmap)
+  {
+    this.bitmap = bitmap;
+  }
+
+  public WrappedImmutableBitSetBitmap()
+  {
+    this(new BitSet());
+  }
+
+  // WARNING: the current implementation of BitSet (1.7) copies the contents of ByteBuffer to
+  // on heap!
+  // TODO: make a new BitSet implementation which can use ByteBuffers properly.
+  public WrappedImmutableBitSetBitmap(ByteBuffer byteBuffer)
+  {
+    this(BitSet.valueOf(byteBuffer));
+  }
+
+  /**
+   * Return the BitSet inside another bitmap of this family. Both this class and its mutable
+   * subclass WrappedBitSetBitmap are accepted.
+   *
+   * @throws ClassCastException if the argument is not a WrappedImmutableBitSetBitmap
+   */
+  private static BitSet bitSetOf(ImmutableBitmap other)
+  {
+    return ((WrappedImmutableBitSetBitmap) other).bitmap;
+  }
+
+  @Override
+  public IntIterator iterator()
+  {
+    return new BitSetIterator();
+  }
+
+  @Override
+  public boolean get(int value)
+  {
+    return bitmap.get(value);
+  }
+
+  /**
+   * @return the number of set bits (BitSet.cardinality())
+   */
+  @Override
+  public int size()
+  {
+    return bitmap.cardinality();
+  }
+
+  @Override
+  public byte[] toBytes()
+  {
+    return bitmap.toByteArray();
+  }
+
+  /**
+   * Compares BitSet.length() first and the lowest set bit second. In both steps the other
+   * bitmap's value is the first argument of Integer.compare, so the result is positive when the
+   * other bitmap has the greater value.
+   *
+   * @param other must be a WrappedImmutableBitSetBitmap
+   */
+  @Override
+  public int compareTo(ImmutableBitmap other)
+  {
+    // TODO: find out what this is supposed to even do
+    BitSet otherSet = ((WrappedImmutableBitSetBitmap) other).bitmap;
+    int lengthCompare = Integer.compare(otherSet.length(), bitmap.length());
+    if (lengthCompare != 0) {
+      return lengthCompare;
+    }
+    return Integer.compare(otherSet.nextSetBit(0), bitmap.nextSetBit(0));
+  }
+
+  @Override
+  public boolean isEmpty()
+  {
+    return bitmap.isEmpty();
+  }
+
+  /**
+   * @param otherBitmap any WrappedImmutableBitSetBitmap, mutable or not
+   *
+   * @return a new WrappedBitSetBitmap; neither operand is modified
+   */
+  @Override
+  public ImmutableBitmap union(ImmutableBitmap otherBitmap)
+  {
+    // Work on the BitSets directly so that the argument does not have to be the mutable subclass.
+    final BitSet result = (BitSet) bitmap.clone();
+    result.or(bitSetOf(otherBitmap));
+    return new WrappedBitSetBitmap(result);
+  }
+
+  /**
+   * @param otherBitmap any WrappedImmutableBitSetBitmap, mutable or not
+   *
+   * @return a new WrappedBitSetBitmap; neither operand is modified
+   */
+  @Override
+  public ImmutableBitmap intersection(ImmutableBitmap otherBitmap)
+  {
+    final BitSet result = (BitSet) bitmap.clone();
+    result.and(bitSetOf(otherBitmap));
+    return new WrappedBitSetBitmap(result);
+  }
+
+  /**
+   * @param otherBitmap any WrappedImmutableBitSetBitmap, mutable or not
+   *
+   * @return a new WrappedBitSetBitmap holding the bits of this bitmap that are not set in the other
+   */
+  @Override
+  public ImmutableBitmap difference(ImmutableBitmap otherBitmap)
+  {
+    final BitSet result = (BitSet) bitmap.clone();
+    result.andNot(bitSetOf(otherBitmap));
+    return new WrappedBitSetBitmap(result);
+  }
+
+  /**
+   * Iterates over the set bits in ascending order, reading the enclosing bitmap's BitSet on every call.
+   */
+  private class BitSetIterator implements IntIterator
+  {
+    // Position of the bit returned last; -1 before the first call to next().
+    private int pos = -1;
+
+    @Override
+    public boolean hasNext()
+    {
+      return bitmap.nextSetBit(pos + 1) >= 0;
+    }
+
+    /**
+     * @return the next set bit, or -1 when there is none. In that case pos becomes -1 again, so a
+     * following call starts over from the first bit; callers should check hasNext() first.
+     */
+    @Override
+    public int next()
+    {
+      pos = bitmap.nextSetBit(pos + 1);
+      return pos;
+    }
+
+    @Override
+    public IntIterator clone()
+    {
+      BitSetIterator newIt = new BitSetIterator();
+      newIt.pos = pos;
+      return newIt;
+    }
+  }
+
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java
new file mode 100755
index 000000000000..a0e0203dcd62
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.bitmap;
+
+
+import io.druid.extendedset.intset.ImmutableConciseSet;
+import io.druid.extendedset.intset.IntSet;
+import org.roaringbitmap.IntIterator;
+
+import java.nio.ByteBuffer;
+
+public class WrappedImmutableConciseBitmap implements ImmutableBitmap
+{
+  /**
+   * Underlying bitmap.
+ */ + private final ImmutableConciseSet bitmap; + + public WrappedImmutableConciseBitmap(ByteBuffer byteBuffer) + { + this.bitmap = new ImmutableConciseSet(byteBuffer.asReadOnlyBuffer()); + } + + /** + * Wrap an ImmutableConciseSet + * + * @param immutableConciseSet bitmap to be wrapped + */ + public WrappedImmutableConciseBitmap(ImmutableConciseSet immutableConciseSet) + { + this.bitmap = immutableConciseSet; + } + + public ImmutableConciseSet getBitmap() + { + return bitmap; + } + + @Override + public boolean get(int value) + { + return bitmap.contains(value); + } + + @Override + public byte[] toBytes() + { + return bitmap.toBytes(); + } + + @Override + public int compareTo(ImmutableBitmap other) + { + return bitmap.compareTo(((WrappedImmutableConciseBitmap) other).getBitmap()); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + bitmap.toString(); + } + + @Override + public IntIterator iterator() + { + final IntSet.IntIterator i = bitmap.iterator(); + return new IntIterator() + { + @Override + public IntIterator clone() + { + return new WrappedConciseIntIterator(i.clone()); + } + + @Override + public boolean hasNext() + { + return i.hasNext(); + } + + @Override + public int next() + { + return i.next(); + } + }; + } + + @Override + public int size() + { + return bitmap.size(); + } + + @Override + public boolean isEmpty() + { + return bitmap.size() == 0; + } + + @Override + public ImmutableBitmap union(ImmutableBitmap otherBitmap) + { + WrappedImmutableConciseBitmap other = (WrappedImmutableConciseBitmap) otherBitmap; + ImmutableConciseSet unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableConciseBitmap(ImmutableConciseSet.union(bitmap, unwrappedOtherBitmap)); + } + + @Override + public ImmutableBitmap intersection(ImmutableBitmap otherBitmap) + { + WrappedImmutableConciseBitmap other = (WrappedImmutableConciseBitmap) otherBitmap; + ImmutableConciseSet unwrappedOtherBitmap = other.bitmap; + return new 
WrappedImmutableConciseBitmap(ImmutableConciseSet.intersection(bitmap, unwrappedOtherBitmap)); + } + + @Override + public ImmutableBitmap difference(ImmutableBitmap otherBitmap) + { + WrappedImmutableConciseBitmap other = (WrappedImmutableConciseBitmap) otherBitmap; + ImmutableConciseSet unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableConciseBitmap( + ImmutableConciseSet.intersection( + bitmap, + ImmutableConciseSet.complement(unwrappedOtherBitmap) + ) + ); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java new file mode 100755 index 000000000000..9b2210ab115d --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableRoaringBitmap.java @@ -0,0 +1,129 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.bitmap; + +import com.google.common.base.Throwables; +import org.roaringbitmap.IntIterator; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.nio.ByteBuffer; + +public class WrappedImmutableRoaringBitmap implements ImmutableBitmap +{ + /** + * Underlying bitmap. + */ + private final ImmutableRoaringBitmap bitmap; + + protected WrappedImmutableRoaringBitmap(ByteBuffer byteBuffer) + { + this.bitmap = new ImmutableRoaringBitmap(byteBuffer.asReadOnlyBuffer()); + } + + /** + * Wrap an ImmutableRoaringBitmap + * + * @param immutableRoaringBitmap bitmap to be wrapped + */ + public WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap immutableRoaringBitmap) + { + this.bitmap = immutableRoaringBitmap; + } + + public ImmutableRoaringBitmap getBitmap() + { + return bitmap; + } + + @Override + public byte[] toBytes() + { + try { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + bitmap.serialize(new DataOutputStream(out)); + return out.toByteArray(); + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + + @Override + public int compareTo(ImmutableBitmap other) + { + return 0; + } + + @Override + public String toString() + { + return getClass().getSimpleName() + bitmap.toString(); + } + + @Override + public IntIterator iterator() + { + return bitmap.getIntIterator(); + } + + @Override + public int size() + { + return bitmap.getCardinality(); + } + + @Override + public boolean isEmpty() + { + return bitmap.isEmpty(); + } + + @Override + public ImmutableBitmap union(ImmutableBitmap otherBitmap) + { + WrappedImmutableRoaringBitmap other = (WrappedImmutableRoaringBitmap) otherBitmap; + ImmutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap.or(bitmap, unwrappedOtherBitmap)); + } + + @Override + public boolean get(int value) + { + return 
bitmap.contains(value); + } + + @Override + public ImmutableBitmap intersection(ImmutableBitmap otherBitmap) + { + WrappedImmutableRoaringBitmap other = (WrappedImmutableRoaringBitmap) otherBitmap; + ImmutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap.and(bitmap, unwrappedOtherBitmap)); + } + + @Override + public ImmutableBitmap difference(ImmutableBitmap otherBitmap) + { + WrappedImmutableRoaringBitmap other = (WrappedImmutableRoaringBitmap) otherBitmap; + ImmutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableRoaringBitmap(ImmutableRoaringBitmap.andNot(bitmap, unwrappedOtherBitmap)); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedRoaringBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedRoaringBitmap.java new file mode 100755 index 000000000000..eeb7bdb0e260 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedRoaringBitmap.java @@ -0,0 +1,265 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.bitmap; + +import com.google.common.base.Throwables; +import org.roaringbitmap.IntIterator; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +public class WrappedRoaringBitmap implements MutableBitmap +{ + // attempt to compress long runs prior to serialization (requires RoaringBitmap version 0.5 or better) + // this may improve compression greatly in some cases at the expense of slower serialization + // in the worst case. + private final boolean compressRunOnSerialization; + /** + * Underlying bitmap. + */ + private MutableRoaringBitmap bitmap; + + /** + * Creates a new WrappedRoaringBitmap wrapping an empty MutableRoaringBitmap + */ + public WrappedRoaringBitmap() + { + this(RoaringBitmapFactory.DEFAULT_COMPRESS_RUN_ON_SERIALIZATION); + } + + /** + * Creates a new WrappedRoaringBitmap wrapping an empty MutableRoaringBitmap + * + * @param compressRunOnSerialization indicates whether to call {@link RoaringBitmap#runOptimize()} before serializing + */ + public WrappedRoaringBitmap(boolean compressRunOnSerialization) + { + this.bitmap = new MutableRoaringBitmap(); + this.compressRunOnSerialization = compressRunOnSerialization; + } + + ImmutableBitmap toImmutableBitmap() + { + MutableRoaringBitmap mrb = bitmap.clone(); + if (compressRunOnSerialization) { + mrb.runOptimize(); + } + return new WrappedImmutableRoaringBitmap(mrb); + } + + @Override + public byte[] toBytes() + { + try { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + if (compressRunOnSerialization) { + bitmap.runOptimize(); + } + bitmap.serialize(new DataOutputStream(out)); + return out.toByteArray(); + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + + @Override + public int compareTo(ImmutableBitmap other) + { + return 
0; + } + + @Override + public void clear() + { + this.bitmap.clear(); + } + + @Override + public void or(MutableBitmap mutableBitmap) + { + WrappedRoaringBitmap other = (WrappedRoaringBitmap) mutableBitmap; + MutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + bitmap.or(unwrappedOtherBitmap); + } + + @Override + public void and(MutableBitmap mutableBitmap) + { + WrappedRoaringBitmap other = (WrappedRoaringBitmap) mutableBitmap; + MutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + bitmap.and(unwrappedOtherBitmap); + } + + + @Override + public void andNot(MutableBitmap mutableBitmap) + { + WrappedRoaringBitmap other = (WrappedRoaringBitmap) mutableBitmap; + MutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + bitmap.andNot(unwrappedOtherBitmap); + } + + + @Override + public void xor(MutableBitmap mutableBitmap) + { + WrappedRoaringBitmap other = (WrappedRoaringBitmap) mutableBitmap; + MutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + bitmap.xor(unwrappedOtherBitmap); + } + + @Override + public int getSizeInBytes() + { + if (compressRunOnSerialization) { + bitmap.runOptimize(); + } + return bitmap.serializedSizeInBytes(); + } + + @Override + public void add(int entry) + { + bitmap.add(entry); + } + + @Override + public int size() + { + return bitmap.getCardinality(); + } + + @Override + public void serialize(ByteBuffer buffer) + { + if (compressRunOnSerialization) { + bitmap.runOptimize(); + } + try { + bitmap.serialize( + new DataOutputStream( + new OutputStream() + { + ByteBuffer mBB; + + OutputStream init(ByteBuffer mbb) + { + mBB = mbb; + return this; + } + + @Override + public void close() + { + // unnecessary + } + + @Override + public void flush() + { + // unnecessary + } + + @Override + public void write(int b) + { + mBB.put((byte) b); + } + + @Override + public void write(byte[] b) + { + mBB.put(b); + } + + @Override + public void write(byte[] b, int off, int l) + { + mBB.put(b, off, l); + } + }.init(buffer) + ) + ); + } + 
catch (IOException e) { + e.printStackTrace(); // impossible in theory + } + } + + @Override + public String toString() + { + return getClass().getSimpleName() + bitmap.toString(); + } + + @Override + public void remove(int entry) + { + bitmap.remove(entry); + } + + @Override + public IntIterator iterator() + { + return bitmap.getIntIterator(); + } + + @Override + public boolean isEmpty() + { + return bitmap.isEmpty(); + } + + @Override + public ImmutableBitmap union(ImmutableBitmap otherBitmap) + { + WrappedRoaringBitmap other = (WrappedRoaringBitmap) otherBitmap; + MutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableRoaringBitmap(MutableRoaringBitmap.or(bitmap, unwrappedOtherBitmap)); + } + + @Override + public ImmutableBitmap intersection(ImmutableBitmap otherBitmap) + { + WrappedRoaringBitmap other = (WrappedRoaringBitmap) otherBitmap; + MutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableRoaringBitmap(MutableRoaringBitmap.and(bitmap, unwrappedOtherBitmap)); + } + + @Override + public ImmutableBitmap difference(ImmutableBitmap otherBitmap) + { + WrappedRoaringBitmap other = (WrappedRoaringBitmap) otherBitmap; + MutableRoaringBitmap unwrappedOtherBitmap = other.bitmap; + return new WrappedImmutableRoaringBitmap(MutableRoaringBitmap.andNot(bitmap, unwrappedOtherBitmap)); + } + + @Override + public boolean get(int value) + { + return bitmap.contains(value); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java new file mode 100755 index 000000000000..a11a8c90e7b0 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java @@ -0,0 +1,226 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import com.google.common.primitives.Floats; +import com.google.common.primitives.Ints; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; + +import java.nio.ByteBuffer; +import java.util.Iterator; + +/** + * Byte layout: + * Header + * 0 to 1 : the MSB is a boolean flag for isLeaf, the next 15 bits represent the number of children of a node + * Body + * 2 to 2 + numDims * Floats.BYTES : minCoordinates + * 2 + numDims * Floats.BYTES to 2 + 2 * numDims * Floats.BYTES : maxCoordinates + * concise set + * rest (children) : Every 4 bytes is storing an offset representing the position of a child. + *

+ * The child offset is an offset from the initialOffset + */ +public class ImmutableNode +{ + public static final int HEADER_NUM_BYTES = 2; + + private final int numDims; + private final int initialOffset; + private final int offsetFromInitial; + + private final short numChildren; + private final boolean isLeaf; + private final int childrenOffset; + + private final ByteBuffer data; + + private final BitmapFactory bitmapFactory; + + public ImmutableNode( + int numDims, + int initialOffset, + int offsetFromInitial, + ByteBuffer data, + BitmapFactory bitmapFactory + ) + { + this.bitmapFactory = bitmapFactory; + this.numDims = numDims; + this.initialOffset = initialOffset; + this.offsetFromInitial = offsetFromInitial; + short header = data.getShort(initialOffset + offsetFromInitial); + this.isLeaf = (header & 0x8000) != 0; + this.numChildren = (short) (header & 0x7FFF); + final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Floats.BYTES; + int bitmapSize = data.getInt(sizePosition); + this.childrenOffset = initialOffset + + offsetFromInitial + + HEADER_NUM_BYTES + + 2 * numDims * Floats.BYTES + + Ints.BYTES + + bitmapSize; + + this.data = data; + } + + public ImmutableNode( + int numDims, + int initialOffset, + int offsetFromInitial, + short numChildren, + boolean leaf, + ByteBuffer data, + BitmapFactory bitmapFactory + ) + { + this.bitmapFactory = bitmapFactory; + this.numDims = numDims; + this.initialOffset = initialOffset; + this.offsetFromInitial = offsetFromInitial; + this.numChildren = numChildren; + this.isLeaf = leaf; + final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Floats.BYTES; + int bitmapSize = data.getInt(sizePosition); + this.childrenOffset = initialOffset + + offsetFromInitial + + HEADER_NUM_BYTES + + 2 * numDims * Floats.BYTES + + Ints.BYTES + + bitmapSize; + + this.data = data; + } + + public BitmapFactory getBitmapFactory() + { + return bitmapFactory; + } + + 
public int getInitialOffset() + { + return initialOffset; + } + + public int getOffsetFromInitial() + { + return offsetFromInitial; + } + + public int getNumDims() + { + return numDims; + } + + public int getNumChildren() + { + return numChildren; + } + + public boolean isLeaf() + { + return isLeaf; + } + + public float[] getMinCoordinates() + { + return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES); + } + + public float[] getMaxCoordinates() + { + return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES + numDims * Floats.BYTES); + } + + public ImmutableBitmap getImmutableBitmap() + { + final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Floats.BYTES; + int numBytes = data.getInt(sizePosition); + data.position(sizePosition + Ints.BYTES); + ByteBuffer tmpBuffer = data.slice(); + tmpBuffer.limit(numBytes); + return bitmapFactory.mapImmutableBitmap(tmpBuffer.asReadOnlyBuffer()); + } + + public Iterable getChildren() + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return new Iterator() + { + private volatile int count = 0; + + @Override + public boolean hasNext() + { + return (count < numChildren); + } + + @Override + public ImmutableNode next() + { + if (isLeaf) { + return new ImmutablePoint( + numDims, + initialOffset, + data.getInt(childrenOffset + (count++) * Ints.BYTES), + data, + bitmapFactory + ); + } + return new ImmutableNode( + numDims, + initialOffset, + data.getInt(childrenOffset + (count++) * Ints.BYTES), + data, + bitmapFactory + ); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + public ByteBuffer getData() + { + return data; + } + + private float[] getCoords(int offset) + { + final float[] retVal = new float[numDims]; + + final ByteBuffer readOnlyBuffer = data.asReadOnlyBuffer(); + readOnlyBuffer.position(offset); + readOnlyBuffer.asFloatBuffer().get(retVal); + + return retVal; + } +} 
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutablePoint.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutablePoint.java new file mode 100755 index 000000000000..1ee07020571f --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutablePoint.java @@ -0,0 +1,64 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.spatial; + +import io.druid.collections.bitmap.BitmapFactory; + +import java.nio.ByteBuffer; + +public class ImmutablePoint extends ImmutableNode +{ + public ImmutablePoint( + int numDims, + int initialOffset, + int offsetFromInitial, + ByteBuffer data, + BitmapFactory bitmapFactory + ) + { + super(numDims, initialOffset, offsetFromInitial, (short) 0, true, data, bitmapFactory); + } + + public ImmutablePoint(ImmutableNode node) + { + super( + node.getNumDims(), + node.getInitialOffset(), + node.getOffsetFromInitial(), + (short) 0, + true, + node.getData(), + node.getBitmapFactory() + ); + } + + public float[] getCoords() + { + return super.getMinCoordinates(); + } + + @Override + public Iterable getChildren() + { + // should never get here + throw new UnsupportedOperationException(); + } + +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableRTree.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableRTree.java new file mode 100755 index 000000000000..9da5957170bc --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableRTree.java @@ -0,0 +1,144 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.primitives.Ints; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.search.Bound; +import io.druid.collections.spatial.search.GutmanSearchStrategy; +import io.druid.collections.spatial.search.SearchStrategy; + +import java.nio.ByteBuffer; + +/** + * An immutable representation of an {@link RTree} for spatial indexing. + */ +public class ImmutableRTree +{ + private static byte VERSION = 0x0; + private final int numDims; + private final ImmutableNode root; + private final ByteBuffer data; + private final SearchStrategy defaultSearchStrategy = new GutmanSearchStrategy(); + + public ImmutableRTree() + { + this.numDims = 0; + this.data = ByteBuffer.wrap(new byte[]{}); + this.root = null; + } + + public ImmutableRTree(ByteBuffer data, BitmapFactory bitmapFactory) + { + final int initPosition = data.position(); + Preconditions.checkArgument(data.get(0) == VERSION, "Mismatching versions"); + this.numDims = data.getInt(1 + initPosition) & 0x7FFF; + this.data = data; + this.root = new ImmutableNode(numDims, initPosition, 1 + Ints.BYTES, data, bitmapFactory); + } + + public static ImmutableRTree newImmutableFromMutable(RTree rTree) + { + if (rTree.getSize() == 0) { + return new ImmutableRTree(); + } + + ByteBuffer buffer = ByteBuffer.wrap(new byte[calcNumBytes(rTree)]); + + buffer.put(VERSION); + buffer.putInt(rTree.getNumDims()); + rTree.getRoot().storeInByteBuffer(buffer, buffer.position()); + buffer.position(0); + return new ImmutableRTree(buffer.asReadOnlyBuffer(), rTree.getBitmapFactory()); + } + + private static int calcNumBytes(RTree tree) + { + int total = 1 + Ints.BYTES; // VERSION and numDims + + total += 
calcNodeBytes(tree.getRoot()); + + return total; + } + + private static int calcNodeBytes(Node node) + { + int total = 0; + + // find size of this node + total += node.getSizeInBytes(); + + // recursively find sizes of child nodes + for (Node child : node.getChildren()) { + if (node.isLeaf()) { + total += child.getSizeInBytes(); + } else { + total += calcNodeBytes(child); + } + } + + return total; + } + + public int size() + { + return data.capacity(); + } + + public ImmutableNode getRoot() + { + return root; + } + + public int getNumDims() + { + return numDims; + } + + public Iterable search(Bound bound) + { + return search(defaultSearchStrategy, bound); + } + + public Iterable search(SearchStrategy strategy, Bound bound) + { + if (bound.getNumDims() == numDims) { + return strategy.search(root, bound); + } else { + // If the dimension counts don't match (for example, if this is called on a blank `new ImmutableRTree()`) + return ImmutableList.of(); + } + } + + public byte[] toBytes() + { + ByteBuffer buf = ByteBuffer.allocate(data.capacity()); + buf.put(data.asReadOnlyBuffer()); + return buf.array(); + } + + public int compareTo(ImmutableRTree other) + { + return this.data.compareTo(other.data); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/Node.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/Node.java new file mode 100755 index 000000000000..1206083799c1 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/Node.java @@ -0,0 +1,235 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.primitives.Floats; +import com.google.common.primitives.Ints; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; + +/** + */ +public class Node +{ + private final float[] minCoordinates; + private final float[] maxCoordinates; + + private final List children; + private final boolean isLeaf; + private final MutableBitmap bitmap; + + private Node parent; + + public Node(float[] minCoordinates, float[] maxCoordinates, boolean isLeaf, BitmapFactory bitmapFactory) + { + this( + minCoordinates, + maxCoordinates, + Lists.newArrayList(), + isLeaf, + null, + bitmapFactory.makeEmptyMutableBitmap() + ); + } + + public Node( + float[] minCoordinates, + float[] maxCoordinates, + List children, + boolean isLeaf, + Node parent, + MutableBitmap bitmap + ) + { + Preconditions.checkArgument(minCoordinates.length == maxCoordinates.length); + + this.minCoordinates = minCoordinates; + this.maxCoordinates = maxCoordinates; + this.children = children; + for (Node child : children) { + child.setParent(this); + } + this.isLeaf = isLeaf; + this.bitmap = bitmap; + this.parent = parent; + } + + public int getNumDims() + { + return minCoordinates.length; + } + + public float[] getMinCoordinates() + { + return minCoordinates; + } + + public float[] getMaxCoordinates() + { + return 
maxCoordinates; + } + + public Node getParent() + { + return parent; + } + + private void setParent(Node p) + { + parent = p; + } + + public void addChild(Node node) + { + node.setParent(this); + children.add(node); + } + + public List getChildren() + { + return children; + } + + public boolean isLeaf() + { + return isLeaf; + } + + public double getArea() + { + return calculateArea(); + } + + public boolean contains(Node other) + { + Preconditions.checkArgument(getNumDims() == other.getNumDims()); + + for (int i = 0; i < getNumDims(); i++) { + if (other.getMinCoordinates()[i] < minCoordinates[i] || other.getMaxCoordinates()[i] > maxCoordinates[i]) { + return false; + } + } + return true; + } + + public boolean contains(float[] coords) + { + Preconditions.checkArgument(getNumDims() == coords.length); + + for (int i = 0; i < getNumDims(); i++) { + if (coords[i] < minCoordinates[i] || coords[i] > maxCoordinates[i]) { + return false; + } + } + return true; + } + + public boolean enclose() + { + boolean retVal = false; + float[] minCoords = new float[getNumDims()]; + Arrays.fill(minCoords, Float.MAX_VALUE); + float[] maxCoords = new float[getNumDims()]; + Arrays.fill(maxCoords, -Float.MAX_VALUE); + + for (Node child : getChildren()) { + for (int i = 0; i < getNumDims(); i++) { + minCoords[i] = Math.min(child.getMinCoordinates()[i], minCoords[i]); + maxCoords[i] = Math.max(child.getMaxCoordinates()[i], maxCoords[i]); + } + } + + if (!Arrays.equals(minCoords, minCoordinates)) { + System.arraycopy(minCoords, 0, minCoordinates, 0, minCoordinates.length); + retVal = true; + } + if (!Arrays.equals(maxCoords, maxCoordinates)) { + System.arraycopy(maxCoords, 0, maxCoordinates, 0, maxCoordinates.length); + retVal = true; + } + + return retVal; + } + + public MutableBitmap getBitmap() + { + return bitmap; + } + + public void addToBitmapIndex(Node node) + { + bitmap.or(node.getBitmap()); + } + + public void clear() + { + children.clear(); + bitmap.clear(); + } + + public int 
getSizeInBytes() + { + return ImmutableNode.HEADER_NUM_BYTES + + 2 * getNumDims() * Floats.BYTES + + Ints.BYTES // size of the set + + bitmap.getSizeInBytes() + + getChildren().size() * Ints.BYTES; + } + + public int storeInByteBuffer(ByteBuffer buffer, int position) + { + buffer.position(position); + buffer.putShort((short) (((isLeaf ? 0x1 : 0x0) << 15) | getChildren().size())); + for (float v : getMinCoordinates()) { + buffer.putFloat(v); + } + for (float v : getMaxCoordinates()) { + buffer.putFloat(v); + } + byte[] bytes = bitmap.toBytes(); + buffer.putInt(bytes.length); + buffer.put(bytes); + + int pos = buffer.position(); + int childStartOffset = pos + getChildren().size() * Ints.BYTES; + for (Node child : getChildren()) { + buffer.putInt(pos, childStartOffset); + childStartOffset = child.storeInByteBuffer(buffer, childStartOffset); + pos += Ints.BYTES; + } + + return childStartOffset; + } + + private double calculateArea() + { + double area = 1.0; + for (int i = 0; i < minCoordinates.length; i++) { + area *= (maxCoordinates[i] - minCoordinates[i]); + } + return area; + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/Point.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/Point.java new file mode 100755 index 000000000000..ffb878236941 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/Point.java @@ -0,0 +1,113 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import com.google.common.collect.Lists; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; + +import java.util.Arrays; +import java.util.List; + +/** + */ +public class Point extends Node +{ + private final float[] coords; + private final MutableBitmap bitmap; + + public Point(float[] coords, int entry, BitmapFactory bitmapFactory) + { + super( + coords, + Arrays.copyOf(coords, coords.length), + Lists.newArrayList(), + true, + null, + makeBitmap(entry, bitmapFactory) + ); + + this.coords = coords; + this.bitmap = bitmapFactory.makeEmptyMutableBitmap(); + this.bitmap.add(entry); + } + + public Point(float[] coords, MutableBitmap entry) + { + super(coords, Arrays.copyOf(coords, coords.length), Lists.newArrayList(), true, null, entry); + + this.coords = coords; + this.bitmap = entry; + } + + private static MutableBitmap makeBitmap(int entry, BitmapFactory bitmapFactory) + { + MutableBitmap retVal = bitmapFactory.makeEmptyMutableBitmap(); + retVal.add(entry); + return retVal; + } + + public float[] getCoords() + { + return coords; + } + + @Override + public MutableBitmap getBitmap() + { + return bitmap; + } + + @Override + public void addChild(Node node) + { + throw new UnsupportedOperationException(); + } + + @Override + public List getChildren() + { + return Lists.newArrayList(); + } + + @Override + public boolean isLeaf() + { + return true; + } + + @Override + public double getArea() + { + return 0; + } + + @Override + public boolean contains(Node 
other) + { + return false; + } + + @Override + public boolean enclose() + { + return false; + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java new file mode 100755 index 000000000000..e3d9b08032ee --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java @@ -0,0 +1,245 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import com.google.common.base.Preconditions; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.spatial.split.LinearGutmanSplitStrategy; +import io.druid.collections.spatial.split.SplitStrategy; + +import java.util.Arrays; + +/** + * This RTree has been optimized to work with bitmap inverted indexes. + *

+ * This code will probably make a lot more sense if you read: + * http://www.sai.msu.su/~megera/postgres/gist/papers/gutman-rtree.pdf + */ +public class RTree +{ + private final int numDims; + private final SplitStrategy splitStrategy; + private final BitmapFactory bitmapFactory; + private Node root; + private volatile int size; + + public RTree(BitmapFactory bitmapFactory) + { + this(0, new LinearGutmanSplitStrategy(0, 0, bitmapFactory), bitmapFactory); + } + + public RTree(int numDims, SplitStrategy splitStrategy, BitmapFactory bitmapFactory) + { + this.numDims = numDims; + this.splitStrategy = splitStrategy; + this.bitmapFactory = bitmapFactory; + this.root = buildRoot(true); + } + + public BitmapFactory getBitmapFactory() + { + return bitmapFactory; + } + + /** + * This description is from the original paper. + *

+ * Algorithm Insert: Insert a new index entry E into an R-tree. + *

+ * I1. [Find position for new record]. Invoke {@link #chooseLeaf(Node, Point)} to select + * a leaf node L in which to place E. + *

+ * I2. [Add records to leaf node]. If L has room for another entry, install E. Otherwise invoke + * {@link SplitStrategy} split methods to obtain L and LL containing E and all the old entries of L. + *

+ * I3. [Propagate changes upward]. Invoke {@link #adjustTree(Node, Node)} on L, also passing LL if a split was + * performed. + *

+ * I4. [Grow tree taller]. If node split propagation caused the root to split, create a new record whose + * children are the two resulting nodes. + * + * @param coords - the coordinates of the entry + * @param entry - the integer to insert + */ + public void insert(float[] coords, int entry) + { + Preconditions.checkArgument(coords.length == numDims); + insertInner(new Point(coords, entry, bitmapFactory)); + } + + public void insert(float[] coords, MutableBitmap entry) + { + Preconditions.checkArgument(coords.length == numDims); + insertInner(new Point(coords, entry)); + } + + /** + * Not yet implemented. + * + * @param coords - the coordinates of the entry + * @param entry - the integer to insert + * + * @return - whether the operation completed successfully + */ + public boolean delete(double[] coords, int entry) + { + throw new UnsupportedOperationException(); + } + + public int getSize() + { + return size; + } + + public int getNumDims() + { + return numDims; + } + + public SplitStrategy getSplitStrategy() + { + return splitStrategy; + } + + public Node getRoot() + { + return root; + } + + private Node buildRoot(boolean isLeaf) + { + float[] initMinCoords = new float[numDims]; + float[] initMaxCoords = new float[numDims]; + Arrays.fill(initMinCoords, -Float.MAX_VALUE); + Arrays.fill(initMaxCoords, Float.MAX_VALUE); + + return new Node(initMinCoords, initMaxCoords, isLeaf, bitmapFactory); + } + + private void insertInner(Point point) + { + Node node = chooseLeaf(root, point); + node.addChild(point); + + if (splitStrategy.needToSplit(node)) { + Node[] groups = splitStrategy.split(node); + adjustTree(groups[0], groups[1]); + } else { + adjustTree(node, null); + } + + size++; + } + + + /** + * This description is from the original paper. + *

+ * Algorithm ChooseLeaf. Select a leaf node in which to place a new index entry E. + *

+ * CL1. [Initialize]. Set N to be the root node. + *

+ * CL2. [Leaf check]. If N is a leaf, return N. + *

+ * CL3. [Choose subtree]. If N is not a leaf, let F be the entry in N whose rectangle + * FI needs least enlargement to include EI. Resolve ties by choosing the entry with the rectangle + * of smallest area. + *

+ * CL4. [Descend until a leaf is reached]. Set N to be the child node pointed to by Fp and repeated from CL2. + * + * @param node - current node to evaluate + * @param point - point to insert + * + * @return - leafNode where point can be inserted + */ + private Node chooseLeaf(Node node, Point point) + { + node.addToBitmapIndex(point); + + if (node.isLeaf()) { + return node; + } + + double minCost = Double.MAX_VALUE; + Node optimal = node.getChildren().get(0); + for (Node child : node.getChildren()) { + double cost = RTreeUtils.getExpansionCost(child, point); + if (cost < minCost) { + minCost = cost; + optimal = child; + } else if (cost == minCost) { + // Resolve ties by choosing the entry with the rectangle of smallest area + if (child.getArea() < optimal.getArea()) { + optimal = child; + } + } + } + + return chooseLeaf(optimal, point); + } + + /** + * This description is from the original paper. + *

+ * AT1. [Initialize]. Set N=L. If L was split previously, set NN to be the resulting second node. + *

+ * AT2. [Check if done]. If N is the root, stop. + *

+ * AT3. [Adjust covering rectangle in parent entry]. Let P be the parent node of N, and let Ev(N)I be N's entry in P. + * Adjust Ev(N)I so that it tightly encloses all entry rectangles in N. + *

+ * AT4. [Propagate node split upward]. If N has a partner NN resulting from an earlier split, create a new entry + * Ev(NN) with Ev(NN)p pointing to NN and Ev(NN)I enclosing all rectangles in NN. Add Ev(NN) to p is there is room. + * Otherwise, invoke {@link SplitStrategy} split to product p and pp containing Ev(NN) and all p's old entries. + * + * @param n - first node to adjust + * @param nn - optional second node to adjust + */ + private void adjustTree(Node n, Node nn) + { + // special case for root + if (n == root) { + if (nn != null) { + root = buildRoot(false); + root.addChild(n); + root.addChild(nn); + } + root.enclose(); + return; + } + + boolean updateParent = n.enclose(); + + if (nn != null) { + nn.enclose(); + updateParent = true; + + if (splitStrategy.needToSplit(n.getParent())) { + Node[] groups = splitStrategy.split(n.getParent()); + adjustTree(groups[0], groups[1]); + } + } + + if (n.getParent() != null && updateParent) { + adjustTree(n.getParent(), null); + } + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTreeUtils.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTreeUtils.java new file mode 100755 index 000000000000..e5c00b642850 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTreeUtils.java @@ -0,0 +1,252 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; +import com.google.common.collect.Iterables; + +/** + */ +public class RTreeUtils +{ + private static ObjectMapper jsonMapper = new ObjectMapper(); + + public static double getEnclosingArea(Node a, Node b) + { + Preconditions.checkArgument(a.getNumDims() == b.getNumDims()); + + double[] minCoords = new double[a.getNumDims()]; + double[] maxCoords = new double[a.getNumDims()]; + + for (int i = 0; i < minCoords.length; i++) { + minCoords[i] = Math.min(a.getMinCoordinates()[i], b.getMinCoordinates()[i]); + maxCoords[i] = Math.max(a.getMaxCoordinates()[i], b.getMaxCoordinates()[i]); + } + + double area = 1.0; + for (int i = 0; i < minCoords.length; i++) { + area *= (maxCoords[i] - minCoords[i]); + } + + return area; + } + + public static double getExpansionCost(Node node, Point point) + { + Preconditions.checkArgument(node.getNumDims() == point.getNumDims()); + + if (node.contains(point.getCoords())) { + return 0; + } + + double expanded = 1.0; + for (int i = 0; i < node.getNumDims(); i++) { + double min = Math.min(point.getCoords()[i], node.getMinCoordinates()[i]); + double max = Math.max(point.getCoords()[i], node.getMinCoordinates()[i]); + expanded *= (max - min); + } + + return (expanded - node.getArea()); + } + + public static void enclose(Node[] nodes) + { + for (Node node : nodes) { + node.enclose(); + } + } + + public 
static Iterable getBitmaps(ImmutableRTree tree) + { + return depthFirstSearch(tree.getRoot()); + } + + public static Iterable depthFirstSearch(ImmutableNode node) + { + if (node.isLeaf()) { + return Iterables.transform( + node.getChildren(), + new Function() + { + @Override + public ImmutablePoint apply(ImmutableNode tNode) + { + return new ImmutablePoint(tNode); + } + } + ); + } else { + return Iterables.concat( + Iterables.transform( + + node.getChildren(), + new Function>() + { + @Override + public Iterable apply(ImmutableNode child) + { + return depthFirstSearch(child); + } + } + ) + ); + } + } + + public static void print(RTree tree) + { + System.out.printf("numDims : %d%n", tree.getNumDims()); + try { + printRTreeNode(tree.getRoot(), 0); + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + + public static void print(ImmutableRTree tree) + { + System.out.printf("numDims : %d%n", tree.getNumDims()); + try { + printNode(tree.getRoot(), 0); + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + + public static void printRTreeNode(Node node, int level) throws Exception + { + System.out.printf( + "%sminCoords: %s, maxCoords: %s, numChildren: %d, isLeaf:%s%n", + makeDashes(level), + jsonMapper.writeValueAsString(node.getMinCoordinates()), + jsonMapper.writeValueAsString( + node.getMaxCoordinates() + ), + node.getChildren().size(), + node.isLeaf() + ); + if (node.isLeaf()) { + for (Node child : node.getChildren()) { + Point point = (Point) (child); + System.out + .printf( + "%scoords: %s, conciseSet: %s%n", + makeDashes(level), + jsonMapper.writeValueAsString(point.getCoords()), + point.getBitmap() + ); + } + } else { + level++; + for (Node child : node.getChildren()) { + printRTreeNode(child, level); + } + } + } + + public static boolean verifyEnclose(Node node) + { + for (Node child : node.getChildren()) { + for (int i = 0; i < node.getNumDims(); i++) { + if (child.getMinCoordinates()[i] < node.getMinCoordinates()[i] + || 
child.getMaxCoordinates()[i] > node.getMaxCoordinates()[i]) { + return false; + } + } + } + + if (!node.isLeaf()) { + for (Node child : node.getChildren()) { + if (!verifyEnclose(child)) { + return false; + } + } + } + + return true; + } + + public static boolean verifyEnclose(ImmutableNode node) + { + for (ImmutableNode child : node.getChildren()) { + for (int i = 0; i < node.getNumDims(); i++) { + if (child.getMinCoordinates()[i] < node.getMinCoordinates()[i] + || child.getMaxCoordinates()[i] > node.getMaxCoordinates()[i]) { + return false; + } + } + } + + if (!node.isLeaf()) { + for (ImmutableNode child : node.getChildren()) { + if (!verifyEnclose(child)) { + return false; + } + } + } + + return true; + } + + private static void printNode(ImmutableNode node, int level) throws Exception + { + System.out.printf( + "%sminCoords: %s, maxCoords: %s, numChildren: %d, isLeaf: %s%n", + makeDashes(level), + jsonMapper.writeValueAsString(node.getMinCoordinates()), + jsonMapper.writeValueAsString( + node.getMaxCoordinates() + ), + node.getNumChildren(), + node.isLeaf() + ); + if (node.isLeaf()) { + for (ImmutableNode immutableNode : node.getChildren()) { + ImmutablePoint point = new ImmutablePoint(immutableNode); + System.out + .printf( + "%scoords: %s, conciseSet: %s%n", + makeDashes(level), + jsonMapper.writeValueAsString(point.getCoords()), + point.getImmutableBitmap() + ); + } + } else { + level++; + for (ImmutableNode immutableNode : node.getChildren()) { + printNode(immutableNode, level); + } + } + } + + private static String makeDashes(int level) + { + String retVal = ""; + for (int i = 0; i < level; i++) { + retVal += "-"; + } + return retVal; + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/Bound.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/Bound.java new file mode 100755 index 000000000000..17d8934234bf --- /dev/null +++ 
b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/Bound.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.spatial.search;
+
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+import io.druid.collections.spatial.ImmutableNode;
+import io.druid.collections.spatial.ImmutablePoint;
+
+/**
+ * A spatial region used to search an R-tree.
+ *
+ * Jackson (de)serializes implementations polymorphically through a "type" property whose value is
+ * "rectangular", "radius" or "polygon".
+ */
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
+@JsonSubTypes(value = {
+    @JsonSubTypes.Type(name = "rectangular", value = RectangularBound.class),
+    @JsonSubTypes.Type(name = "radius", value = RadiusBound.class),
+    @JsonSubTypes.Type(name = "polygon", value = PolygonBound.class)
+})
+public interface Bound
+{
+  /**
+   * @return the result limit of this bound. GutmanSearchStrategy searches breadth-first when this is
+   * greater than 0 and depth-first otherwise; the convenience constructors of the implementations in
+   * this package pass 0.
+   */
+  public int getLimit();
+
+  /**
+   * @return the number of dimensions of this bound
+   */
+  public int getNumDims();
+
+  /**
+   * @param node the node to test
+   *
+   * @return whether the bounding box of {@code node} intersects this bound
+   */
+  public boolean overlaps(ImmutableNode node);
+
+  /**
+   * @param coords the coordinates to test
+   *
+   * @return whether {@code coords} lies inside this bound
+   */
+  public boolean contains(float[] coords);
+
+  /**
+   * @param points candidate points
+   *
+   * @return the candidates accepted by this bound; the implementations in this package keep exactly the
+   * points whose coordinates satisfy {@link #contains(float[])}
+   */
+  public Iterable filter(Iterable points);
+
+  /**
+   * @return a byte encoding of this bound; the implementations in this package write their defining
+   * coordinates, the limit and a one-byte type id. NOTE(review): presumably used as part of a query
+   * cache key -- confirm against callers.
+   */
+  public byte[] getCacheKey();
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/GutmanSearchStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/GutmanSearchStrategy.java
new file mode 100755
index
000000000000..6dc635f139bb --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/GutmanSearchStrategy.java @@ -0,0 +1,210 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial.search; + +import com.google.common.base.Function; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.ImmutableNode; +import io.druid.collections.spatial.ImmutablePoint; + +/** + */ +public class GutmanSearchStrategy implements SearchStrategy +{ + @Override + public Iterable search(ImmutableNode node, Bound bound) + { + if (bound.getLimit() > 0) { + return Iterables.transform( + breadthFirstSearch(node, bound), + new Function() + { + @Override + public ImmutableBitmap apply(ImmutableNode immutableNode) + { + return immutableNode.getImmutableBitmap(); + } + } + ); + } + + return Iterables.transform( + depthFirstSearch(node, bound), + new Function() + { + @Override + public ImmutableBitmap apply(ImmutablePoint immutablePoint) + { + return immutablePoint.getImmutableBitmap(); + } + } + ); + } + + public Iterable 
depthFirstSearch(ImmutableNode node, final Bound bound) + { + if (node.isLeaf()) { + return bound.filter( + Iterables.transform( + node.getChildren(), + new Function() + { + @Override + public ImmutablePoint apply(ImmutableNode tNode) + { + return new ImmutablePoint(tNode); + } + } + ) + ); + } else { + return Iterables.concat( + Iterables.transform( + Iterables.filter( + node.getChildren(), + new Predicate() + { + @Override + public boolean apply(ImmutableNode child) + { + return bound.overlaps(child); + } + } + ), + new Function>() + { + @Override + public Iterable apply(ImmutableNode child) + { + return depthFirstSearch(child, bound); + } + } + ) + ); + } + } + + public Iterable breadthFirstSearch( + ImmutableNode node, + final Bound bound + ) + { + if (node.isLeaf()) { + return Iterables.filter( + node.getChildren(), + new Predicate() + { + @Override + public boolean apply(ImmutableNode immutableNode) + { + return bound.contains(immutableNode.getMinCoordinates()); + } + } + ); + } + return breadthFirstSearch(node.getChildren(), bound, 0); + } + + public Iterable breadthFirstSearch( + Iterable nodes, + final Bound bound, + int total + ) + { + Iterable points = Iterables.concat( + Iterables.transform( + Iterables.filter( + nodes, + new Predicate() + { + @Override + public boolean apply(ImmutableNode immutableNode) + { + return immutableNode.isLeaf(); + } + } + ), + new Function>() + { + @Override + public Iterable apply(ImmutableNode immutableNode) + { + return Iterables.filter( + immutableNode.getChildren(), + new Predicate() + { + @Override + public boolean apply(ImmutableNode immutableNode) + { + return bound.contains(immutableNode.getMinCoordinates()); + } + } + ); + } + } + ) + ); + + Iterable overlappingNodes = Iterables.filter( + nodes, + new Predicate() + { + @Override + public boolean apply(ImmutableNode immutableNode) + { + return !immutableNode.isLeaf() && bound.overlaps(immutableNode); + } + } + ); + + int totalPoints = Iterables.size(points); + int 
totalOverlap = Iterables.size(overlappingNodes); + + if (totalOverlap == 0 || (totalPoints + totalOverlap + total) >= bound.getLimit()) { + return Iterables.concat( + points, + overlappingNodes + ); + } else { + return Iterables.concat( + points, + breadthFirstSearch( + Iterables.concat( + Iterables.transform( + overlappingNodes, + new Function>() + { + @Override + public Iterable apply(ImmutableNode immutableNode) + { + return immutableNode.getChildren(); + } + } + ) + ), + bound, + totalPoints + ) + ); + } + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java new file mode 100755 index 000000000000..ec870b9f0936 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java @@ -0,0 +1,176 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.spatial.search; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.google.common.primitives.Floats; +import com.google.common.primitives.Ints; +import io.druid.collections.spatial.ImmutablePoint; + +import java.nio.ByteBuffer; + +/** + */ +public class PolygonBound extends RectangularBound +{ + private static final byte CACHE_TYPE_ID = 0x02; + + private final float[] abscissa; + private final float[] ordinate; + + private PolygonBound(float[] abscissa, float[] ordinate, int limit) + { + super(getMinCoords(abscissa, ordinate), getMaxCoords(abscissa, ordinate), limit); + this.abscissa = abscissa; + this.ordinate = ordinate; + } + + private static float[] getMinCoords(float[] abscissa, float[] ordinate) + { + float[] retVal = new float[2]; + retVal[0] = abscissa[0]; + retVal[1] = ordinate[0]; + + for (int i = 1; i < abscissa.length; i++) { + if (abscissa[i] < retVal[0]) { + retVal[0] = abscissa[i]; + } + if (ordinate[i] < retVal[1]) { + retVal[1] = ordinate[i]; + } + } + return retVal; + } + + private static float[] getMaxCoords(float[] abscissa, float[] ordinate) + { + float[] retVal = new float[2]; + retVal[0] = abscissa[0]; + retVal[1] = ordinate[0]; + for (int i = 1; i < abscissa.length; i++) { + if (abscissa[i] > retVal[0]) { + retVal[0] = abscissa[i]; + } + if (ordinate[i] > retVal[1]) { + retVal[1] = ordinate[i]; + } + } + return retVal; + } + + /** + * abscissa and ordinate contain the coordinates of polygon. + * abscissa[i] is the horizontal coordinate for the i'th corner of the polygon, + * and ordinate[i] is the vertical coordinate for the i'th corner. + * The polygon must have more than 2 corners, so the length of abscissa or ordinate must be equal or greater than 3. + *

+ * if the polygon is a rectangular, which corners are {0.0, 0.0}, {0.0, 1.0}, {1.0, 1.0}, {1.0, 0.0}, + * the abscissa should be {0.0, 0.0, 1.0, 1.0} and ordinate should be {0.0, 1.0, 1.0, 0.0} + */ + @JsonCreator + public static PolygonBound from( + @JsonProperty("abscissa") float[] abscissa, + @JsonProperty("ordinate") float[] ordinate, + @JsonProperty("limit") int limit + ) + { + Preconditions.checkArgument(abscissa.length == ordinate.length); //abscissa and ordinate should be the same length + Preconditions.checkArgument(abscissa.length > 2); //a polygon should have more than 2 corners + return new PolygonBound(abscissa, ordinate, limit); + } + + public static PolygonBound from(float[] abscissa, float[] ordinate) + { + return PolygonBound.from(abscissa, ordinate, 0); + } + + @JsonProperty + public float[] getOrdinate() + { + return ordinate; + } + + @JsonProperty + public float[] getAbscissa() + { + return abscissa; + } + + @Override + public boolean contains(float[] coords) + { + int polyCorners = abscissa.length; + int j = polyCorners - 1; + boolean oddNodes = false; + for (int i = 0; i < polyCorners; i++) { + if ((ordinate[i] < coords[1] && ordinate[j] >= coords[1] + || ordinate[j] < coords[1] && ordinate[i] >= coords[1]) + && (abscissa[i] <= coords[0] || abscissa[j] <= coords[0])) { + if (abscissa[i] + (coords[1] - ordinate[i]) / (ordinate[j] - ordinate[i]) * (abscissa[j] - abscissa[i]) + < coords[0]) { + oddNodes = !oddNodes; + } + } + j = i; + } + return oddNodes; + } + + @Override + public Iterable filter(Iterable points) + { + return Iterables.filter( + points, + new Predicate() + { + @Override + public boolean apply(ImmutablePoint immutablePoint) + { + return contains(immutablePoint.getCoords()); + } + } + ); + } + + @Override + public byte[] getCacheKey() + { + ByteBuffer abscissaBuffer = ByteBuffer.allocate(abscissa.length * Floats.BYTES); + abscissaBuffer.asFloatBuffer().put(abscissa); + final byte[] abscissaCacheKey = abscissaBuffer.array(); + + 
ByteBuffer ordinateBuffer = ByteBuffer.allocate(ordinate.length * Floats.BYTES); + ordinateBuffer.asFloatBuffer().put(ordinate); + final byte[] ordinateCacheKey = ordinateBuffer.array(); + + final ByteBuffer cacheKey = ByteBuffer + .allocate(1 + abscissaCacheKey.length + ordinateCacheKey.length + Ints.BYTES) + .put(abscissaCacheKey) + .put(ordinateCacheKey) + .putInt(getLimit()) + .put(CACHE_TYPE_ID); + + return cacheKey.array(); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RadiusBound.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RadiusBound.java new file mode 100755 index 000000000000..cb905062d61f --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RadiusBound.java @@ -0,0 +1,132 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.spatial.search; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.google.common.primitives.Floats; +import com.google.common.primitives.Ints; +import io.druid.collections.spatial.ImmutablePoint; + +import java.nio.ByteBuffer; + +/** + */ +public class RadiusBound extends RectangularBound +{ + private static final byte CACHE_TYPE_ID = 0x01; + private final float[] coords; + private final float radius; + + @JsonCreator + public RadiusBound( + @JsonProperty("coords") float[] coords, + @JsonProperty("radius") float radius, + @JsonProperty("limit") int limit + ) + { + super(getMinCoords(coords, radius), getMaxCoords(coords, radius), limit); + + this.coords = coords; + this.radius = radius; + } + + public RadiusBound( + float[] coords, + float radius + ) + { + this(coords, radius, 0); + } + + private static float[] getMinCoords(float[] coords, float radius) + { + float[] retVal = new float[coords.length]; + for (int i = 0; i < coords.length; i++) { + retVal[i] = coords[i] - radius; + } + return retVal; + } + + private static float[] getMaxCoords(float[] coords, float radius) + { + float[] retVal = new float[coords.length]; + for (int i = 0; i < coords.length; i++) { + retVal[i] = coords[i] + radius; + } + return retVal; + } + + @JsonProperty + public float[] getCoords() + { + return coords; + } + + @JsonProperty + public float getRadius() + { + return radius; + } + + @Override + public boolean contains(float[] otherCoords) + { + double total = 0.0; + for (int i = 0; i < coords.length; i++) { + total += Math.pow(otherCoords[i] - coords[i], 2); + } + + return (total <= Math.pow(radius, 2)); + } + + @Override + public Iterable filter(Iterable points) + { + return Iterables.filter( + points, + new Predicate() + { + @Override + public boolean apply(ImmutablePoint point) + { + return 
contains(point.getCoords()); + } + } + ); + } + + @Override + public byte[] getCacheKey() + { + final ByteBuffer minCoordsBuffer = ByteBuffer.allocate(coords.length * Floats.BYTES); + minCoordsBuffer.asFloatBuffer().put(coords); + final byte[] minCoordsCacheKey = minCoordsBuffer.array(); + final ByteBuffer cacheKey = ByteBuffer + .allocate(1 + minCoordsCacheKey.length + Ints.BYTES + Floats.BYTES) + .put(minCoordsCacheKey) + .putFloat(radius) + .putInt(getLimit()) + .put(CACHE_TYPE_ID); + return cacheKey.array(); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RectangularBound.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RectangularBound.java new file mode 100755 index 000000000000..0d6ab38f7818 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/RectangularBound.java @@ -0,0 +1,155 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.spatial.search; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.google.common.primitives.Floats; +import com.google.common.primitives.Ints; +import io.druid.collections.spatial.ImmutableNode; +import io.druid.collections.spatial.ImmutablePoint; + +import java.nio.ByteBuffer; + +/** + */ +public class RectangularBound implements Bound +{ + private static final byte CACHE_TYPE_ID = 0x0; + + private final float[] minCoords; + private final float[] maxCoords; + private final int limit; + private final int numDims; + + @JsonCreator + public RectangularBound( + @JsonProperty("minCoords") float[] minCoords, + @JsonProperty("maxCoords") float[] maxCoords, + @JsonProperty("limit") int limit + ) + { + Preconditions.checkArgument(minCoords.length == maxCoords.length); + + this.numDims = minCoords.length; + + this.minCoords = minCoords; + this.maxCoords = maxCoords; + this.limit = limit; + } + + public RectangularBound( + float[] minCoords, + float[] maxCoords + ) + { + this(minCoords, maxCoords, 0); + } + + @JsonProperty + public float[] getMinCoords() + { + return minCoords; + } + + @JsonProperty + public float[] getMaxCoords() + { + return maxCoords; + } + + @JsonProperty + public int getLimit() + { + return limit; + } + + @Override + public int getNumDims() + { + return numDims; + } + + @Override + public boolean overlaps(ImmutableNode node) + { + final float[] nodeMinCoords = node.getMinCoordinates(); + final float[] nodeMaxCoords = node.getMaxCoordinates(); + + for (int i = 0; i < numDims; i++) { + if (nodeMaxCoords[i] < minCoords[i] || nodeMinCoords[i] > maxCoords[i]) { + return false; + } + } + + return true; + } + + @Override + public boolean contains(float[] coords) + { + for (int i = 0; i < numDims; i++) { + if (coords[i] < 
minCoords[i] || coords[i] > maxCoords[i]) { + return false; + } + } + + return true; + } + + @Override + public Iterable filter(Iterable points) + { + return Iterables.filter( + points, + new Predicate() + { + @Override + public boolean apply(ImmutablePoint immutablePoint) + { + return contains(immutablePoint.getCoords()); + } + } + ); + } + + @Override + public byte[] getCacheKey() + { + ByteBuffer minCoordsBuffer = ByteBuffer.allocate(minCoords.length * Floats.BYTES); + minCoordsBuffer.asFloatBuffer().put(minCoords); + final byte[] minCoordsCacheKey = minCoordsBuffer.array(); + + ByteBuffer maxCoordsBuffer = ByteBuffer.allocate(maxCoords.length * Floats.BYTES); + maxCoordsBuffer.asFloatBuffer().put(maxCoords); + final byte[] maxCoordsCacheKey = maxCoordsBuffer.array(); + + final ByteBuffer cacheKey = ByteBuffer + .allocate(1 + minCoordsCacheKey.length + maxCoordsCacheKey.length + Ints.BYTES) + .put(minCoordsCacheKey) + .put(maxCoordsCacheKey) + .putInt(limit) + .put(CACHE_TYPE_ID); + return cacheKey.array(); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/SearchStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/SearchStrategy.java new file mode 100755 index 000000000000..c018a5a92972 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/SearchStrategy.java @@ -0,0 +1,32 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial.search; + +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.ImmutableNode; +//import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; + + +/** + */ +public interface SearchStrategy +{ + public Iterable search(ImmutableNode node, Bound bound); +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java new file mode 100755 index 000000000000..ba221ac017ee --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java @@ -0,0 +1,135 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package io.druid.collections.spatial.split;
+
+import com.google.common.collect.Lists;
+import io.druid.collections.bitmap.BitmapFactory;
+import io.druid.collections.spatial.Node;
+import io.druid.collections.spatial.RTreeUtils;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Base class for node-split strategies that follow the Split algorithm of the original R-tree
+ * paper. Subclasses supply the seed selection ({@link #pickSeeds(java.util.List)}) and the choice
+ * of the next entry to distribute ({@link #pickNext(java.util.List, Node[])}).
+ */
+public abstract class GutmanSplitStrategy implements SplitStrategy
+{
+  private final int minNumChildren;
+  private final int maxNumChildren;
+  private final BitmapFactory bf;
+
+  /**
+   * @param minNumChildren threshold used by {@link #split(Node)} to decide when all remaining
+   *                       entries must go to one group
+   * @param maxNumChildren a node with more children than this needs to be split
+   * @param b              factory used to create the bitmap of the newly created group node
+   */
+  protected GutmanSplitStrategy(int minNumChildren, int maxNumChildren, BitmapFactory b)
+  {
+    this.minNumChildren = minNumChildren;
+    this.maxNumChildren = maxNumChildren;
+    this.bf = b;
+  }
+
+  /**
+   * @return true when the node has strictly more than {@code maxNumChildren} children
+   */
+  @Override
+  public boolean needToSplit(Node node)
+  {
+    return (node.getChildren().size() > maxNumChildren);
+  }
+
+  /**
+   * This algorithm is from the original paper.
+   * <p>
+   * Algorithm Split. Divide a set of M+1 index entries into two groups.
+   * <p>
+   * S1. [Pick first entry for each group]. Apply Algorithm {@link #pickSeeds(java.util.List)} to choose
+   * two entries to be the first elements of the groups. Assign each to a group.
+   * <p>
+   * S2. [Check if done]. If all entries have been assigned, stop. If one group has so few entries that all the rest
+   * must be assigned to it in order for it to have the minimum number m, assign them and stop.
+   * <p>
+   * S3. [Select entry to assign]. Invoke Algorithm {@link #pickNext(java.util.List, Node[])}
+   * to choose the next entry to assign. Add it to the group whose covering rectangle will have to be enlarged least to
+   * accommodate it. Resolve ties by adding the entry to the group smaller area, then to the one with fewer entries, then
+   * to either. Repeat from S2.
+   * <p>
+   * The node passed in is reused as the first group (it is cleared and refilled); a new node is created for the
+   * second group and, when the original node has a parent, added to that parent.
+   *
+   * @param node the node to split; it is modified in place
+   *
+   * @return a two-element array: the original node followed by the newly created node
+   */
+  @Override
+  public Node[] split(Node node)
+  {
+    // Work on a copy of the child list: node.clear() below is called before the children are redistributed.
+    List children = Lists.newArrayList(node.getChildren());
+    Node[] seeds = pickSeeds(children);
+
+    // S1: the original node becomes group 0, seeded with seeds[0].
+    node.clear();
+    node.addChild(seeds[0]);
+    node.addToBitmapIndex(seeds[0]);
+
+    // S1: group 1 is a new node that starts with a copy of seeds[1]'s bounding coordinates.
+    Node group1 = new Node(
+        Arrays.copyOf(seeds[1].getMinCoordinates(), seeds[1].getMinCoordinates().length),
+        Arrays.copyOf(seeds[1].getMaxCoordinates(), seeds[1].getMaxCoordinates().length),
+        Lists.newArrayList(seeds[1]),
+        node.isLeaf(),
+        node.getParent(),
+        bf.makeEmptyMutableBitmap()
+    );
+    group1.addToBitmapIndex(seeds[1]);
+    if (node.getParent() != null) {
+      node.getParent().addChild(group1);
+    }
+    Node[] groups = new Node[]{
+        node, group1
+    };
+
+    RTreeUtils.enclose(groups);
+
+    while (!children.isEmpty()) {
+      // S2: if a group cannot exceed minNumChildren even with every remaining entry, give it all of them and stop.
+      for (Node group : groups) {
+        if (group.getChildren().size() + children.size() <= minNumChildren) {
+          for (Node child : children) {
+            group.addToBitmapIndex(child);
+            group.addChild(child);
+          }
+          RTreeUtils.enclose(groups);
+          return groups;
+        }
+      }
+
+      // S3: pickNext is expected to remove the chosen entry from 'children'; otherwise this loop would not terminate.
+      Node nextToAssign = pickNext(children, groups);
+      double group0ExpandedArea = RTreeUtils.getEnclosingArea(groups[0], nextToAssign);
+      double group1ExpandedArea = RTreeUtils.getEnclosingArea(groups[1], nextToAssign);
+
+      // NOTE(review): the javadoc speaks of the rectangle that is "enlarged least", but the comparison below is on
+      // the enclosing area after adding the entry, not on the increase over the group's current area - confirm.
+      // NOTE(review): the javadoc lists "fewer entries" as a second tie-breaker; the code goes from the area
+      // comparison straight to groups[1].
+      Node optimal;
+      if (group0ExpandedArea < group1ExpandedArea) {
+        optimal = groups[0];
+      } else if (group0ExpandedArea == group1ExpandedArea) {
+        if (groups[0].getArea() < groups[1].getArea()) {
+          optimal = groups[0];
+        } else {
+          optimal = groups[1];
+        }
+      } else {
+        optimal = groups[1];
+      }
+
+      optimal.addToBitmapIndex(nextToAssign);
+      optimal.addChild(nextToAssign);
+      optimal.enclose();
+    }
+
+    return groups;
+  }
+
+  /**
+   * Chooses the two entries that start the two groups.
+   *
+   * @param nodes candidate entries
+   *
+   * @return the chosen seeds; {@link #split(Node)} reads elements 0 and 1
+   */
+  public abstract Node[] pickSeeds(List nodes);
+
+  /**
+   * Chooses the next entry to distribute to one of the groups.
+   *
+   * @param nodes  entries not yet assigned
+   * @param groups the two groups built so far
+   *
+   * @return the entry to assign next
+   */
+  public abstract Node pickNext(List nodes, Node[] groups);
+}
diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java
new file mode 100755
index 000000000000..df61f01060f9
--- /dev/null
+++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.collections.spatial.split;
+
+import io.druid.collections.bitmap.BitmapFactory;
+import io.druid.collections.spatial.Node;
+
+import java.util.List;
+
+/**
+ */
+public class LinearGutmanSplitStrategy extends GutmanSplitStrategy
+{
+  public LinearGutmanSplitStrategy(int minNumChildren, int maxNumChildren, BitmapFactory bf)
+  {
+    super(minNumChildren, maxNumChildren, bf);
+  }
+
+  /**
+   * This algorithm is from the original paper.
+   *

+ * Algorithm LinearPickSeeds. Select two entries to be the first elements of the groups. + *

+ * LPS1. [Find extreme rectangles along all dimensions]. Along each dimension, find the entry whose rectangle has + * the highest low side, and the one with the lowest high side. Record the separation. + *

+ * LPS2. [Adjust for shape of the rectangle cluster]. Normalize the separations by dividing by the width of the + * entire set along the corresponding dimension. + *

+ * LPS3. [Select the most extreme pair]. Choose the pair with the greatest normalized separation along any dimension. + * + * @param nodes - nodes to choose from + * + * @return - two groups representing the seeds + */ + @Override + public Node[] pickSeeds(List nodes) + { + int[] optimalIndices = new int[2]; + int numDims = nodes.get(0).getNumDims(); + + double bestNormalized = 0.0; + for (int i = 0; i < numDims; i++) { + float minCoord = Float.MAX_VALUE; + float maxCoord = -Float.MAX_VALUE; + float highestLowSide = -Float.MAX_VALUE; + float lowestHighside = Float.MAX_VALUE; + int highestLowSideIndex = 0; + int lowestHighSideIndex = 0; + + int counter = 0; + for (Node node : nodes) { + minCoord = Math.min(minCoord, node.getMinCoordinates()[i]); + maxCoord = Math.max(maxCoord, node.getMaxCoordinates()[i]); + + if (node.getMinCoordinates()[i] > highestLowSide) { + highestLowSide = node.getMinCoordinates()[i]; + highestLowSideIndex = counter; + } + if (node.getMaxCoordinates()[i] < lowestHighside) { + lowestHighside = node.getMaxCoordinates()[i]; + lowestHighSideIndex = counter; + } + + counter++; + } + double normalizedSeparation = (highestLowSideIndex == lowestHighSideIndex) ? -1.0 : + Math.abs((highestLowSide - lowestHighside) / (maxCoord - minCoord)); + if (normalizedSeparation > bestNormalized) { + optimalIndices[0] = highestLowSideIndex; + optimalIndices[1] = lowestHighSideIndex; + bestNormalized = normalizedSeparation; + } + } + + // Didn't actually find anything, just return first 2 children + if (bestNormalized == 0) { + optimalIndices[0] = 0; + optimalIndices[1] = 1; + } + + int indexToRemove1 = Math.min(optimalIndices[0], optimalIndices[1]); + int indexToRemove2 = Math.max(optimalIndices[0], optimalIndices[1]); + return new Node[]{nodes.remove(indexToRemove1), nodes.remove(indexToRemove2 - 1)}; + } + + /** + * This algorithm is from the original paper. + *

+ * Algorithm LinearPickNext. PickNext simply choose any of the remaining entries. + * + * @param nodes - remaining nodes + * @param groups - the left and right groups + * + * @return - the optimal selected node + */ + @Override + public Node pickNext(List nodes, Node[] groups) + { + return nodes.remove(0); + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/QuadraticGutmanSplitStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/QuadraticGutmanSplitStrategy.java new file mode 100755 index 000000000000..444fc5ecaeb0 --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/QuadraticGutmanSplitStrategy.java @@ -0,0 +1,83 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.spatial.split; + +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.spatial.Node; +import io.druid.collections.spatial.RTreeUtils; + +import java.util.List; + +/** + */ +public class QuadraticGutmanSplitStrategy extends GutmanSplitStrategy +{ + public QuadraticGutmanSplitStrategy(int minNumChildren, int maxNumChildren, BitmapFactory bf) + { + super(minNumChildren, maxNumChildren, bf); + } + + @Override + public Node[] pickSeeds(List nodes) + { + double highestCost = Double.MIN_VALUE; + int[] highestCostIndices = new int[2]; + + for (int i = 0; i < nodes.size() - 1; i++) { + for (int j = i + 1; j < nodes.size(); j++) { + double cost = RTreeUtils.getEnclosingArea(nodes.get(i), nodes.get(j)) - + nodes.get(i).getArea() - nodes.get(j).getArea(); + if (cost > highestCost) { + highestCost = cost; + highestCostIndices[0] = i; + highestCostIndices[1] = j; + } + } + } + + return new Node[]{nodes.remove(highestCostIndices[0]), nodes.remove(highestCostIndices[1] - 1)}; + } + + @Override + public Node pickNext(List nodes, Node[] groups) + { + double highestCost = Double.MIN_VALUE; + Node costlyNode = null; + int counter = 0; + int index = -1; + for (Node node : nodes) { + double group0Cost = RTreeUtils.getEnclosingArea(node, groups[0]); + double group1Cost = RTreeUtils.getEnclosingArea(node, groups[1]); + double cost = Math.abs(group0Cost - group1Cost); + if (cost > highestCost) { + highestCost = cost; + costlyNode = node; + index = counter; + } + counter++; + } + + if (costlyNode != null) { + nodes.remove(index); + } + + return costlyNode; + } +} diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/SplitStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/SplitStrategy.java new file mode 100755 index 000000000000..5b1f8a46c77d --- /dev/null +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/SplitStrategy.java @@ -0,0 
+1,31 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial.split; + +import io.druid.collections.spatial.Node; + +/** + */ +public interface SplitStrategy +{ + public boolean needToSplit(Node node); + + public Node[] split(Node node); +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/IntSetTestUtility.java b/bytebuffer-collections/src/test/java/io/druid/collections/IntSetTestUtility.java new file mode 100755 index 000000000000..ac2a54ab2d1d --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/IntSetTestUtility.java @@ -0,0 +1,114 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import org.roaringbitmap.IntIterator; + +import java.util.BitSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +/** + * + */ +public class IntSetTestUtility +{ + + private static Set setBits = Sets.newTreeSet(Lists.newArrayList(1, 2, 3, 5, 8, 13, 21)); + + public static Set getSetBits() + { + return Sets.newTreeSet(setBits); + } + + public static final BitSet createSimpleBitSet(Set setBits) + { + BitSet retval = new BitSet(); + for (int i : setBits) { + retval.set(i); + } + return retval; + } + + public static final void addAllToMutable(MutableBitmap mutableBitmap, Iterable intSet) + { + for (Integer integer : intSet) { + mutableBitmap.add(integer); + } + } + + public static Boolean equalSets(Set s1, ImmutableBitmap s2) + { + Set s3 = new HashSet<>(); + for (Integer i : new IntIt(s2.iterator())) { + s3.add(i); + } + return Sets.difference(s1, s3).isEmpty(); + } + + private static class IntIt implements Iterable + { + private final Iterator intIter; + + public IntIt(IntIterator intIt) + { + this.intIter = new IntIter(intIt); + } + + @Override + public Iterator iterator() + { + return intIter; + } + + private static class IntIter implements Iterator + { + private final IntIterator intIt; + + public IntIter(IntIterator intIt) + { + this.intIt = intIt; + } + + @Override + public boolean hasNext() 
+ { + return intIt.hasNext(); + } + + @Override + public Integer next() + { + return intIt.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException("Cannot remove ints from int iterator"); + } + } + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/TestIntegerSet.java b/bytebuffer-collections/src/test/java/io/druid/collections/TestIntegerSet.java new file mode 100755 index 000000000000..a837bd972b26 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/TestIntegerSet.java @@ -0,0 +1,242 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package io.druid.collections;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import io.druid.collections.bitmap.MutableBitmap;
+import io.druid.collections.bitmap.WrappedBitSetBitmap;
+import io.druid.collections.bitmap.WrappedConciseBitmap;
+import io.druid.collections.bitmap.WrappedRoaringBitmap;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Set;
+
+/**
+ * Tests for {@link IntegerSet}. Most tests run against every bitmap class listed in
+ * {@code clazzes}, each instantiated reflectively and wrapped via {@code IntegerSet.wrap}.
+ */
+public class TestIntegerSet
+{
+  // NOTE(review): the generic type arguments of this declaration look garbled - confirm against the
+  // original file.
+  private static Iterable> clazzes = Lists.newArrayList(
+      WrappedBitSetBitmap.class,
+      WrappedConciseBitmap.class,
+      WrappedRoaringBitmap.class
+  );
+
+  /**
+   * A wrapped bitmap filled with the reference bits contains nothing outside the reference bits.
+   */
+  @Test
+  public void testSimpleSet()
+  {
+    WrappedBitSetBitmap wrappedBitSetBitmapBitSet = new WrappedBitSetBitmap();
+    IntSetTestUtility.addAllToMutable(wrappedBitSetBitmapBitSet, IntSetTestUtility.getSetBits());
+    IntegerSet integerSet = IntegerSet.wrap(wrappedBitSetBitmapBitSet);
+
+    Assert.assertTrue(Sets.difference(integerSet, IntSetTestUtility.getSetBits()).isEmpty());
+  }
+
+  /**
+   * Adding the same value to both sets keeps the difference empty; adding a value only to the
+   * integer set makes it non-empty.
+   */
+  @Test
+  public void testSimpleAdd() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Set set = IntSetTestUtility.getSetBits();
+      set.add(999);
+      integerSet.add(999);
+
+      Assert.assertTrue(Sets.difference(integerSet, set).isEmpty());
+
+      integerSet.add(58577);
+      Assert.assertFalse(Sets.difference(integerSet, set).isEmpty());
+    }
+  }
+
+  /**
+   * containsAll is true for the reference bits and false once the argument holds an extra value.
+   */
+  @Test
+  public void testContainsAll() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Set set = IntSetTestUtility.getSetBits();
+      Assert.assertTrue(integerSet.containsAll(set));
+
+      set.add(999);
+      Assert.assertFalse(integerSet.containsAll(set));
+    }
+  }
+
+  /**
+   * Removing every reference bit leaves the set empty.
+   */
+  @Test
+  public void testRemoveEverything() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Set set = IntSetTestUtility.getSetBits();
+
+      integerSet.removeAll(set);
+      boolean isEmpty = integerSet.isEmpty();
+      Assert.assertTrue(isEmpty);
+    }
+  }
+
+  /**
+   * Removing one value from both sets keeps them in agreement.
+   */
+  @Test
+  public void testRemoveOneThing() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Set set = IntSetTestUtility.getSetBits();
+
+      integerSet.remove(1);
+      set.remove(1);
+
+      // Only checks that nothing in 'set' is missing from integerSet (one direction).
+      Assert.assertTrue(Sets.difference(set, integerSet).isEmpty());
+    }
+  }
+
+
+  /**
+   * isEmpty follows the contents through fill, clear and a subsequent add.
+   */
+  @Test
+  public void testIsEmpty() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Assert.assertFalse(integerSet.isEmpty());
+
+      integerSet.clear();
+
+      Assert.assertTrue(integerSet.isEmpty());
+
+      integerSet.add(1);
+      Assert.assertFalse(integerSet.isEmpty());
+    }
+  }
+
+  /**
+   * size matches the number of reference bits.
+   */
+  @Test
+  public void testSize() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Set set = IntSetTestUtility.getSetBits();
+
+      Assert.assertEquals(set.size(), integerSet.size());
+    }
+  }
+
+
+  /**
+   * retainAll is expected to throw UnsupportedOperationException.
+   */
+  @Test
+  public void testRetainAll() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Set set = IntSetTestUtility.getSetBits();
+
+      set.remove(1);
+      set.add(9999);
+
+      boolean threwError = false;
+      try {
+        integerSet.retainAll(set);
+      }
+      catch (UnsupportedOperationException ex) {
+        threwError = true;
+      }
+      Assert.assertTrue(threwError);
+    }
+  }
+
+  /**
+   * Adding {@code Integer.MAX_VALUE + 1} is expected to throw IllegalArgumentException. The int
+   * addition wraps around, so the value actually passed is {@code Integer.MIN_VALUE}.
+   */
+  @Test
+  public void testIntOverflow() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      Exception e = null;
+      try {
+        MutableBitmap wrappedBitmap = clazz.newInstance();
+        IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+        IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+        integerSet.add(Integer.MAX_VALUE + 1);
+      }
+      catch (java.lang.IllegalArgumentException ex) {
+        e = ex;
+      }
+      Assert.assertNotNull(e);
+    }
+  }
+
+  /**
+   * The array returned by the no-argument toArray holds every element of the set.
+   */
+  @Test
+  public void testToArray() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      // NOTE(review): 'e' is never read in this test.
+      Exception e = null;
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+      Set set = Sets.newHashSet((Integer[]) integerSet.toArray());
+      Assert.assertTrue(Sets.difference(integerSet, set).isEmpty());
+    }
+  }
+
+
+  /**
+   * toArray with a zero-length destination array still returns every element of the set.
+   */
+  @Test
+  public void testToSmallArray() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      // NOTE(review): 'e' is never read in this test.
+      Exception e = null;
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+      Set set = Sets.newHashSet((Integer[]) integerSet.toArray(new Integer[0]));
+      Assert.assertTrue(Sets.difference(integerSet, set).isEmpty());
+    }
+  }
+
+
+  /**
+   * toArray with an oversized destination array fills that array with every element of the set.
+   */
+  @Test
+  public void testToBigArray() throws IllegalAccessException, InstantiationException
+  {
+    for (Class clazz : clazzes) {
+      // NOTE(review): 'e' is never read in this test.
+      Exception e = null;
+      MutableBitmap wrappedBitmap = clazz.newInstance();
+      IntSetTestUtility.addAllToMutable(wrappedBitmap, IntSetTestUtility.getSetBits());
+      IntegerSet integerSet = IntegerSet.wrap(wrappedBitmap);
+
+      Integer[] bigArray = new Integer[1024];
+      integerSet.toArray(bigArray);
+      // Unfilled slots of bigArray stay null, so 'set' may also contain null; only the one-directional
+      // difference is asserted.
+      Set set = Sets.newHashSet(bigArray);
+      Assert.assertTrue(Sets.difference(integerSet, set).isEmpty());
+    }
+  }
+}
diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java
new file mode 100755
index 000000000000..6ffe81a79a3e
--- /dev/null
+++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package io.druid.collections.bitmap; + +import com.carrotsearch.junitbenchmarks.BenchmarkOptions; +import com.carrotsearch.junitbenchmarks.BenchmarkRule; +import com.carrotsearch.junitbenchmarks.Clock; +import com.google.common.collect.Lists; +import io.druid.extendedset.intset.ImmutableConciseSet; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.roaringbitmap.buffer.BufferFastAggregation; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Random; + + +/** Shared base for the bitmap benchmarks. Holds SIZE-element static arrays of the same bitmaps in several representations (on-heap and direct-buffer Concise, mutable, heap-buffer and direct-buffer Roaring, and the generic ImmutableBitmap wrappers) together with the expected values the timed tests assert against. Nothing in this class fills the arrays; RangeBitmapBenchmarkTest and UniformBitmapBenchmarkTest do that in their BeforeClass methods. */ @BenchmarkOptions(clock = Clock.NANO_TIME, benchmarkRounds = 50) +public class BitmapBenchmark +{ + /** Exclusive upper bound on the bit positions the subclasses generate. */ public static final int LENGTH = 500_000; + /** Number of bitmaps held in each of the arrays below. */ public static final int SIZE = 10_000; + final static ImmutableConciseSet concise[] = new ImmutableConciseSet[SIZE]; + final static ImmutableConciseSet offheapConcise[] = new ImmutableConciseSet[SIZE]; + final static ImmutableRoaringBitmap roaring[] = new ImmutableRoaringBitmap[SIZE]; + final static ImmutableRoaringBitmap immutableRoaring[] = new ImmutableRoaringBitmap[SIZE]; + final static ImmutableRoaringBitmap offheapRoaring[] = new ImmutableRoaringBitmap[SIZE]; + final static ImmutableBitmap genericConcise[] = new ImmutableBitmap[SIZE]; + final static ImmutableBitmap genericRoaring[] = new ImmutableBitmap[SIZE]; + final static ConciseBitmapFactory conciseFactory = new ConciseBitmapFactory(); + final static RoaringBitmapFactory roaringFactory = new RoaringBitmapFactory(); + /** Fixed seed 0; reset() installs a fresh Random(0) so each subclass starts from the same sequence. */ static Random rand = new Random(0); + /** Running totals maintained by makeOffheapConcise and makeOffheapRoaring; read by printSizeStats. */ static long totalConciseBytes = 0; + static long totalRoaringBytes = 0; + static long conciseCount = 0; + static long roaringCount = 0; + /** Expected cardinality of the union of all bitmaps; assigned by the subclass that builds them. */ static long unionCount = 0; + /** Lower bound the intersection tests require; assigned by the subclass that builds the bitmaps. */ static long minIntersection = 0; + @Rule + public TestRule benchmarkRun = new BenchmarkRule(); + + /** Copies the serialized form of the given set into a direct buffer and returns an ImmutableConciseSet over that buffer. Adds the byte length to totalConciseBytes and increments conciseCount. */ protected
static ImmutableConciseSet makeOffheapConcise(ImmutableConciseSet concise) + { + final byte[] bytes = concise.toBytes(); + totalConciseBytes += bytes.length; + conciseCount++; + final ByteBuffer buf = ByteBuffer.allocateDirect(bytes.length).put(bytes); + buf.rewind(); + return new ImmutableConciseSet(buf); + } + + /** Serializes r, asserts that the serialized bytes exactly fill what remains of buf, copies them in, rewinds buf and returns an ImmutableRoaringBitmap over a read-only view of it. */ protected static ImmutableRoaringBitmap writeImmutable(MutableRoaringBitmap r, ByteBuffer buf) throws IOException + { + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + r.serialize(new DataOutputStream(out)); + final byte[] bytes = out.toByteArray(); + Assert.assertEquals(buf.remaining(), bytes.length); + buf.put(bytes); + buf.rewind(); + return new ImmutableRoaringBitmap(buf.asReadOnlyBuffer()); + } + + /** Zeroes every counter and expected value and replaces rand with a new Random(0). The bitmap arrays themselves are left untouched. */ protected static void reset() + { + conciseCount = 0; + roaringCount = 0; + totalConciseBytes = 0; + totalRoaringBytes = 0; + unionCount = 0; + minIntersection = 0; + rand = new Random(0); + } + + /** Prints a small table with the number of bitmaps built and their average serialized size. The averages divide by conciseCount and roaringCount, so at least one bitmap must have gone through makeOffheapConcise and one through makeOffheapRoaring before this is called. */ protected static void printSizeStats(double density, String name) + { + System.out.println(""); + System.out.println("## " + name); + System.out.println(""); + System.out.printf(" d = %06.5f | Concise | Roaring" + System.lineSeparator(), density); + System.out.println("-------------|---------|---------"); + System.out.printf("Count | %5d | %5d " + System.lineSeparator(), conciseCount, roaringCount); + System.out.printf( + "Average size | %5d | %5d " + System.lineSeparator(), + totalConciseBytes / conciseCount, + totalRoaringBytes / roaringCount + ); + System.out.println("-------------|---------|---------"); + System.out.println(""); + System.out.flush(); + } + + /** Writes r into a direct buffer sized to its serialized form, adding that size to totalRoaringBytes and incrementing roaringCount. */ protected static ImmutableRoaringBitmap makeOffheapRoaring(MutableRoaringBitmap r) throws IOException + { + final int size = r.serializedSizeInBytes(); + final ByteBuffer buf = ByteBuffer.allocateDirect(size); + totalRoaringBytes += size; + roaringCount++; + return writeImmutable(r, buf); + } + + /** Writes r into a heap buffer sized to its serialized form. Unlike makeOffheapRoaring this does not update the size counters. */ protected static ImmutableRoaringBitmap makeImmutableRoaring(MutableRoaringBitmap r) throws
IOException + { + final ByteBuffer buf = ByteBuffer.allocate(r.serializedSizeInBytes()); + return writeImmutable(r, buf); + } + + /** Unions the on-heap Concise sets and checks the result against unionCount. */ @Test + @BenchmarkOptions(warmupRounds = 1, benchmarkRounds = 2) + public void timeConciseUnion() throws Exception + { + ImmutableConciseSet union = ImmutableConciseSet.union(concise); + Assert.assertEquals(unionCount, union.size()); + } + + /** Same union over the direct-buffer Concise sets. */ @Test + @BenchmarkOptions(warmupRounds = 1, benchmarkRounds = 2) + public void timeOffheapConciseUnion() throws Exception + { + ImmutableConciseSet union = ImmutableConciseSet.union(offheapConcise); + Assert.assertEquals(unionCount, union.size()); + } + + /** Same union, going through ConciseBitmapFactory and the generic wrappers. */ @Test + @BenchmarkOptions(warmupRounds = 1, benchmarkRounds = 2) + public void timeGenericConciseUnion() throws Exception + { + ImmutableBitmap union = conciseFactory.union(Lists.newArrayList(genericConcise)); + Assert.assertEquals(unionCount, union.size()); + } + + /** Intersects the generic Concise wrappers; only a lower bound on the size is checked. */ @Test + @BenchmarkOptions(warmupRounds = 1, benchmarkRounds = 5) + public void timeGenericConciseIntersection() throws Exception + { + ImmutableBitmap intersection = conciseFactory.intersection(Lists.newArrayList(genericConcise)); + Assert.assertTrue(intersection.size() >= minIntersection); + } + + /** Unions the mutable Roaring bitmaps with BufferFastAggregation.horizontal_or. */ @Test + public void timeRoaringUnion() throws Exception + { + ImmutableRoaringBitmap union = BufferFastAggregation.horizontal_or(Lists.newArrayList(roaring).iterator()); + Assert.assertEquals(unionCount, union.getCardinality()); + } + + /** Same union over the heap-buffer Roaring bitmaps. */ @Test + public void timeImmutableRoaringUnion() throws Exception + { + ImmutableRoaringBitmap union = BufferFastAggregation.horizontal_or(Lists.newArrayList(immutableRoaring).iterator()); + Assert.assertEquals(unionCount, union.getCardinality()); + } + + /** Same union over the direct-buffer Roaring bitmaps. */ @Test + public void timeOffheapRoaringUnion() throws Exception + { + ImmutableRoaringBitmap union = BufferFastAggregation.horizontal_or(Lists.newArrayList(offheapRoaring).iterator()); + Assert.assertEquals(unionCount, union.getCardinality()); + } + + /** Same union, going through RoaringBitmapFactory and the generic wrappers. */ @Test + public void timeGenericRoaringUnion() throws
Exception + { + ImmutableBitmap union = roaringFactory.union(Lists.newArrayList(genericRoaring)); + Assert.assertEquals(unionCount, union.size()); + } + + /** Intersects the generic Roaring wrappers; only a lower bound on the size is checked. */ @Test + public void timeGenericRoaringIntersection() throws Exception + { + ImmutableBitmap intersection = roaringFactory.intersection(Lists.newArrayList(genericRoaring)); + Assert.assertTrue(intersection.size() >= minIntersection); + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java new file mode 100755 index 000000000000..673431e6b6e0 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java @@ -0,0 +1,94 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package io.druid.collections.bitmap; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; +import junit.framework.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Set; + +/** Tests for ConciseBitmapFactory.union with null inputs, and for get() on the mutable and immutable Concise wrappers. */ public class ConciseBitmapFactoryTest +{ + /** A union in which every input bitmap is null must yield an empty bitmap. */ @Test + public void testUnwrapWithNull() throws Exception + { + ConciseBitmapFactory factory = new ConciseBitmapFactory(); + + ImmutableBitmap bitmap = factory.union( + Iterables.transform( + Lists.newArrayList(new WrappedConciseBitmap()), + /* Explicit type arguments are required: a raw Function would leave apply(Object) unimplemented. */ new Function<WrappedConciseBitmap, ImmutableBitmap>() + { + @Override + public ImmutableBitmap apply(WrappedConciseBitmap input) + { + return null; + } + } + ) + ); + + Assert.assertEquals(0, bitmap.size()); + } + + /** A null entry next to a real bitmap is skipped: the union keeps the three bits of the real one. */ @Test + public void testUnwrapMerge() throws Exception + { + ConciseBitmapFactory factory = new ConciseBitmapFactory(); + + WrappedConciseBitmap set = new WrappedConciseBitmap(); + set.add(1); + set.add(3); + set.add(5); + + ImmutableBitmap bitmap = factory.union( + Arrays.asList( + factory.makeImmutableBitmap(set), + null + ) + ); + + Assert.assertEquals(3, bitmap.size()); + } + + /** For every index in [0, 10), get() on both wrappers must agree with membership in {0, 4, 9}. */ @Test + public void testGetOutOfBounds() + { + final ConciseSet conciseSet = new ConciseSet(); + /* Typed as Set<Integer> so the enhanced for loop below can unbox each element to int. */ final Set<Integer> ints = ImmutableSet.of(0, 4, 9); + for (int i : ints) { + conciseSet.add(i); + } + final ImmutableBitmap immutableBitmap = new WrappedImmutableConciseBitmap( + ImmutableConciseSet.newImmutableFromMutable(conciseSet)); + final MutableBitmap mutableBitmap = new WrappedConciseBitmap(conciseSet); + for (int i = 0; i < 10; ++i) { + Assert.assertEquals(Integer.toString(i), ints.contains(i), mutableBitmap.get(i)); + Assert.assertEquals(Integer.toString(i), ints.contains(i), immutableBitmap.get(i)); + } + } +} diff --git
a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java new file mode 100755 index 000000000000..8800d167350f --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java @@ -0,0 +1,83 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.bitmap; + +import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; +import com.carrotsearch.junitbenchmarks.annotation.LabelType; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; +import io.druid.test.annotation.Benchmark; + +import org.junit.BeforeClass; +import org.junit.experimental.categories.Category; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +import java.util.BitSet; + +/** Runs the inherited BitmapBenchmark tests over bitmaps built from alternating filled and empty runs of random length. */ @Category({Benchmark.class}) +@BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) +public class RangeBitmapBenchmarkTest extends BitmapBenchmark +{ + + /** Scales the run length: each run is LENGTH * DENSITY positions plus a random extra below LENGTH * DENSITY. It is not the fraction of bits set. */ public static final double DENSITY = 0.001; + /** Number of consecutive positions forced into every bitmap so the intersection has a known minimum size. */ public static final int MIN_INTERSECT = 50; + + /** Resets the shared state, fills every inherited bitmap array with SIZE bitmaps, records the expected union cardinality and minimum intersection size, and prints size statistics. */ @BeforeClass + public static void prepareRandomRanges() throws Exception + { + /* NOTE(review): presumably the key the history chart uses as its label (labelWith = CUSTOM_KEY) - confirm against the JUnitBenchmarks documentation. */ System.setProperty("jub.customkey", String.format("%06.5f", DENSITY)); + reset(); + + final BitSet expectedUnion = new BitSet(); + for (int i = 0; i < SIZE; ++i) { + ConciseSet c = new ConciseSet(); + MutableRoaringBitmap r = new MutableRoaringBitmap(); + { + int k = 0; + /* The first run is filled; fill flips after every run, so filled and empty runs alternate. */ boolean fill = true; + while (k < LENGTH) { + int runLength = (int) (LENGTH * DENSITY) + rand.nextInt((int) (LENGTH * DENSITY)); + /* When fill is false the loop body never executes and the run is simply skipped. */ for (int j = k; fill && j < LENGTH && j < k + runLength; ++j) { + c.add(j); + r.add(j); + expectedUnion.set(j); + } + k += runLength; + fill = !fill; + } + } + minIntersection = MIN_INTERSECT; + /* Set the same MIN_INTERSECT positions, starting at LENGTH / 2, in every bitmap. */ for (int k = LENGTH / 2; k < LENGTH / 2 + minIntersection; ++k) { + c.add(k); + r.add(k); + expectedUnion.set(k); + } + /* Store the same logical bitmap in every representation the base class benchmarks. */ concise[i] = ImmutableConciseSet.newImmutableFromMutable(c); + offheapConcise[i] = makeOffheapConcise(concise[i]); + roaring[i] = r; + immutableRoaring[i] = makeImmutableRoaring(r); + offheapRoaring[i] = makeOffheapRoaring(r); + genericConcise[i] = new WrappedImmutableConciseBitmap(offheapConcise[i]); + genericRoaring[i] = new WrappedImmutableRoaringBitmap(offheapRoaring[i]); + } + unionCount = expectedUnion.cardinality(); +
printSizeStats(DENSITY, "Random Alternating Bitmap"); + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RoaringBitmapFactoryTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RoaringBitmapFactoryTest.java new file mode 100755 index 000000000000..1cfa59e7f5e4 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RoaringBitmapFactoryTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package io.druid.collections.bitmap; + +import com.google.common.base.Function; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.Test; +import org.roaringbitmap.IntIterator; + +import java.util.Arrays; + +/** Tests for RoaringBitmapFactory: double complement of an empty bitmap, and union with null inputs. */ public class RoaringBitmapFactoryTest +{ + + /* testing https://github.com/metamx/bytebuffer-collections/issues/26 */ + @Test + public void testIssue26() throws Exception + { + checkEmptyComplement(new ConciseBitmapFactory()); + checkEmptyComplement(new RoaringBitmapFactory()); + } + + /* used by issue 26: complements the empty bitmap over numRow rows, complements the result again, and requires the outcome to be empty by size(), isEmpty() and iteration. */ + private void checkEmptyComplement(BitmapFactory bitmapFactory) throws Exception + { + int numRow = 5104234; + ImmutableBitmap bitmap = bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), numRow); + ImmutableBitmap notBitmap = bitmapFactory.complement(bitmap, numRow); + /* assertEquals reports the actual size on failure, unlike assertTrue on a comparison. */ Assert.assertEquals(0, notBitmap.size()); + Assert.assertTrue(notBitmap.isEmpty()); + IntIterator intIter = notBitmap.iterator(); + Assert.assertFalse(intIter.hasNext()); + } + + /** A union in which every input bitmap is null must yield an empty bitmap. */ @Test + public void testUnwrapWithNull() throws Exception + { + RoaringBitmapFactory factory = new RoaringBitmapFactory(); + + ImmutableBitmap bitmap = factory.union( + Iterables.transform( + Lists.newArrayList(new WrappedRoaringBitmap()), + /* Explicit type arguments are required: a raw Function would leave apply(Object) unimplemented. */ new Function<WrappedRoaringBitmap, ImmutableBitmap>() + { + @Override + public ImmutableBitmap apply(WrappedRoaringBitmap input) + { + return null; + } + } + ) + ); + + Assert.assertEquals(0, bitmap.size()); + } + + /** A null entry next to a real bitmap is skipped: the union keeps the three bits of the real one. */ @Test + public void testUnwrapMerge() throws Exception + { + RoaringBitmapFactory factory = new RoaringBitmapFactory(); + + WrappedRoaringBitmap set = new WrappedRoaringBitmap(); + set.add(1); + set.add(3); + set.add(5); + + ImmutableBitmap bitmap = factory.union( + Arrays.asList( + factory.makeImmutableBitmap(set), + null + ) + ); + + Assert.assertEquals(3, bitmap.size()); + } +} diff --git
b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java new file mode 100755 index 000000000000..7e7306d9f4c6 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.bitmap; + +import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; +import com.carrotsearch.junitbenchmarks.annotation.LabelType; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; +import io.druid.test.annotation.Benchmark; + +import org.junit.BeforeClass; +import org.junit.experimental.categories.Category; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + +import java.util.BitSet; + +/** Runs the inherited BitmapBenchmark tests over bitmaps whose bits are set independently with probability DENSITY. */ @Category({Benchmark.class}) +@BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) +public class UniformBitmapBenchmarkTest extends BitmapBenchmark +{ + + /** Probability with which each position in [0, LENGTH) is set in each bitmap. */ public static final double DENSITY = 0.01; + /** Number of random positions drawn to be forced into every bitmap; draws may repeat. */ public static final int MIN_INTERSECT = 50; + + /** Resets the shared state, fills every inherited bitmap array with SIZE bitmaps, records the expected union cardinality and the guaranteed intersection size, and prints size statistics. */ @BeforeClass + public static void prepareMostlyUniform() throws Exception + { + System.setProperty("jub.customkey", String.format("%05.4f", DENSITY)); + reset(); + + final BitSet expectedUnion = new BitSet(); + /* Tracks the distinct forced positions: rand.nextInt may return the same position twice. */ final BitSet knownBits = new BitSet(); + final int[] knownTrue = new int[MIN_INTERSECT]; + for (int i = 0; i < knownTrue.length; ++i) { + knownTrue[i] = rand.nextInt(LENGTH); + knownBits.set(knownTrue[i]); + } + for (int i = 0; i < SIZE; ++i) { + ConciseSet c = new ConciseSet(); + MutableRoaringBitmap r = new MutableRoaringBitmap(); + for (int k = 0; k < LENGTH; ++k) { + if (rand.nextDouble() < DENSITY) { + c.add(k); + r.add(k); + expectedUnion.set(k); + } + } + /* Force the shared positions into every bitmap so the intersection cannot be empty. */ for (int k : knownTrue) { + c.add(k); + r.add(k); + expectedUnion.set(k); + } + /* Store the same logical bitmap in every representation the base class benchmarks. */ concise[i] = ImmutableConciseSet.newImmutableFromMutable(c); + offheapConcise[i] = makeOffheapConcise(concise[i]); + roaring[i] = r; + immutableRoaring[i] = makeImmutableRoaring(r); + offheapRoaring[i] = makeOffheapRoaring(r); + genericConcise[i] = new WrappedImmutableConciseBitmap(offheapConcise[i]); + genericRoaring[i] = new WrappedImmutableRoaringBitmap(offheapRoaring[i]); + } + unionCount = expectedUnion.cardinality(); + /* Only distinct forced positions are guaranteed to be in the intersection, so count those rather than array slots. */ minIntersection = knownBits.cardinality(); + printSizeStats(DENSITY, "Uniform Bitmap"); + } +} diff --git
a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedBitSetBitmapBitSetTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedBitSetBitmapBitSetTest.java new file mode 100755 index 000000000000..59af18ddf26e --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedBitSetBitmapBitSetTest.java @@ -0,0 +1,174 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.bitmap; + +import com.google.common.collect.Sets; + +import io.druid.collections.IntSetTestUtility; + +import org.junit.Assert; +import org.junit.Test; +import org.roaringbitmap.IntIterator; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.BitSet; +import java.util.Set; + +/** + * Tests for WrappedBitSetBitmap and WrappedImmutableBitSetBitmap, using the bit positions supplied by IntSetTestUtility.getSetBits(). + */ +public class WrappedBitSetBitmapBitSetTest +{ + + /** Builds a fresh mutable bitmap holding exactly the IntSetTestUtility.getSetBits() positions. */ private static final WrappedBitSetBitmap defaultBitSet() + { + return new WrappedBitSetBitmap(IntSetTestUtility.createSimpleBitSet(IntSetTestUtility.getSetBits())); + } + + /** The bitmap iterator must return the added bits in the same order in which getSetBits() iterates. */ @Test + public void testIterator() + { + WrappedBitSetBitmap bitSet = new WrappedBitSetBitmap(); + for (int i : IntSetTestUtility.getSetBits()) { + bitSet.add(i); + } + IntIterator intIt = bitSet.iterator(); + for (int i : IntSetTestUtility.getSetBits()) { + Assert.assertTrue(intIt.hasNext()); + Assert.assertEquals(i, intIt.next()); + } + } + + /** size() must equal the cardinality of the wrapped BitSet. */ @Test + public void testSize() + { + BitSet bitSet = IntSetTestUtility.createSimpleBitSet(IntSetTestUtility.getSetBits()); + WrappedBitSetBitmap wrappedBitSetBitmapBitSet = new WrappedBitSetBitmap(bitSet); + Assert.assertEquals(bitSet.cardinality(), wrappedBitSetBitmapBitSet.size()); + } + + /** Checks that the immutable wrapper reflects bits set on the wrapped BitSet after wrapping. Note that BitSet.valueOf copies the buffer contents and keeps no reference to the buffer, so the BitSet itself lives on the heap. */ @Test + public void testOffHeap() + { + ByteBuffer buffer = ByteBuffer.allocateDirect(Long.SIZE * 100 / 8).order(ByteOrder.LITTLE_ENDIAN); + BitSet testSet = BitSet.valueOf(buffer); + testSet.set(1); + WrappedImmutableBitSetBitmap bitMap = new WrappedImmutableBitSetBitmap(testSet); + Assert.assertTrue(bitMap.get(1)); + testSet.set(2); + Assert.assertTrue(bitMap.get(2)); + } + + /** A bitmap wrapping the simple BitSet must hold exactly the getSetBits() positions. */ @Test + public void testSimpleBitSet() + { + WrappedBitSetBitmap bitSet = new WrappedBitSetBitmap(IntSetTestUtility.createSimpleBitSet(IntSetTestUtility.getSetBits())); + Assert.assertTrue(IntSetTestUtility.equalSets(IntSetTestUtility.getSetBits(), bitSet)); + } + + /** union() must match Guava's Sets.union of the same two sets of positions. */ @Test + public void testUnion() + { + WrappedBitSetBitmap bitSet = new
WrappedBitSetBitmap(IntSetTestUtility.createSimpleBitSet(IntSetTestUtility.getSetBits())); + + Set<Integer> extraBits = Sets.newHashSet(6, 9); + WrappedBitSetBitmap bitExtraSet = new WrappedBitSetBitmap(IntSetTestUtility.createSimpleBitSet(extraBits)); + + Set<Integer> union = Sets.union(extraBits, IntSetTestUtility.getSetBits()); + + Assert.assertTrue(IntSetTestUtility.equalSets(union, (WrappedBitSetBitmap) bitSet.union(bitExtraSet))); + } + + /** intersection() must match Guava's Sets.intersection of the same two sets of positions. */ @Test + public void testIntersection() + { + WrappedBitSetBitmap bitSet = new WrappedBitSetBitmap(IntSetTestUtility.createSimpleBitSet(IntSetTestUtility.getSetBits())); + + Set<Integer> extraBits = Sets.newHashSet(1, 2, 3, 4, 5, 6, 7, 8); + WrappedBitSetBitmap bitExtraSet = new WrappedBitSetBitmap(IntSetTestUtility.createSimpleBitSet(extraBits)); + + Set<Integer> intersection = Sets.intersection(extraBits, IntSetTestUtility.getSetBits()); + + Assert.assertTrue(IntSetTestUtility.equalSets( + intersection, + (WrappedBitSetBitmap) bitSet.intersection(bitExtraSet) + )); + } + + /** In-place and(): one operand lacks bit 1 and the other lacks bit 2, so the result lacks both. */ @Test + public void testAnd() + { + WrappedBitSetBitmap bitSet = defaultBitSet(); + WrappedBitSetBitmap bitSet2 = defaultBitSet(); + Set<Integer> defaultBitSet = IntSetTestUtility.getSetBits(); + bitSet.remove(1); + bitSet2.remove(2); + + bitSet.and(bitSet2); + + defaultBitSet.remove(1); + defaultBitSet.remove(2); + + Assert.assertTrue(IntSetTestUtility.equalSets(defaultBitSet, bitSet)); + } + + + /** In-place or(): each operand supplies the bit the other lacks, so the result is the full default set. */ @Test + public void testOr() + { + WrappedBitSetBitmap bitSet = defaultBitSet(); + WrappedBitSetBitmap bitSet2 = defaultBitSet(); + Set<Integer> defaultBitSet = IntSetTestUtility.getSetBits(); + bitSet.remove(1); + bitSet2.remove(2); + + bitSet.or(bitSet2); + + Assert.assertTrue(IntSetTestUtility.equalSets(defaultBitSet, bitSet)); + } + + /** In-place andNot(): the only bit present in the first operand and absent from the second is 2. */ @Test + public void testAndNot() + { + WrappedBitSetBitmap bitSet = defaultBitSet(); + WrappedBitSetBitmap bitSet2 = defaultBitSet(); + Set<Integer> defaultBitSet = Sets.newHashSet(); + bitSet.remove(1); + bitSet2.remove(2); + + bitSet.andNot(bitSet2); + + defaultBitSet.add(2); + +
Assert.assertTrue(IntSetTestUtility.equalSets(defaultBitSet, bitSet)); + } + + + /** Smoke test only: it passes as long as serialize() into a buffer of getSizeInBytes() bytes does not throw; nothing is asserted about the bytes written. */ @Test + public void testSerialize() + { + WrappedBitSetBitmap bitSet = defaultBitSet(); + Set<Integer> defaultBitSet = IntSetTestUtility.getSetBits(); + byte[] buffer = new byte[bitSet.getSizeInBytes()]; + ByteBuffer byteBuffer = ByteBuffer.wrap(buffer); + bitSet.serialize(byteBuffer); + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedRoaringBitmapTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedRoaringBitmapTest.java new file mode 100755 index 000000000000..402a494c90c5 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/WrappedRoaringBitmapTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package io.druid.collections.bitmap; + +import junit.framework.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; + +/** Serialization round-trip tests for WrappedRoaringBitmap, run once per factory returned by factoryClasses(). */ @RunWith(Parameterized.class) +public class WrappedRoaringBitmapTest +{ + private final RoaringBitmapFactory factory; + + public WrappedRoaringBitmapTest(RoaringBitmapFactory factory) + { + this.factory = factory; + } + + /** Supplies one single-element parameter array per factory: RoaringBitmapFactory(false) and RoaringBitmapFactory(true). The arrays are created directly; casting the Object[] returned by Arrays.asList(...).toArray() to RoaringBitmapFactory[] fails with ClassCastException on JDK 9 and later. */ @Parameterized.Parameters + public static List<RoaringBitmapFactory[]> factoryClasses() + { + return Arrays.asList( + new RoaringBitmapFactory[]{ + new RoaringBitmapFactory(false) + }, + new RoaringBitmapFactory[]{ + new RoaringBitmapFactory(true) + } + ); + } + + /** Builds a mutable bitmap from the parameterized factory holding the five bits 1, 3, 5, 7 and 9. */ private WrappedRoaringBitmap createWrappedRoaringBitmap() + { + WrappedRoaringBitmap set = (WrappedRoaringBitmap) factory.makeEmptyMutableBitmap(); + set.add(1); + set.add(3); + set.add(5); + set.add(7); + set.add(9); + return set; + } + + /** serialize() into a buffer of getSizeInBytes() bytes must map back to a bitmap of five bits. The bytes are read back through a default-constructed RoaringBitmapFactory, not the parameterized one. */ @Test + public void testSerialize() + { + WrappedRoaringBitmap set = createWrappedRoaringBitmap(); + + byte[] buffer = new byte[set.getSizeInBytes()]; + ByteBuffer byteBuffer = ByteBuffer.wrap(buffer); + set.serialize(byteBuffer); + /* flip() limits the buffer to the bytes just written and rewinds it for reading. */ byteBuffer.flip(); + ImmutableBitmap immutableBitmap = new RoaringBitmapFactory().mapImmutableBitmap(byteBuffer); + Assert.assertEquals(5, immutableBitmap.size()); + } + + /** toBytes() must map back to a bitmap of five bits, again through a default-constructed RoaringBitmapFactory. */ @Test + public void testToByteArray() + { + WrappedRoaringBitmap set = createWrappedRoaringBitmap(); + ImmutableBitmap immutableBitmap = new RoaringBitmapFactory().mapImmutableBitmap(ByteBuffer.wrap(set.toBytes())); + Assert.assertEquals(5, immutableBitmap.size()); + } + +} diff --git
b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/ImmutableRTreeTest.java @@ -0,0 +1,651 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import com.google.common.base.Stopwatch; +import com.google.common.base.Throwables; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.spatial.search.PolygonBound; +import io.druid.collections.spatial.search.RadiusBound; +import io.druid.collections.spatial.search.RectangularBound; +import io.druid.collections.spatial.split.LinearGutmanSplitStrategy; +import junit.framework.Assert; +import org.junit.Test; +import org.roaringbitmap.IntIterator; + +import java.nio.ByteBuffer; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/** + */ +public class ImmutableRTreeTest +{ + @Test + public void testToAndFromByteBuffer() + { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new 
RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{1, 1}, 2); + tree.insert(new float[]{2, 2}, 3); + tree.insert(new float[]{3, 3}, 4); + tree.insert(new float[]{4, 4}, 5); + + ImmutableRTree firstTree = ImmutableRTree.newImmutableFromMutable(tree); + ByteBuffer buffer = ByteBuffer.wrap(firstTree.toBytes()); + ImmutableRTree secondTree = new ImmutableRTree(buffer, bf); + Iterable points = secondTree.search(new RadiusBound(new float[]{0, 0}, 10)); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testToAndFromByteBufferRoaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{1, 1}, 2); + tree.insert(new float[]{2, 2}, 3); + tree.insert(new float[]{3, 3}, 4); + tree.insert(new float[]{4, 4}, 5); + + ImmutableRTree firstTree = ImmutableRTree.newImmutableFromMutable(tree); + ByteBuffer buffer = ByteBuffer.wrap(firstTree.toBytes()); + ImmutableRTree secondTree = new ImmutableRTree(buffer, bf); + Iterable points = secondTree.search(new RadiusBound(new float[]{0, 0}, 10)); + ImmutableBitmap finalSet = bf.union(points); + + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchNoSplit() + { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{10, 10}, 10); + tree.insert(new float[]{1, 3}, 2); 
+ tree.insert(new float[]{27, 34}, 20); + tree.insert(new float[]{106, 19}, 30); + tree.insert(new float[]{4, 2}, 3); + tree.insert(new float[]{5, 0}, 4); + tree.insert(new float[]{4, 72}, 40); + tree.insert(new float[]{-4, -3}, 5); + tree.insert(new float[]{119, -78}, 50); + + Assert.assertEquals(tree.getRoot().getChildren().size(), 10); + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(new RadiusBound(new float[]{0, 0}, 5)); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchNoSplitRoaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{10, 10}, 10); + tree.insert(new float[]{1, 3}, 2); + tree.insert(new float[]{27, 34}, 20); + tree.insert(new float[]{106, 19}, 30); + tree.insert(new float[]{4, 2}, 3); + tree.insert(new float[]{5, 0}, 4); + tree.insert(new float[]{4, 72}, 40); + tree.insert(new float[]{-4, -3}, 5); + tree.insert(new float[]{119, -78}, 50); + + Assert.assertEquals(tree.getRoot().getChildren().size(), 10); + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(new RadiusBound(new float[]{0, 0}, 5)); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchWithSplit() + { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), 
bf); + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{1, 3}, 2); + tree.insert(new float[]{4, 2}, 3); + tree.insert(new float[]{5, 0}, 4); + tree.insert(new float[]{-4, -3}, 5); + + Random rand = new Random(); + for (int i = 0; i < 95; i++) { + tree.insert( + new float[]{(float) (rand.nextDouble() * 10 + 10.0), (float) (rand.nextDouble() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(new RadiusBound(new float[]{0, 0}, 5)); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchWithSplitRoaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{1, 3}, 2); + tree.insert(new float[]{4, 2}, 3); + tree.insert(new float[]{5, 0}, 4); + tree.insert(new float[]{-4, -3}, 5); + + Random rand = new Random(); + for (int i = 0; i < 95; i++) { + tree.insert( + new float[]{(float) (rand.nextDouble() * 10 + 10.0), (float) (rand.nextDouble() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(new RadiusBound(new float[]{0, 0}, 5)); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + + @Test + public void testSearchWithSplit2() + { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new 
float[]{0.0f, 0.0f}, 0); + tree.insert(new float[]{1.0f, 3.0f}, 1); + tree.insert(new float[]{4.0f, 2.0f}, 2); + tree.insert(new float[]{7.0f, 3.0f}, 3); + tree.insert(new float[]{8.0f, 6.0f}, 4); + + Random rand = new Random(); + for (int i = 5; i < 5000; i++) { + tree.insert( + new float[]{(float) (rand.nextDouble() * 10 + 10.0), (float) (rand.nextDouble() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search( + new RectangularBound( + new float[]{0, 0}, + new float[]{9, 9} + ) + ); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(0, 1, 2, 3, 4); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchWithSplit2Roaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0.0f, 0.0f}, 0); + tree.insert(new float[]{1.0f, 3.0f}, 1); + tree.insert(new float[]{4.0f, 2.0f}, 2); + tree.insert(new float[]{7.0f, 3.0f}, 3); + tree.insert(new float[]{8.0f, 6.0f}, 4); + + Random rand = new Random(); + for (int i = 5; i < 5000; i++) { + tree.insert( + new float[]{(float) (rand.nextDouble() * 10 + 10.0), (float) (rand.nextDouble() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search( + new RectangularBound( + new float[]{0, 0}, + new float[]{9, 9} + ) + ); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(0, 1, 2, 3, 4); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchWithSplit3() + { + BitmapFactory bf = new 
ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0.0f, 0.0f}, 0); + tree.insert(new float[]{1.0f, 3.0f}, 1); + tree.insert(new float[]{4.0f, 2.0f}, 2); + tree.insert(new float[]{7.0f, 3.0f}, 3); + tree.insert(new float[]{8.0f, 6.0f}, 4); + + Random rand = new Random(); + for (int i = 5; i < 5000; i++) { + tree.insert( + new float[]{(float) (rand.nextFloat() * 10 + 10.0), (float) (rand.nextFloat() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search( + new RadiusBound(new float[]{0.0f, 0.0f}, 5) + ); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 3); + + Set expected = Sets.newHashSet(0, 1, 2); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchWithSplit3Roaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0.0f, 0.0f}, 0); + tree.insert(new float[]{1.0f, 3.0f}, 1); + tree.insert(new float[]{4.0f, 2.0f}, 2); + tree.insert(new float[]{7.0f, 3.0f}, 3); + tree.insert(new float[]{8.0f, 6.0f}, 4); + + Random rand = new Random(); + for (int i = 5; i < 5000; i++) { + tree.insert( + new float[]{(float) (rand.nextFloat() * 10 + 10.0), (float) (rand.nextFloat() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search( + new RadiusBound(new float[]{0.0f, 0.0f}, 5) + ); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 3); + + Set expected = Sets.newHashSet(0, 1, 2); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void 
testSearchWithSplit4() + { + BitmapFactory bf = new ConciseBitmapFactory(); + //RTree tree = new RTree(2, new QuadraticGutmanSplitStrategy(0, 100, bf), bf); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + Random rand = new Random(); + + int outPolygon = 0, inPolygon = 0; + for (; inPolygon < 500; ) { + double abscissa = rand.nextDouble() * 5; + double ordinate = rand.nextDouble() * 4; + + if (abscissa < 1 || abscissa > 4 || ordinate < 1 || ordinate > 3 || abscissa < 2 && ordinate > 2) { + tree.insert( + new float[]{(float) abscissa, (float) ordinate}, + outPolygon + 500 + ); + outPolygon++; + } else if (abscissa > 1 && abscissa < 4 && ordinate > 1 && ordinate < 2 + || abscissa > 2 && abscissa < 4 && ordinate >= 2 && ordinate < 3) { + tree.insert( + new float[]{(float) abscissa, (float) ordinate}, + inPolygon + ); + inPolygon++; + } + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(PolygonBound.from( + new float[]{1.0f, 1.0f, 2.0f, 2.0f, 4.0f, 4.0f}, + new float[]{1.0f, 2.0f, 2.0f, 3.0f, 3.0f, 1.0f} + )); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() == 500); + + Set expected = Sets.newHashSet(); + for (int i = 0; i < 500; i++) { + expected.add(i); + } + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testSearchWithSplit4Roaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + Random rand = new Random(); + + int outPolygon = 0, inPolygon = 0; + for (; inPolygon < 500; ) { + double abscissa = rand.nextDouble() * 5; + double ordinate = rand.nextDouble() * 4; + + if (abscissa < 1 || abscissa > 4 || ordinate < 1 || ordinate > 3 || abscissa < 2 && ordinate > 2) { + tree.insert( + new float[]{(float) abscissa, (float) ordinate}, + outPolygon + 500 + ); + 
outPolygon++; + } else if (abscissa > 1 && abscissa < 4 && ordinate > 1 && ordinate < 2 + || abscissa > 2 && abscissa < 4 && ordinate >= 2 && ordinate < 3) { + tree.insert( + new float[]{(float) abscissa, (float) ordinate}, + inPolygon + ); + inPolygon++; + } + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(PolygonBound.from( + new float[]{1.0f, 1.0f, 2.0f, 2.0f, 4.0f, 4.0f}, + new float[]{1.0f, 2.0f, 2.0f, 3.0f, 3.0f, 1.0f} + )); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() == 500); + + Set expected = Sets.newHashSet(); + for (int i = 0; i < 500; i++) { + expected.add(i); + } + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + @Test + public void testEmptyConciseSet() + { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0.0f, 0.0f}, bf.makeEmptyMutableBitmap()); + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search( + new RadiusBound(new float[]{0.0f, 0.0f}, 5) + ); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertEquals(finalSet.size(), 0); + } + + @Test + public void testEmptyRoaringBitmap() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0.0f, 0.0f}, bf.makeEmptyMutableBitmap()); + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search( + new RadiusBound(new float[]{0.0f, 0.0f}, 5) + ); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertEquals(finalSet.size(), 0); + Assert.assertTrue(finalSet.isEmpty()); + } + + @Test + public void testSearchWithSplitLimitedBound() + { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree 
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{1, 3}, 2); + tree.insert(new float[]{4, 2}, 3); + tree.insert(new float[]{5, 0}, 4); + tree.insert(new float[]{-4, -3}, 5); + + Random rand = new Random(); + for (int i = 0; i < 4995; i++) { + tree.insert( + new float[]{(float) (rand.nextDouble() * 10 + 10.0), (float) (rand.nextDouble() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(new RadiusBound(new float[]{0, 0}, 5, 2)); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + + @Test + public void testSearchWithSplitLimitedBoundRoaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + tree.insert(new float[]{0, 0}, 1); + tree.insert(new float[]{1, 3}, 2); + tree.insert(new float[]{4, 2}, 3); + tree.insert(new float[]{5, 0}, 4); + tree.insert(new float[]{-4, -3}, 5); + + Random rand = new Random(); + for (int i = 0; i < 4995; i++) { + tree.insert( + new float[]{(float) (rand.nextDouble() * 10 + 10.0), (float) (rand.nextDouble() * 10 + 10.0)}, + i + ); + } + + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(new RadiusBound(new float[]{0, 0}, 5, 2)); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() >= 5); + + Set expected = Sets.newHashSet(1, 2, 3, 4, 5); + IntIterator iter = finalSet.iterator(); + while (iter.hasNext()) { + Assert.assertTrue(expected.contains(iter.next())); + } + } + + //@Test + public void showBenchmarks() + { + final int start = 1; + final int factor = 10; + final int end = 
10000000; + final int radius = 10; + + for (int numPoints = start; numPoints <= end; numPoints *= factor) { + try { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + + Stopwatch stopwatch = Stopwatch.createStarted(); + Random rand = new Random(); + for (int i = 0; i < numPoints; i++) { + tree.insert(new float[]{(float) (rand.nextDouble() * 100), (float) (rand.nextDouble() * 100)}, i); + } + long stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + System.out.printf("[%,d]: insert = %,d ms%n", numPoints, stop); + + stopwatch.reset().start(); + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + System.out.printf("[%,d]: size = %,d bytes%n", numPoints, searchTree.toBytes().length); + System.out.printf("[%,d]: buildImmutable = %,d ms%n", numPoints, stop); + + stopwatch.reset().start(); + + Iterable points = searchTree.search(new RadiusBound(new float[]{50, 50}, radius)); + + Iterables.size(points); + stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + + System.out.printf("[%,d]: search = %,dms%n", numPoints, stop); + + stopwatch.reset().start(); + + ImmutableBitmap finalSet = bf.union(points); + + stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + System.out.printf("[%,d]: union of %,d points in %,d ms%n", numPoints, finalSet.size(), stop); + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + } + + //@Test + public void showBenchmarksBoundWithLimits() + { + //final int start = 1; + final int start = 10000000; + final int factor = 10; + final int end = 10000000; + //final int end = 10; + + for (int numPoints = start; numPoints <= end; numPoints *= factor) { + try { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + + Stopwatch stopwatch = Stopwatch.createStarted(); + Random rand = new Random(); + for (int i = 0; i < numPoints; i++) 
{ + tree.insert(new float[]{(float) (rand.nextDouble() * 100), (float) (rand.nextDouble() * 100)}, i); + } + long stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + System.out.printf("[%,d]: insert = %,d ms%n", numPoints, stop); + + stopwatch.reset().start(); + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + System.out.printf("[%,d]: size = %,d bytes%n", numPoints, searchTree.toBytes().length); + System.out.printf("[%,d]: buildImmutable = %,d ms%n", numPoints, stop); + + stopwatch.reset().start(); + + Iterable points = searchTree.search( + new RectangularBound( + new float[]{40, 40}, + new float[]{60, 60}, + 100 + ) + ); + + Iterables.size(points); + stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + + System.out.printf("[%,d]: search = %,dms%n", numPoints, stop); + + stopwatch.reset().start(); + + ImmutableBitmap finalSet = bf.union(points); + + stop = stopwatch.elapsed(TimeUnit.MILLISECONDS); + System.out.printf("[%,d]: union of %,d points in %,d ms%n", numPoints, finalSet.size(), stop); + } + catch (Exception e) { + throw Throwables.propagate(e); + } + } + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/spatial/RTreeTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/RTreeTest.java new file mode 100755 index 000000000000..06986d8f7d1b --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/RTreeTest.java @@ -0,0 +1,116 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial; + +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.spatial.split.LinearGutmanSplitStrategy; +import junit.framework.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Random; + +/** + */ +public class RTreeTest +{ + private RTree tree; + private RTree roaringtree; + + @Before + public void setUp() throws Exception + { + BitmapFactory bf = new ConciseBitmapFactory(); + tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + BitmapFactory rbf = new RoaringBitmapFactory(); + roaringtree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, rbf), rbf); + + } + + @Test + public void testInsertNoSplit() + { + float[] elem = new float[]{5, 5}; + tree.insert(elem, 1); + Assert.assertTrue(Arrays.equals(elem, tree.getRoot().getMinCoordinates())); + Assert.assertTrue(Arrays.equals(elem, tree.getRoot().getMaxCoordinates())); + + tree.insert(new float[]{6, 7}, 2); + tree.insert(new float[]{1, 3}, 3); + tree.insert(new float[]{10, 4}, 4); + tree.insert(new float[]{8, 2}, 5); + + Assert.assertEquals(tree.getRoot().getChildren().size(), 5); + + float[] expectedMin = new float[]{1, 2}; + float[] expectedMax = new float[]{10, 7}; + + Assert.assertTrue(Arrays.equals(expectedMin, tree.getRoot().getMinCoordinates())); + Assert.assertTrue(Arrays.equals(expectedMax, tree.getRoot().getMaxCoordinates())); + 
Assert.assertEquals(tree.getRoot().getArea(), 45.0d); + } + + @Test + public void testInsertDuplicatesNoSplit() + { + tree.insert(new float[]{1, 1}, 1); + tree.insert(new float[]{1, 1}, 1); + tree.insert(new float[]{1, 1}, 1); + + Assert.assertEquals(tree.getRoot().getChildren().size(), 3); + } + + @Test + public void testInsertDuplicatesNoSplitRoaring() + { + roaringtree.insert(new float[]{1, 1}, 1); + roaringtree.insert(new float[]{1, 1}, 1); + roaringtree.insert(new float[]{1, 1}, 1); + + Assert.assertEquals(roaringtree.getRoot().getChildren().size(), 3); + } + + + @Test + public void testSplitOccurs() + { + Random rand = new Random(); + for (int i = 0; i < 100; i++) { + tree.insert(new float[]{rand.nextFloat(), rand.nextFloat()}, i); + } + + Assert.assertTrue(tree.getRoot().getChildren().size() > 1); + } + + @Test + public void testSplitOccursRoaring() + { + Random rand = new Random(); + for (int i = 0; i < 100; i++) { + roaringtree.insert(new float[]{rand.nextFloat(), rand.nextFloat()}, i); + } + + Assert.assertTrue(roaringtree.getRoot().getChildren().size() > 1); + } + +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/PolygonBoundTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/PolygonBoundTest.java new file mode 100755 index 000000000000..73ac85e65802 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/PolygonBoundTest.java @@ -0,0 +1,49 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial.search; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; + +public class PolygonBoundTest +{ + @Test + public void testCacheKey() + { + Assert.assertArrayEquals( + PolygonBound.from(new float[]{1F, 2F, 3F}, new float[]{0F, 2F, 0F}, 1).getCacheKey(), + PolygonBound.from(new float[]{1F, 2F, 3F}, new float[]{0F, 2F, 0F}, 1).getCacheKey() + ); + Assert.assertFalse(Arrays.equals( + PolygonBound.from(new float[]{1F, 2F, 3F}, new float[]{0F, 2F, 0F}, 1).getCacheKey(), + PolygonBound.from(new float[]{1F, 2F, 3F}, new float[]{0F, 2F, 1F}, 1).getCacheKey() + )); + Assert.assertFalse(Arrays.equals( + PolygonBound.from(new float[]{1F, 2F, 3F}, new float[]{0F, 2F, 0F}, 1).getCacheKey(), + PolygonBound.from(new float[]{1F, 2F, 2F}, new float[]{0F, 2F, 0F}, 1).getCacheKey() + )); + Assert.assertFalse(Arrays.equals( + PolygonBound.from(new float[]{1F, 2F, 3F}, new float[]{0F, 2F, 0F}, 1).getCacheKey(), + PolygonBound.from(new float[]{1F, 2F, 3F}, new float[]{0F, 2F, 0F}, 2).getCacheKey() + )); + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RadiusBoundTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RadiusBoundTest.java new file mode 100755 index 000000000000..46309bbcc9f2 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RadiusBoundTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial.search; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; + +public class RadiusBoundTest +{ + @Test + public void testCacheKey() + { + final float[] coords0 = new float[]{1.0F, 2.0F}; + final float[] coords1 = new float[]{1.1F, 2.1F}; + Assert.assertArrayEquals( + new RadiusBound(coords0, 3.0F, 10).getCacheKey(), + new RadiusBound(coords0, 3.0F, 10).getCacheKey() + ); + Assert.assertFalse(Arrays.equals( + new RadiusBound(coords0, 3.0F, 10).getCacheKey(), + new RadiusBound(coords1, 3.0F, 10).getCacheKey() + )); + Assert.assertFalse(Arrays.equals( + new RadiusBound(coords0, 3.0F, 10).getCacheKey(), + new RadiusBound(coords0, 3.1F, 10).getCacheKey() + )); + Assert.assertFalse(Arrays.equals( + new RadiusBound(coords0, 3.0F, 10).getCacheKey(), + new RadiusBound(coords0, 3.0F, 9).getCacheKey() + )); + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RectangularBoundTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RectangularBoundTest.java new file mode 100755 index 000000000000..025618be8cdd --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/search/RectangularBoundTest.java @@ -0,0 +1,49 @@ 
+/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.collections.spatial.search; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; + +public class RectangularBoundTest +{ + @Test + public void testCacheKey() + { + Assert.assertArrayEquals( + new RectangularBound(new float[]{1F, 1F}, new float[]{2F, 2F}, 1).getCacheKey(), + new RectangularBound(new float[]{1F, 1F}, new float[]{2F, 2F}, 1).getCacheKey() + ); + Assert.assertFalse(Arrays.equals( + new RectangularBound(new float[]{1F, 1F}, new float[]{2F, 2F}, 1).getCacheKey(), + new RectangularBound(new float[]{1F, 1F}, new float[]{2F, 3F}, 1).getCacheKey() + )); + Assert.assertFalse(Arrays.equals( + new RectangularBound(new float[]{1F, 1F}, new float[]{2F, 2F}, 1).getCacheKey(), + new RectangularBound(new float[]{1F, 0F}, new float[]{2F, 2F}, 1).getCacheKey() + )); + Assert.assertFalse(Arrays.equals( + new RectangularBound(new float[]{1F, 1F}, new float[]{2F, 2F}, 1).getCacheKey(), + new RectangularBound(new float[]{1F, 1F}, new float[]{2F, 2F}, 2).getCacheKey() + )); + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategyTest.java 
b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategyTest.java new file mode 100755 index 000000000000..5caeba4e6f13 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategyTest.java @@ -0,0 +1,118 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.collections.spatial.split; + +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.spatial.Node; +import io.druid.collections.spatial.Point; +import io.druid.collections.spatial.RTree; +import junit.framework.Assert; +import org.junit.Test; + +import java.util.Random; + +/** + */ +public class LinearGutmanSplitStrategyTest +{ + @Test + public void testPickSeeds() throws Exception + { + BitmapFactory bf = new ConciseBitmapFactory(); + LinearGutmanSplitStrategy strategy = new LinearGutmanSplitStrategy(0, 50, bf); + Node node = new Node(new float[2], new float[2], true, bf); + + node.addChild(new Point(new float[]{3, 7}, 1, bf)); + node.addChild(new Point(new float[]{1, 6}, 1, bf)); + node.addChild(new Point(new float[]{9, 8}, 1, bf)); + node.addChild(new Point(new float[]{2, 5}, 1, bf)); + node.addChild(new Point(new float[]{4, 4}, 1, bf)); + node.enclose(); + + Node[] groups = strategy.split(node); + Assert.assertEquals(groups[0].getMinCoordinates()[0], 1.0f); + Assert.assertEquals(groups[0].getMinCoordinates()[1], 4.0f); + Assert.assertEquals(groups[1].getMinCoordinates()[0], 9.0f); + Assert.assertEquals(groups[1].getMinCoordinates()[1], 8.0f); + } + + @Test + public void testPickSeedsRoaring() throws Exception + { + BitmapFactory bf = new RoaringBitmapFactory(); + LinearGutmanSplitStrategy strategy = new LinearGutmanSplitStrategy(0, 50, bf); + Node node = new Node(new float[2], new float[2], true, bf); + + node.addChild(new Point(new float[]{3, 7}, 1, bf)); + node.addChild(new Point(new float[]{1, 6}, 1, bf)); + node.addChild(new Point(new float[]{9, 8}, 1, bf)); + node.addChild(new Point(new float[]{2, 5}, 1, bf)); + node.addChild(new Point(new float[]{4, 4}, 1, bf)); + node.enclose(); + + Node[] groups = strategy.split(node); + Assert.assertEquals(groups[0].getMinCoordinates()[0], 1.0f); + 
Assert.assertEquals(groups[0].getMinCoordinates()[1], 4.0f); + Assert.assertEquals(groups[1].getMinCoordinates()[0], 9.0f); + Assert.assertEquals(groups[1].getMinCoordinates()[1], 8.0f); + } + + + @Test + public void testNumChildrenSize() + { + BitmapFactory bf = new ConciseBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + Random rand = new Random(); + for (int i = 0; i < 100; i++) { + tree.insert(new float[]{rand.nextFloat(), rand.nextFloat()}, i); + } + + Assert.assertTrue(getNumPoints(tree.getRoot()) >= tree.getSize()); + } + + @Test + public void testNumChildrenSizeRoaring() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + Random rand = new Random(); + for (int i = 0; i < 100; i++) { + tree.insert(new float[]{rand.nextFloat(), rand.nextFloat()}, i); + } + + Assert.assertTrue(getNumPoints(tree.getRoot()) >= tree.getSize()); + } + + private int getNumPoints(Node node) + { + int total = 0; + if (node.isLeaf()) { + total += node.getChildren().size(); + } else { + for (Node child : node.getChildren()) { + total += getNumPoints(child); + } + } + return total; + } +} diff --git a/bytebuffer-collections/src/test/java/io/druid/test/annotation/Benchmark.java b/bytebuffer-collections/src/test/java/io/druid/test/annotation/Benchmark.java new file mode 100755 index 000000000000..2fa616eef401 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/test/annotation/Benchmark.java @@ -0,0 +1,24 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.test.annotation; + +public interface Benchmark +{ +} diff --git a/bytebuffer-collections/src/test/java/io/druid/test/annotation/Dummy.java b/bytebuffer-collections/src/test/java/io/druid/test/annotation/Dummy.java new file mode 100755 index 000000000000..ae1d36869320 --- /dev/null +++ b/bytebuffer-collections/src/test/java/io/druid/test/annotation/Dummy.java @@ -0,0 +1,24 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.test.annotation; + +public interface Dummy +{ +} diff --git a/extendedset/pom.xml b/extendedset/pom.xml new file mode 100755 index 000000000000..2831462af2c8 --- /dev/null +++ b/extendedset/pom.xml @@ -0,0 +1,66 @@ + + + + + 4.0.0 + + extendedset + extendedset + + Implementation of CONCISE (COmpressed 'N' Composable Integer SEt) bit map compression algorithm by Alessandro + Colantonio with some enhanced features - http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf + + + + io.druid + druid + 0.9.3-SNAPSHOT + + + + + com.google.guava + guava + 16.0.1 + + + + + junit + junit + 4.8.1 + test + + + + + + + maven-compiler-plugin + 2.5.1 + + 1.7 + 1.7 + + + + + diff --git a/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java new file mode 100755 index 000000000000..c47eb79bc0df --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java @@ -0,0 +1,1432 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.druid.extendedset; + + +import java.util.AbstractCollection; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; + +/** + * This class provides a skeletal implementation of the {@link ExtendedSet} + * interface to minimize the effort required to implement this interface. + *

+ * The process of implementing a set by extending this class is very similar, + * for example, to that of implementing a {@link Collection} by extending + * {@link AbstractCollection}. + * + * @param the type of elements maintained by this set + * + * @author Alessandro Colantonio + * @version $Id: AbstractExtendedSet.java 157 2011-11-14 14:25:15Z cocciasik $ + */ +public abstract class AbstractExtendedSet extends AbstractSet implements ExtendedSet +{ + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet intersection(Collection other) + { + ExtendedSet clone = clone(); + clone.retainAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet union(Collection other) + { + ExtendedSet clone = clone(); + clone.addAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet difference(Collection other) + { + ExtendedSet clone = clone(); + clone.removeAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet symmetricDifference(Collection other) + { + ExtendedSet res = union(other); + res.removeAll(intersection(other)); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet complemented() + { + ExtendedSet clone = clone(); + clone.complement(); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection other) + { + return other == null || other.isEmpty() || intersectionSize(other) > 0; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + return intersectionSize(other) >= minElements; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(Collection other) + { + if (other == null || other.isEmpty() || isEmpty()) { + return 0; + } + return intersection(other).size(); + } + + /** + * {@inheritDoc} + */ + @Override + public int 
unionSize(Collection other) + { + return other == null ? size() : size() + other.size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(Collection other) + { + return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(Collection other) + { + return other == null ? size() : size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return complemented().size(); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract ExtendedSet empty(); + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet headSet(T toElement) + { + return new ExtendedSubSet(null, toElement); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet subSet(T fromElement, T toElement) + { + return new ExtendedSubSet(fromElement, toElement); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet tailSet(T fromElement) + { + return new ExtendedSubSet(fromElement, null); + } + + /** + * {@inheritDoc} + */ + @Override + public T first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return iterator().next(); + } + + /** + * {@inheritDoc} + */ + @Override + public T last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return descendingIterator().next(); + } + + /** + * {@inheritDoc} + *

+ * NOTE: When overriding this method, please note that + * Object.clone() is much slower then performing + * new and "manually" copying data! + */ + @SuppressWarnings("unchecked") + @Override + public ExtendedSet clone() + { + try { + return (ExtendedSet) super.clone(); + } + catch (CloneNotSupportedException e) { + throw new InternalError(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public abstract double bitmapCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + public abstract double collectionCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + @SuppressWarnings("unchecked") + public ExtendedIterator descendingIterator() + { + // used to compare items + Comparator tmpComp = AbstractExtendedSet.this.comparator(); + if (tmpComp == null) { + tmpComp = new Comparator() + { + @Override + public int compare(T o1, T o2) + { + return ((Comparable) o1).compareTo(o2); + } + }; + } + final Comparator comp = tmpComp; + + return new ExtendedIterator() + { + // iterator from last element + private final ListIterator itr = new ArrayList(AbstractExtendedSet.this) + .listIterator(AbstractExtendedSet.this.size()); + + @Override + public boolean hasNext() + { + return itr.hasPrevious(); + } + + @Override + public T next() + { + return itr.previous(); + } + + @Override + public void skipAllBefore(T element) + { + // iterate until the element is found + while (itr.hasPrevious()) { + int res = comp.compare(itr.previous(), element); + + // the element has not been found, thus the next call to + // itr.previous() will provide the right value + if (res < 0) { + return; + } + + // the element has been found. 
Hence, we have to get back + // to make itr.previous() provide the right value + if (res == 0) { + itr.next(); + return; + } + } + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public Iterable descending() + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return descendingIterator(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet() + { + return powerSet(1, Integer.MAX_VALUE); + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet(int min, int max) + { + if (min < 1 || max < min) { + throw new IllegalArgumentException(); + } + + // special cases + List> res = new ArrayList>(); + if (size() < min) { + return res; + } + if (size() == min) { + res.add(this.clone()); + return res; + } + if (size() == min + 1) { + for (T item : this.descending()) { + ExtendedSet set = this.clone(); + set.remove(item); + res.add(set); + } + if (max > min) { + res.add(this.clone()); + } + return res; + } + + // the first level contains only one prefix made up of all 1-subsets + List>> level = new ArrayList>>(); + level.add(new ArrayList>()); + for (T item : this) { + ExtendedSet single = this.empty(); + single.add(item); + level.get(0).add(single); + } + if (min == 1) { + res.addAll(level.get(0)); + } + + // all combinations + int l = 2; + while (!level.isEmpty() && l <= max) { + List>> newLevel = new ArrayList>>(); + for (List> prefix : level) { + for (int i = 0; i < prefix.size() - 1; i++) { + List> newPrefix = new ArrayList>(); + for (int j = i + 1; j < prefix.size(); j++) { + ExtendedSet x = prefix.get(i).clone(); + x.add(prefix.get(j).last()); + newPrefix.add(x); + if (l >= min) { + res.add(x); + } + } + if (newPrefix.size() > 1) { + newLevel.add(newPrefix); + } + } + } + level = newLevel; + l++; + } + + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize() + { + return 
isEmpty() ? 0 : (int) Math.pow(2, size()) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize(int min, int max) + { + if (min < 1 || max < min) { + throw new IllegalArgumentException(); + } + final int size = size(); + + // special cases + if (size < min) { + return 0; + } + if (size == min) { + return 1; + } + + /* + * Compute the sum of binomial coefficients ranging from (size choose + * max) to (size choose min) using dynamic programming + */ + + // trivial cases + max = Math.min(size, max); + if (max == min && (max == 0 || max == size)) { + return 1; + } + + // compute all binomial coefficients for "n" + int[] b = new int[size + 1]; + for (int i = 0; i <= size; i++) { + b[i] = 1; + } + for (int i = 1; i <= size; i++) { + for (int j = i - 1; j > 0; j--) { + b[j] += b[j - 1]; + } + } + + // sum binomial coefficients + int res = 0; + for (int i = min; i <= max; i++) { + res += b[i]; + } + return res; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public int compareTo(ExtendedSet o) + { + Iterator thisIterator = this.descendingIterator(); + Iterator otherIterator = o.descendingIterator(); + while (thisIterator.hasNext() && otherIterator.hasNext()) { + T thisItem = thisIterator.next(); + T otherItem = otherIterator.next(); + int res = ((Comparable) thisItem).compareTo(otherItem); + if (res != 0) { + return res; + } + } + return thisIterator.hasNext() ? 1 : (otherIterator.hasNext() ? 
-1 : 0); + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(T from, T to) + { + ExtendedSet toAdd = empty(); + toAdd.add(to); + toAdd.complement(); + toAdd.add(to); + + ExtendedSet toRemove = empty(); + toRemove.add(from); + toRemove.complement(); + + toAdd.removeAll(toRemove); + + this.addAll(toAdd); + } + + + /** + * {@inheritDoc} + * + * The upper bound {@code to} is inclusive, like in {@link #fill(Object, Object)}, so that clearing an interval undoes filling the same interval. + */ + @SuppressWarnings("unchecked") + @Override + public void clear(T from, T to) + { + ExtendedIterator itr = iterator(); + itr.skipAllBefore(from); + while (itr.hasNext()) { + /* "to" itself belongs to the interval, hence <= rather than < */ + if (((Comparable) itr.next()).compareTo(to) <= 0) { + itr.remove(); + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(T e) + { + if (!add(e)) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public T get(int i) + { + int size = size(); + if (i < 0 || i >= size) { + throw new IndexOutOfBoundsException(); + } + + Iterator itr; + if (i < (size / 2)) { + itr = iterator(); + for (int j = 0; j <= i - 1; j++) { + itr.next(); + } + } else { + itr = descendingIterator(); + for (int j = size - 1; j >= i + 1; j--) { + itr.next(); + } + } + return itr.next(); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(T e) + { + Iterator itr = iterator(); + int i = 0; + while (itr.hasNext()) { + if (itr.next().equals(e)) { + return i; + } + i++; + } + return -1; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet unmodifiable() + { + return new UnmodifiableExtendedSet(); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract ExtendedIterator iterator(); + + /** + * {@inheritDoc} + */ + @Override + public double jaccardSimilarity(ExtendedSet other) + { + if (isEmpty() && other.isEmpty()) { + return 1D; + } + int inters = intersectionSize(other); + return (double) inters / (size() + other.size() - inters); + } + + /** + * {@inheritDoc} + */ + @Override + public double jaccardDistance(ExtendedSet other) + { + return 1D - jaccardSimilarity(other); + } + + /** + * {@inheritDoc} + */ + 
@Override + public double weightedJaccardSimilarity(ExtendedSet other) + { + if (isEmpty() && other.isEmpty()) { + return 1D; + } + ExtendedSet inters = intersection(other); + double intersSum = 0D; + for (T t : inters) { + if (t instanceof Integer) { + intersSum += (Integer) t; + } else if (t instanceof Double) { + intersSum += (Double) t; + } else if (t instanceof Float) { + intersSum += (Float) t; + } else if (t instanceof Byte) { + intersSum += (Byte) t; + } else if (t instanceof Long) { + intersSum += (Long) t; + } else if (t instanceof Short) { + intersSum += (Short) t; + } else { + throw new IllegalArgumentException("A collection of numbers is required"); + } + } + + ExtendedSet symmetricDiff = symmetricDifference(other); + double symmetricDiffSum = 0D; + for (T t : symmetricDiff) { + if (t instanceof Integer) { + symmetricDiffSum += (Integer) t; + } else if (t instanceof Double) { + symmetricDiffSum += (Double) t; + } else if (t instanceof Float) { + symmetricDiffSum += (Float) t; + } else if (t instanceof Byte) { + symmetricDiffSum += (Byte) t; + } else if (t instanceof Long) { + symmetricDiffSum += (Long) t; + } else if (t instanceof Short) { + symmetricDiffSum += (Short) t; + } else { + throw new IllegalArgumentException("A collection of numbers is required"); + } + } + + return intersSum / (intersSum + symmetricDiffSum); + } + + /** + * {@inheritDoc} + */ + @Override + public double weightedJaccardDistance(ExtendedSet other) + { + return 1D - weightedJaccardSimilarity(other); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet convert(Object... 
e) + { + if (e == null) { + return empty(); + } + return convert(Arrays.asList(e)); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public ExtendedSet convert(Collection c) + { + ExtendedSet res = empty(); + res.addAll((Collection) c); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return toString(); + } + + /** + * Base class for {@link ExtendedSubSet} and {@link UnmodifiableExtendedSet} + */ + protected abstract class FilteredSet implements ExtendedSet + { + /** + * @return the container instance, namely the "internal" representation + */ + protected abstract ExtendedSet raw(); + + /* + * Converter methods that allows for good performances with collection + * operations by directly working on internal representation + */ + + /** + * Converts the given collection through the container instance returned by {@link #raw()}. + * When the argument is itself a view of this kind, its own container instance is converted instead of the view. + * + * @param c collection to convert + * + * @return the converted set, as produced by the container instance + */ + @Override + public ExtendedSet convert(Collection c) + { + if (c instanceof AbstractExtendedSet.FilteredSet) { + /* unwrap the view and hand back that result: it used to be computed and then discarded */ + return convert(((AbstractExtendedSet.FilteredSet) c).raw()); + } + return raw().convert(c); + } + + @Override + public ExtendedSet convert(Object... 
e) + { + return raw().convert(e); + } + + /* + * Methods that directly apply to container instance + */ + @Override + public ExtendedSet clone() {return AbstractExtendedSet.this.clone();} + + @Override + public ExtendedSet empty() {return AbstractExtendedSet.this.empty();} + + @Override + public Comparator comparator() {return AbstractExtendedSet.this.comparator();} + + /* + * Read-only methods + */ + @Override + public ExtendedSet unmodifiable() {return raw().unmodifiable();} + + @Override + public ExtendedIterator iterator() {return raw().iterator();} + + @Override + public ExtendedIterator descendingIterator() {return raw().descendingIterator();} + + @Override + public boolean isEmpty() {return raw().isEmpty();} + + @Override + public boolean equals(Object o) {return raw().equals(o);} + + @Override + public int hashCode() {return raw().hashCode();} + + @Override + public int compareTo(ExtendedSet o) {return raw().compareTo(o);} + + @Override + public T first() {return raw().first();} + + @Override + public T last() {return raw().last();} + + @Override + public double bitmapCompressionRatio() {return raw().bitmapCompressionRatio();} + + @Override + public double collectionCompressionRatio() {return raw().collectionCompressionRatio();} + + @Override + public List> powerSet() {return raw().powerSet();} + + @Override + public List> powerSet(int mins, int maxs) {return raw().powerSet(mins, maxs);} + + @Override + public int powerSetSize() {return raw().powerSetSize();} + + @Override + public int powerSetSize(int mins, int maxs) {return raw().powerSetSize(mins, maxs);} + + @Override + public Object[] toArray() {return raw().toArray();} + + @Override + public X[] toArray(X[] a) {return raw().toArray(a);} + + @Override + public String toString() {return raw().toString();} + + @Override + public ExtendedSet complemented() {return raw().complemented();} + + @Override + public int complementSize() {return raw().complementSize();} + + @Override + public int size() {return 
raw().size();} + + @Override + public boolean contains(Object o) {return raw().contains(o);} + + @Override + public Iterable descending() {return raw().descending();} + + @Override + public String debugInfo() {return raw().debugInfo();} + + @Override + public T get(int i) {return raw().get(i);} + + @Override + public int indexOf(T e) {return raw().indexOf(e);} + + /* + * Methods that requires a call to convert() to assure good performances + */ + @Override + public double jaccardDistance(ExtendedSet other) {return raw().jaccardDistance(convert(other));} + + @Override + public double jaccardSimilarity(ExtendedSet other) {return raw().jaccardSimilarity(convert(other));} + + @Override + public double weightedJaccardDistance(ExtendedSet other) {return raw().weightedJaccardDistance(convert(other));} + + @Override + public double weightedJaccardSimilarity(ExtendedSet other) {return raw().weightedJaccardSimilarity(convert(other));} + + @Override + public ExtendedSet difference(Collection other) {return raw().difference(convert(other));} + + @Override + public ExtendedSet symmetricDifference(Collection other) + { + return raw().symmetricDifference(convert(other)); + } + + @Override + public ExtendedSet intersection(Collection other) {return raw().intersection(convert(other));} + + @Override + public ExtendedSet union(Collection other) {return raw().union(convert(other));} + + @Override + public int intersectionSize(Collection other) {return raw().intersectionSize(convert(other));} + + @Override + public int differenceSize(Collection other) {return raw().differenceSize(convert(other));} + + @Override + public int unionSize(Collection other) {return raw().unionSize(convert(other));} + + @Override + public int symmetricDifferenceSize(Collection other) + { + return raw().symmetricDifferenceSize(convert(other)); + } + + @Override + public boolean containsAll(Collection c) {return raw().containsAll(convert(c));} + + @Override + public boolean containsAny(Collection other) 
{return raw().containsAny(convert(other));} + + @Override + public boolean containsAtLeast( + Collection other, + int minElements + ) + {return raw().containsAtLeast(convert(other), minElements);} + } + + /** + * Read-only view of the set. + *

+ * Note that it extends {@link AbstractExtendedSet} instead of implementing + * {@link ExtendedSet} because of the methods {@link #tailSet(Object)}, + * {@link #headSet(Object)}, and {@link #subSet(Object, Object)}. + */ + protected class UnmodifiableExtendedSet extends AbstractExtendedSet.FilteredSet + { + // exception message when writing operations are performed on {@link #unmodifiable()} + private final static String UNSUPPORTED_MSG = "The class is read-only!"; + + /* + * Unsupported writing methods + */ + @Override + public boolean add(T e) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean addAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean remove(Object o) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean removeAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean retainAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void clear() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void clear(T from, T to) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void fill(T from, T to) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void complement() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void flip(T e) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + /* + * Special purpose methods + */ + + // create new iterators where the remove() operation is not permitted + @Override + public ExtendedIterator iterator() + { + final ExtendedIterator itr = AbstractExtendedSet.this.iterator(); + return new ExtendedIterator() + { + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return itr.next();} + + @Override + public void 
skipAllBefore(T element) {itr.skipAllBefore(element);} + + @Override + public void remove() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + }; + } + + @Override + public ExtendedIterator descendingIterator() + { + final ExtendedIterator itr = AbstractExtendedSet.this.descendingIterator(); + return new ExtendedIterator() + { + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return itr.next();} + + @Override + public void skipAllBefore(T element) {itr.skipAllBefore(element);} + + @Override + public void remove() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + }; + } + + /** + * Returns a read-only subset + */ + // TODO: There is a known bug. Indeed, this implementation does not work + // since modifications to the read-write set are not reflected to the + // read-only set. + private ExtendedSet unmodifiableSubSet(T min, T max) + { + ExtendedSet res; + ExtendedSet range = AbstractExtendedSet.this.empty(); + if (min != null && max != null) { + range.fill(min, max); + range.remove(max); + res = AbstractExtendedSet.this.intersection(range).unmodifiable(); + } else if (max != null) { + range.add(max); + range.complement(); + res = AbstractExtendedSet.this.intersection(range).unmodifiable(); + } else { + range.add(min); + range.complement(); + res = AbstractExtendedSet.this.difference(range).unmodifiable(); + } + return res; + } + + // subset operations must be read-only + @Override + public ExtendedSet headSet(T toElement) {return unmodifiableSubSet(null, toElement);} + + @Override + public ExtendedSet subSet(T fromElement, T toElement) {return unmodifiableSubSet(fromElement, toElement);} + + @Override + public ExtendedSet tailSet(T fromElement) {return unmodifiableSubSet(fromElement, null);} + + @Override + public ExtendedSet unmodifiable() + { + // useless to create another instance + return this; + } + + @Override + protected ExtendedSet raw() + { + return AbstractExtendedSet.this; + } + } + + 
/** + * Used by {@link AbstractExtendedSet#headSet(Object)}, {@link AbstractExtendedSet#tailSet(Object)} and {@link AbstractExtendedSet#subSet(Object, Object)} to offer a restricted view of the entire set + */ + protected class ExtendedSubSet extends AbstractExtendedSet.FilteredSet + { + /** + * Minimum allowed element (included); the constructor accepts null here as long as {@link #max} is not null + */ + private final T min; + + /** + * Maximum allowed element (excluded); the constructor accepts null here as long as {@link #min} is not null + */ + private final T max; + + /** + * When max != null, it contains all elements from {@link #min} to {@link #max} - 1. Otherwise, it contains all the elements strictly below {@link #min} + * + * @uml.property name="range" + * @uml.associationEnd + */ + private final ExtendedSet range; + /** + * Comparator for elements of type T: the comparator of the enclosing set when it has one, natural ordering otherwise + */ + private final Comparator localComparator; + + + + /* + * PRIVATE UTILITY METHODS + */ + + // initialize the comparator + { + final Comparator c = AbstractExtendedSet.this.comparator(); + if (c != null) { + localComparator = c; + } else { + localComparator = new Comparator() + { + @SuppressWarnings("unchecked") + @Override + public int compare(T o1, T o2) + { + return ((Comparable) o1).compareTo(o2); + } + }; + } + } + + /** + * Creates the subset + * + * @param min minimum allowed element (included) + * @param max maximum allowed element (excluded) + * + * @throws IllegalArgumentException if both bounds are null, or if min is greater than max + */ + public ExtendedSubSet(T min, T max) + { + if (min == null && max == null) { + throw new IllegalArgumentException(); + } + + if (min != null && max != null + && localComparator.compare(min, max) > 0) { + throw new IllegalArgumentException("min > max"); + } + + this.min = min; + this.max = max; + + // add all elements that are strictly less than "max" + range = AbstractExtendedSet.this.empty(); + if (min != null && max != null) { + range.fill(min, max); + range.remove(max); + } else if (max != null) { + range.add(max); + range.complement(); + } else { + range.add(min); + range.complement(); + } + } + + /** + * Checks 
if a given set is completely contained within {@link #min} and + * {@link #max} + * + * @param other given set + * + * @return true if the given set is completely contained + * within {@link #min} and {@link #max} + */ + private boolean isInRange(ExtendedSet other) + { + return other.isEmpty() || + ((max == null || localComparator.compare(other.last(), max) < 0) + && (min == null || localComparator.compare(other.first(), min) >= 0)); + } + + /** + * Checks if a given element is completely contained within {@link #min} + * and {@link #max} + * + * @param e given element + * + * @return true if the given element is completely + * contained within {@link #min} and {@link #max} + */ + @SuppressWarnings("unchecked") + private boolean isInRange(Object e) + { + return (max == null || localComparator.compare((T) e, max) < 0) + && (min == null || localComparator.compare((T) e, min) >= 0); + } + + /** + * Generates a set that represent a subview of the given set, namely + * elements from {@link #min} (included) to {@link #max} (excluded) + * + * @param toFilter given set + * + * @return the subview + */ + private ExtendedSet filter(ExtendedSet toFilter) + { + if (isInRange(toFilter)) { + return toFilter; + } + if (max != null) { + return toFilter.intersection(range); + } + return toFilter.difference(range); + } + + + @Override + protected ExtendedSet raw() + { + return filter(AbstractExtendedSet.this); + } + + + + /* + * PUBLIC METHODS + */ + + /** + * Returns a narrower view that keeps the lower bound of this view. + * + * @param toElement new upper bound (excluded) + * + * @throws IllegalArgumentException if this view has an upper bound and toElement is greater than it + */ + @Override + public ExtendedSet headSet(T toElement) + { + /* a null max means that this view has no upper bound: there is nothing to compare against */ + if (max != null && localComparator.compare(toElement, max) > 0) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.new ExtendedSubSet(min, toElement); + } + + /** + * Returns a narrower view delimited by the given bounds. + * + * @param fromElement new lower bound (included) + * @param toElement new upper bound (excluded) + * + * @throws IllegalArgumentException if a given bound falls outside the corresponding non-null bound of this view + */ + @Override + public ExtendedSet subSet(T fromElement, T toElement) + { + /* each side is checked only when this view is actually bounded on that side */ + if ((min != null && localComparator.compare(fromElement, min) < 0) + || (max != null && localComparator.compare(toElement, max) > 0)) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.new ExtendedSubSet(fromElement, toElement); + } + + 
@Override + public ExtendedSet tailSet(T fromElement) + { + if (localComparator.compare(fromElement, min) < 0) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.new ExtendedSubSet(fromElement, max); + } + + @Override + public boolean addAll(Collection c) + { + if (c == null) { + return false; + } + ExtendedSet other = convert(c); + if (!isInRange(other)) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.addAll(other); + } + + @Override + public boolean removeAll(Collection c) + { + if (c == null) { + return false; + } + return AbstractExtendedSet.this.removeAll(filter(convert(c))); + } + + @Override + public boolean retainAll(Collection c) + { + if (c == null) { + return false; + } + ExtendedSet other = convert(c); + + if (isInRange(AbstractExtendedSet.this)) { + return AbstractExtendedSet.this.retainAll(other); + } + + int sizeBefore = AbstractExtendedSet.this.size(); + ExtendedSet res = AbstractExtendedSet.this.intersection(other); + clear(); + AbstractExtendedSet.this.addAll(res); + return AbstractExtendedSet.this.size() != sizeBefore; + } + + @Override + public boolean containsAll(Collection c) + { + if (c == null) { + return false; + } + ExtendedSet other = convert(c); + return isInRange(other) && AbstractExtendedSet.this.containsAll(other); + } + + @Override + public boolean add(T e) + { + if (!isInRange(e)) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.add(e); + } + + @Override + public void clear() + { + if (isInRange(AbstractExtendedSet.this)) { + AbstractExtendedSet.this.clear(); + } else if (max != null) { + AbstractExtendedSet.this.removeAll(range); + } else { + AbstractExtendedSet.this.retainAll(range); + } + } + + @Override + public boolean contains(Object o) + { + return o != null && isInRange(o) && AbstractExtendedSet.this.contains(o); + } + + @Override + public boolean remove(Object o) + { + return o != null && isInRange(o) && 
AbstractExtendedSet.this.remove(o); + } + + @Override + public int size() + { + if (isInRange(AbstractExtendedSet.this)) { + return AbstractExtendedSet.this.size(); + } + if (max != null) { + return AbstractExtendedSet.this.intersectionSize(range); + } + return AbstractExtendedSet.this.differenceSize(range); + } + + @Override + public void complement() + { + ExtendedSet c = complemented(); + clear(); + AbstractExtendedSet.this.addAll(c); + } + + @Override + public int complementSize() + { + return complemented().size(); + } + + @Override + public ExtendedSet complemented() + { + return filter(raw().complemented()); + } + + @Override + public String debugInfo() + { + return String.format("min = %s, max = %s\nmask = %s\nelements = %s", + min.toString(), max.toString(), range.debugInfo(), AbstractExtendedSet.this.toString() + ); + } + + @Override + public void clear(T from, T to) + { + ExtendedSet toRemove = empty(); + toRemove.fill(from, to); + removeAll(toRemove); + } + + @Override + public boolean containsAny(Collection other) + { + return AbstractExtendedSet.this.containsAny(filter(convert(other))); + } + + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + return AbstractExtendedSet.this.containsAtLeast(filter(convert(other)), minElements); + } + + @Override + public Iterable descending() + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return descendingIterator(); + } + }; + } + + @Override + public void fill(T from, T to) + { + if (!isInRange(from) || !isInRange(to)) { + throw new IllegalArgumentException(); + } + AbstractExtendedSet.this.fill(from, to); + } + + @Override + public void flip(T e) + { + if (!isInRange(e)) { + throw new IllegalArgumentException(); + } + AbstractExtendedSet.this.flip(e); + } + + @Override + public T get(int i) + { + int minIndex = 0; + if (min != null) { + minIndex = AbstractExtendedSet.this.indexOf(min); + } + T r = AbstractExtendedSet.this.get(minIndex + i); + if 
(!isInRange(r)) { + throw new IllegalArgumentException(); + } + return r; + } + + @Override + public int indexOf(T e) + { + if (!isInRange(e)) { + throw new IllegalArgumentException(); + } + int minIndex = 0; + if (min != null) { + minIndex = AbstractExtendedSet.this.indexOf(min); + } + return AbstractExtendedSet.this.indexOf(e) - minIndex; + } + + @Override + public ExtendedSet clone() + { + return raw(); + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java new file mode 100755 index 000000000000..beaa52368adc --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java @@ -0,0 +1,592 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * An interface which extends {@link SortedSet} by adding
 * intersection/union/difference and other set operations.
 *
 * @param <T> the type of elements maintained by this set
 *
 * @author Alessandro Colantonio
 * @version $Id: ExtendedSet.java 140 2011-02-07 21:30:29Z cocciasik $
 * @see io.druid.extendedset.AbstractExtendedSet
 * @see io.druid.extendedset.wrappers.IndexedSet
 * @see io.druid.extendedset.wrappers.GenericExtendedSet
 * @see io.druid.extendedset.intset.ArraySet
 * @see io.druid.extendedset.wrappers.IntegerSet
 * @see io.druid.extendedset.wrappers.LongSet
 * @see io.druid.extendedset.wrappers.matrix.PairSet
 */
public interface ExtendedSet<T> extends SortedSet<T>, Cloneable, Comparable<ExtendedSet<T>>
{
  /**
   * Generates the intersection set.
   *
   * @param other collection that represents the right operand
   *
   * @return the result of the operation
   *
   * @see #retainAll(java.util.Collection)
   */
  public ExtendedSet<T> intersection(Collection<? extends T> other);

  /**
   * Generates the union set.
   *
   * @param other collection that represents the right operand
   *
   * @return the result of the operation
   *
   * @see #addAll(java.util.Collection)
   */
  public ExtendedSet<T> union(Collection<? extends T> other);

  /**
   * Generates the difference set.
   *
   * @param other collection that represents the right operand
   *
   * @return the result of the operation
   *
   * @see #removeAll(java.util.Collection)
   */
  public ExtendedSet<T> difference(Collection<? extends T> other);

  /**
   * Generates the symmetric difference set.
   *
   * @param other collection that represents the right operand
   *
   * @return the result of the operation
   *
   * @see #flip(Object)
   */
  public ExtendedSet<T> symmetricDifference(Collection<? extends T> other);

  /**
   * Generates the complement set. The returned set is represented by all the
   * elements strictly less than {@link #last()} that do not exist in the
   * current set.
   *
   * @return the complement set
   *
   * @see ExtendedSet#complement()
   */
  public ExtendedSet<T> complemented();

  /**
   * Complements the current set. The modified set is represented by all the
   * elements strictly less than {@link #last()} that do not exist in the
   * current set.
   *
   * @see ExtendedSet#complemented()
   */
  public void complement();

  /**
   * Returns {@code true} if the specified {@link Collection} instance
   * contains any elements that are also contained within this
   * {@link ExtendedSet} instance.
   *
   * @param other collection to intersect with
   *
   * @return a boolean indicating whether this {@link ExtendedSet} intersects
   * the specified collection
   */
  public boolean containsAny(Collection<? extends T> other);

  /**
   * Returns {@code true} if the specified {@link Collection} instance
   * contains at least {@code minElements} elements that are also
   * contained within this {@link ExtendedSet} instance.
   *
   * @param other       {@link Collection} instance to intersect with
   * @param minElements minimum number of elements to be contained within this
   *                    {@link ExtendedSet} instance
   *
   * @return a boolean indicating whether this {@link ExtendedSet} shares at least
   * {@code minElements} elements with the specified {@link Collection}
   *
   * @throws IllegalArgumentException if {@code minElements < 1}
   */
  public boolean containsAtLeast(Collection<? extends T> other, int minElements);

  /**
   * Computes the intersection set size.
   * <p>
   * This is faster than calling {@link #intersection(Collection)} and
   * then {@link #size()}.
   *
   * @param other {@link Collection} instance that represents the right operand
   *
   * @return the size
   */
  public int intersectionSize(Collection<? extends T> other);

  /**
   * Computes the union set size.
   * <p>
   * This is faster than calling {@link #union(Collection)} and then
   * {@link #size()}.
   *
   * @param other {@link Collection} instance that represents the right operand
   *
   * @return the size
   */
  public int unionSize(Collection<? extends T> other);

  /**
   * Computes the symmetric difference set size.
   * <p>
   * This is faster than calling {@link #symmetricDifference(Collection)}
   * and then {@link #size()}.
   *
   * @param other {@link Collection} instance that represents the right operand
   *
   * @return the size
   */
  public int symmetricDifferenceSize(Collection<? extends T> other);

  /**
   * Computes the difference set size.
   * <p>
   * This is faster than calling {@link #difference(Collection)} and
   * then {@link #size()}.
   *
   * @param other {@link Collection} instance that represents the right operand
   *
   * @return the size
   */
  public int differenceSize(Collection<? extends T> other);

  /**
   * Computes the complement set size.
   * <p>
   * This is faster than calling {@link #complemented()} and then
   * {@link #size()}.
   *
   * @return the size
   */
  public int complementSize();

  /**
   * Generates an empty set.
   *
   * @return the empty set
   */
  public ExtendedSet<T> empty();

  /**
   * See the {@code clone()} of {@link Object}.
   *
   * @return cloned object
   */
  public ExtendedSet<T> clone();

  /**
   * Computes the compression factor of the equivalent bitmap representation
   * (1 means not compressed, namely a memory footprint similar to
   * {@link java.util.BitSet}, 2 means twice the size of
   * {@link java.util.BitSet}, etc.)
   *
   * @return the compression factor
   */
  public double bitmapCompressionRatio();

  /**
   * Computes the compression factor of the equivalent integer collection (1
   * means not compressed, namely a memory footprint similar to
   * {@link java.util.ArrayList}, 2 means twice the size of
   * {@link java.util.ArrayList}, etc.)
   *
   * @return the compression factor
   */
  public double collectionCompressionRatio();

  /**
   * {@inheritDoc}
   */
  @Override
  public ExtendedIterator<T> iterator();

  /**
   * Gets the descending order iterator over the elements of type {@code T}.
   *
   * @return descending iterator
   */
  public ExtendedIterator<T> descendingIterator();

  /**
   * Allows to use the Java "for-each" statement in descending order.
   *
   * @return {@link Iterable} instance to iterate items in descending order
   */
  public Iterable<T> descending();

  /**
   * Computes the power-set of the current set.
   * <p>
   * It is a particular implementation of the algorithm <i>Apriori</i> (see:
   * Rakesh Agrawal, Ramakrishnan Srikant, <i>Fast Algorithms for Mining
   * Association Rules in Large Databases</i>, in Proceedings of the
   * 20<sup>th</sup> International Conference on Very Large Data Bases,
   * p.487-499, 1994). The returned power-set does <i>not</i> contain the
   * empty set.
   * <p>
   * The subsets composing the powerset are returned in a list that is sorted
   * according to the lexicographical order provided by the sorted set.
   *
   * @return the power-set
   *
   * @see #powerSet(int, int)
   * @see #powerSetSize()
   */
  public List<? extends ExtendedSet<T>> powerSet();

  /**
   * Computes a subset of the power-set of the current set, composed by those
   * subsets that have cardinality between {@code min} and {@code max}.
   * <p>
   * It is a particular implementation of the algorithm <i>Apriori</i> (see:
   * Rakesh Agrawal, Ramakrishnan Srikant, <i>Fast Algorithms for Mining
   * Association Rules in Large Databases</i>, in Proceedings of the
   * 20<sup>th</sup> International Conference on Very Large Data Bases,
   * p.487-499, 1994). The power-set does <i>not</i> contain the empty set.
   * <p>
   * The subsets composing the powerset are returned in a list that is sorted
   * according to the lexicographical order provided by the sorted set.
   *
   * @param min minimum subset size (greater than zero)
   * @param max maximum subset size
   *
   * @return the power-set
   *
   * @see #powerSet()
   * @see #powerSetSize(int, int)
   */
  public List<? extends ExtendedSet<T>> powerSet(int min, int max);

  /**
   * Computes the power-set size of the current set.
   * <p>
   * The power-set does <i>not</i> contain the empty set.
   *
   * @return the power-set size
   *
   * @see #powerSet()
   */
  public int powerSetSize();

  /**
   * Computes the power-set size of the current set, composed by those subsets
   * that have cardinality between {@code min} and {@code max}.
   * <p>
   * The returned power-set does <i>not</i> contain the empty set.
   *
   * @param min minimum subset size (greater than zero)
   * @param max maximum subset size
   *
   * @return the power-set size
   *
   * @see #powerSet(int, int)
   */
  public int powerSetSize(int min, int max);

  /**
   * Prints debug info about the given {@link ExtendedSet} implementation.
   *
   * @return a string that describes the internal representation of the instance
   */
  public String debugInfo();

  /**
   * Adds to the set all the elements between {@code from} and {@code to},
   * both included. It supposes that there is an ordering of the elements of
   * type {@code T} and that the universe of all possible elements is known.
   *
   * @param from first element
   * @param to   last element
   */
  public void fill(T from, T to);

  /**
   * Removes from the set all the elements between {@code from} and
   * {@code to}, both included. It supposes that there is an ordering of the
   * elements of type {@code T} and that the universe of all possible
   * elements is known.
   *
   * @param from first element
   * @param to   last element
   */
  public void clear(T from, T to);

  /**
   * Adds the element if it does not exist, or removes it if it does.
   *
   * @param e element to flip
   *
   * @see #symmetricDifference(Collection)
   */
  public void flip(T e);

  /**
   * Gets the read-only version of the current set.
   *
   * @return the read-only version of the current set
   */
  public ExtendedSet<T> unmodifiable();

  /**
   * Gets the i<sup>th</sup> element of the set.
   *
   * @param i position of the element in the sorted set
   *
   * @return the i<sup>th</sup> element of the set
   *
   * @throws IndexOutOfBoundsException if {@code i} is less than zero, or greater or
   *                                   equal to {@link #size()}
   */
  public T get(int i);

  /**
   * Provides the position of an element within the set.
   * <p>
   * It returns -1 if the element does not exist within the set.
   *
   * @param e element of the set
   *
   * @return the element position
   */
  public int indexOf(T e);

  /**
   * {@inheritDoc}
   */
  @Override
  public ExtendedSet<T> tailSet(T fromElement);

  /**
   * {@inheritDoc}
   */
  @Override
  public ExtendedSet<T> headSet(T toElement);

  /**
   * {@inheritDoc}
   */
  @Override
  public ExtendedSet<T> subSet(T fromElement, T toElement);

  /**
   * Converts a given {@link Collection} instance into an instance of the
   * current class. NOTE: when the collection is already an instance of
   * the current class, the method returns the collection itself.
   *
   * @param c collection to use to generate the new instance
   *
   * @return the converted collection
   *
   * @see #convert(Object...)
   */
  public ExtendedSet<T> convert(Collection<?> c);

  /**
   * Converts a given object array into an instance of the current class.
   *
   * @param e objects to use to generate the new instance
   *
   * @return the converted collection
   *
   * @see #convert(Collection)
   */
  public ExtendedSet<T> convert(Object... e);

  /**
   * Computes the Jaccard similarity coefficient between this set and the
   * given set.
   * <p>
   * The coefficient is defined as
   * {@code |A intersection B| / |A union B|}.
   *
   * @param other the other set
   *
   * @return the Jaccard similarity coefficient
   *
   * @see #jaccardDistance(ExtendedSet)
   */
  public double jaccardSimilarity(ExtendedSet<T> other);

  /**
   * Computes the Jaccard distance between this set and the given set.
   * <p>
   * The coefficient is defined as
   * {@code 1 - } {@link #jaccardSimilarity(ExtendedSet)}.
   *
   * @param other the other set
   *
   * @return the Jaccard distance
   *
   * @see #jaccardSimilarity(ExtendedSet)
   */
  public double jaccardDistance(ExtendedSet<T> other);

  /**
   * Computes the weighted version of the Jaccard similarity coefficient
   * between this set and the given set.
   * <p>
   * The coefficient is defined as
   * {@code sum of min(A_i, B_i) / sum of max(A_i, B_i)}.
   * <p>
   * <b>NOTE:</b> {@code T} must be a number, namely one of
   * {@link Integer}, {@link Double}, {@link Float}, {@link Byte},
   * {@link Long}, {@link Short}.
   *
   * @param other the other set
   *
   * @return the weighted Jaccard similarity coefficient
   *
   * @throws IllegalArgumentException if {@code T} is not a number
   * @see #weightedJaccardDistance(ExtendedSet)
   */
  public double weightedJaccardSimilarity(ExtendedSet<T> other);

  /**
   * Computes the weighted version of the Jaccard distance between this set
   * and the given set.
   * <p>
   * The coefficient is defined as {@code 1 - }
   * {@link #weightedJaccardSimilarity(ExtendedSet)}.
   * <p>
   * <b>NOTE:</b> {@code T} must be a number, namely one of
   * {@link Integer}, {@link Double}, {@link Float}, {@link Byte},
   * {@link Long}, {@link Short}.
   *
   * @param other the other set
   *
   * @return the weighted Jaccard distance
   *
   * @throws IllegalArgumentException if {@code T} is not a number
   * @see #weightedJaccardSimilarity(ExtendedSet)
   */
  public double weightedJaccardDistance(ExtendedSet<T> other);

  /**
   * Compares this object with the specified object for order. Returns a
   * negative integer, zero, or a positive integer as this object is less
   * than, equal to, or greater than the specified object. An
   * {@link ExtendedSet} instance {@code A} is less than another
   * {@link ExtendedSet} instance {@code B} if {@code B-A} (that is, the
   * elements in {@code B} that are not contained in {@code A}) contains at
   * least one element that is greater than all the elements in {@code A-B}.
   * <p>
   * The implementor must ensure {@code sgn(x.compareTo(y)) ==
   * -sgn(y.compareTo(x))} for all {@code x} and {@code y}. (This implies
   * that {@code x.compareTo(y)} must throw an exception iff
   * {@code y.compareTo(x)} throws an exception.)
   * <p>
   * The implementor must also ensure that the relation is transitive:
   * {@code (x.compareTo(y)>0 && y.compareTo(z)>0)} implies
   * {@code x.compareTo(z)>0}.
   * <p>
   * Finally, the implementor must ensure that {@code x.compareTo(y)==0}
   * implies that {@code sgn(x.compareTo(z)) == sgn(y.compareTo(z))}, for all
   * {@code z}.
   * <p>
   * It is strongly recommended, but <i>not</i> strictly required that
   * {@code (x.compareTo(y)==0) == (x.equals(y))}. Generally speaking, any
   * class that implements the {@code Comparable} interface and violates this
   * condition should clearly indicate this fact. The recommended language is
   * "Note: this class has a natural ordering that is inconsistent with
   * equals."
   * <p>
   * In the foregoing description, the notation {@code sgn(expression)}
   * designates the mathematical <i>signum</i> function, which is defined to
   * return one of {@code -1}, {@code 0}, or {@code 1} according to whether
   * the value of <i>expression</i> is negative, zero or positive.
   *
   * @param o the object to be compared.
   *
   * @return a negative integer, zero, or a positive integer as this object is
   * less than, equal to, or greater than the specified object.
   *
   * @throws ClassCastException if the specified object's type prevents it from being
   *                            compared to this object.
   */
  @Override
  public int compareTo(ExtendedSet<T> o);

  /**
   * Extended version of the {@link Iterator} interface that allows to "skip"
   * some elements of the set.
   *
   * @param <X> the type of elements maintained by this set
   */
  public interface ExtendedIterator<X> extends Iterator<X>
  {
    /**
     * Skips all the elements before the specified element, so that
     * {@link Iterator#next()} gives the given element or, if it does not
     * exist, the element immediately after according to the sorting
     * provided by this {@link SortedSet} instance.
     * <p>
     * If {@code element} is less than the next element, it does nothing.
     *
     * @param element first element to not skip
     */
    public void skipAllBefore(X element);
  }
}
+ * + * @author Alessandro Colantonio + * @version $Id: AbstractIntSet.java 156 2011-09-01 00:13:57Z cocciasik $ + */ +public abstract class AbstractIntSet implements IntSet +{ + /** + * {@inheritDoc} + */ + @Override + public IntSet union(IntSet other) + { + IntSet res = clone(); + res.addAll(other); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet difference(IntSet other) + { + IntSet res = clone(); + res.removeAll(other); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet intersection(IntSet other) + { + IntSet res = clone(); + res.retainAll(other); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet symmetricDifference(IntSet c) + { + IntSet res = clone(); + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + res.flip(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet complemented() + { + IntSet res = clone(); + res.complement(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + if (isEmpty()) { + return; + } + for (int e = last(); e >= 0; e--) { + flip(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + IntIterator itr = c.iterator(); + boolean res = true; + while (res && itr.hasNext()) { + res &= contains(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet c) + { + IntIterator itr = c.iterator(); + boolean res = true; + while (res && itr.hasNext()) { + if (contains(itr.next())) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + IntIterator itr = c.iterator(); + while (minElements > 0 && itr.hasNext()) { + if (contains(itr.next())) { + minElements--; + } + } + return minElements == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet c) + { + int 
res = 0; + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + if (contains(itr.next())) { + res++; + } + } + return res; + + } + + /** + * {@inheritDoc} + */ + @Override + public int unionSize(IntSet other) + { + return other == null ? size() : size() + other.size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(IntSet other) + { + return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(IntSet other) + { + return other == null ? size() : size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + if (isEmpty()) { + return 0; + } + return last() - size() + 1; + } + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet empty(); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet clone(); + + /** + * {@inheritDoc} + */ + @Override + public abstract double bitmapCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + public abstract double collectionCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntIterator iterator(); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntIterator descendingIterator(); + + /** + * {@inheritDoc} + */ + @Override + public abstract String debugInfo(); + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + IntIterator itr = iterator(); + while (itr.hasNext()) { + itr.next(); + itr.remove(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + for (int e = from; e <= to; e++) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + for 
(int e = from; e <= to; e++) { + add(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) + { + if (!add(e)) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public abstract int get(int i); + + /** + * {@inheritDoc} + */ + @Override + public abstract int indexOf(int e); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet convert(int... a); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet convert(Collection c); + + /** + * {@inheritDoc} + */ + @Override + public int first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return iterator().next(); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract int last(); + + /** + * {@inheritDoc} + */ + @Override + public abstract int size(); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean isEmpty(); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean contains(int i); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean add(int i); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean remove(int i); + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr = c.iterator(); + boolean res = false; + while (itr.hasNext()) { + res |= add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr = c.iterator(); + boolean res = false; + while (itr.hasNext()) { + res |= remove(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr = iterator(); + boolean res = false; + while (itr.hasNext()) { + int e = itr.next(); + if (!c.contains(e)) { + res = true; + itr.remove(); + } + } + return res; + } + + 
/** + * {@inheritDoc} + */ + @Override + public int[] toArray() + { + if (isEmpty()) { + return null; + } + return toArray(new int[size()]); + } + + /** + * {@inheritDoc} + */ + @Override + public int[] toArray(int[] a) + { + if (a.length < size()) { + a = new int[size()]; + } + IntIterator itr = iterator(); + int i = 0; + while (itr.hasNext()) { + a[i++] = itr.next(); + } + for (; i < a.length; i++) { + a[i] = 0; + } + return a; + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + IntIterator itr = iterator(); + if (!itr.hasNext()) { + return "[]"; + } + + StringBuilder sb = new StringBuilder(); + sb.append('['); + for (; ; ) { + int e = itr.next(); + sb.append(e); + if (!itr.hasNext()) { + return sb.append(']').toString(); + } + sb.append(", "); + } + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(IntSet o) + { + IntIterator thisIterator = this.descendingIterator(); + IntIterator otherIterator = o.descendingIterator(); + while (thisIterator.hasNext() && otherIterator.hasNext()) { + int thisItem = thisIterator.next(); + int otherItem = otherIterator.next(); + if (thisItem < otherItem) { + return -1; + } + if (thisItem > otherItem) { + return 1; + } + } + return thisIterator.hasNext() ? 1 : (otherIterator.hasNext() ? 
-1 : 0); + } + + /** + * {@inheritDoc} + */ + @Override + public List powerSet() + { + return powerSet(1, Integer.MAX_VALUE); + } + + /** + * {@inheritDoc} + */ + @Override + public List powerSet(int min, int max) + { + if (min < 1 || max < min) { + throw new IllegalArgumentException(); + } + + // special cases + List res = new ArrayList(); + if (size() < min) { + return res; + } + if (size() == min) { + res.add(clone()); + return res; + } + if (size() == min + 1) { + IntIterator itr = descendingIterator(); + while (itr.hasNext()) { + IntSet set = clone(); + set.remove(itr.next()); + res.add(set); + } + if (max > min) { + res.add(clone()); + } + return res; + } + + // the first level contains only one prefix made up of all 1-subsets + List> level = new ArrayList>(); + level.add(new ArrayList()); + IntIterator itr = iterator(); + while (itr.hasNext()) { + IntSet single = empty(); + single.add(itr.next()); + level.get(0).add(single); + } + if (min == 1) { + res.addAll(level.get(0)); + } + + // all combinations + int lvl = 2; + while (!level.isEmpty() && lvl <= max) { + List> newLevel = new ArrayList>(); + for (List prefix : level) { + for (int i = 0; i < prefix.size() - 1; i++) { + List newPrefix = new ArrayList(); + for (int j = i + 1; j < prefix.size(); j++) { + IntSet x = prefix.get(i).clone(); + x.add(prefix.get(j).last()); + newPrefix.add(x); + if (lvl >= min) { + res.add(x); + } + } + if (newPrefix.size() > 1) { + newLevel.add(newPrefix); + } + } + } + level = newLevel; + lvl++; + } + + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize() + { + return isEmpty() ? 
0 : (int) Math.pow(2, size()) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize(int min, int max) + { + if (min < 1 || max < min) { + throw new IllegalArgumentException(); + } + final int size = size(); + + // special cases + if (size < min) { + return 0; + } + if (size == min) { + return 1; + } + + /* + * Compute the sum of binomial coefficients ranging from (size choose + * max) to (size choose min) using dynamic programming + */ + + // trivial cases + max = Math.min(size, max); + if (max == min && (max == 0 || max == size)) { + return 1; + } + + // compute all binomial coefficients for "n" + int[] b = new int[size + 1]; + for (int i = 0; i <= size; i++) { + b[i] = 1; + } + for (int i = 1; i <= size; i++) { + for (int j = i - 1; j > 0; j--) { + b[j] += b[j - 1]; + } + } + + // sum binomial coefficients + int res = 0; + for (int i = min; i <= max; i++) { + res += b[i]; + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public double jaccardSimilarity(IntSet other) + { + if (isEmpty() && other.isEmpty()) { + return 1D; + } + int inters = intersectionSize(other); + return (double) inters / (size() + other.size() - inters); + } + + /** + * {@inheritDoc} + */ + @Override + public double jaccardDistance(IntSet other) + { + return 1D - jaccardSimilarity(other); + } + + /** + * {@inheritDoc} + */ + @Override + public double weightedJaccardSimilarity(IntSet other) + { + if (isEmpty() && other.isEmpty()) { + return 1D; + } + IntIterator itr = intersection(other).iterator(); + double intersectionSum = 0D; + while (itr.hasNext()) { + intersectionSum += itr.next(); + } + + itr = symmetricDifference(other).iterator(); + double symmetricDifferenceSum = 0D; + while (itr.hasNext()) { + symmetricDifferenceSum += itr.next(); + } + + return intersectionSum / (intersectionSum + symmetricDifferenceSum); + } + + /** + * {@inheritDoc} + */ + @Override + public double weightedJaccardDistance(IntSet other) + { + return 1D - 
weightedJaccardSimilarity(other); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + // special cases + if (this == obj) { + return true; + } + if (!(obj instanceof IntSet)) { + return false; + } + if (size() != ((IntSet) obj).size()) { + return false; + } + + // compare all the integrals, according to their natural order + IntIterator itr1 = iterator(); + IntIterator itr2 = ((IntSet) obj).iterator(); + while (itr1.hasNext()) { + if (itr1.next() != itr2.next()) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + * + * The empty set hashes to 0. Otherwise every element returned by + * {@link #iterator()} is folded, in iteration order, into + * h = 31 * h + element, starting from h = 1. + */ + @Override + public int hashCode() + { + if (isEmpty()) { + return 0; + } + int h = 1; + IntIterator itr = iterator(); + // fold in every element: the hash has to depend on the whole content, + // otherwise all non-empty sets collide on the same value + while (itr.hasNext()) { + // (h << 5) - h is 31 * h + h = (h << 5) - h + itr.next(); + } + return h; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java new file mode 100755 index 000000000000..aee867240a9a --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java @@ -0,0 +1,1157 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package io.druid.extendedset.intset; + + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.SortedSet; + +/** + * {@link IntSet}-based class internally managed by a sorted array of + * ints. + * + * @author Alessandro Colantonio + * @version $Id: ArraySet.java 156 2011-09-01 00:13:57Z cocciasik $ + */ +public class ArraySet extends AbstractIntSet +{ + /** + * elements of the set + */ + private int[] elements; + + /** + * set cardinality + */ + private int size; + + /** + * Empty-set constructor + */ + public ArraySet() + { + size = 0; + elements = null; + } + + /** + * Replace the content of the current instance with the content of another + * instance + * + * @param other instance whose size and element array are adopted; the + * array is shared with other, not copied + */ + private void replaceWith(ArraySet other) + { + size = other.size; + elements = other.elements; + } + + /** + * {@inheritDoc} + * + * Computed as the number of stored elements divided by + * ceil((largest element + 1) / 32), i.e. the number of 32-value groups + * needed to cover 0 .. largest element. An empty set yields 0. + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + // "+ 1" because the values 0 .. last are last + 1 positions: without it + // the divisor is 0 when the largest element is 0. The sum is done in + // double so that Integer.MAX_VALUE does not overflow. + return size() / Math.ceil((1D + elements[size - 1]) / 32D); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return isEmpty() ? 
0D : 1D; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet empty() + { + return new ArraySet(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + return new IntIterator() + { + int next = 0; + + @Override + public void skipAllBefore(int e) + { + // nothing left to skip once the cursor has run off the end; this + // also keeps an empty set (null array) from being indexed + if (!hasNext()) { + return; + } + if (e <= elements[next]) { + return; + } + next = Arrays.binarySearch(elements, next + 1, size, e); + if (next < 0) { + next = -(next + 1); + } + } + + @Override + public boolean hasNext() + { + return next < size; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return elements[next++]; + } + + @Override + public void remove() + { + next--; + size--; + System.arraycopy(elements, next + 1, elements, next, size - next); + compact(); + } + + @Override + public IntIterator clone() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + * + * The returned iterator walks the backing array from index size - 1 + * down to 0. + */ + @Override + public IntIterator descendingIterator() + { + return new IntIterator() + { + int next = size - 1; + + @Override + public void skipAllBefore(int e) + { + // an exhausted iterator has next == -1, which must not be used + // as an array index + if (!hasNext()) { + return; + } + if (e >= elements[next]) { + return; + } + next = Arrays.binarySearch(elements, 0, next, e); + if (next < 0) { + next = -(next + 1) - 1; + } + } + + @Override + public boolean hasNext() + { + return next >= 0; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return elements[next--]; + } + + @Override + public void remove() + { + // next() already moved the cursor one slot below the element it + // returned, so that element sits at next + 1. The cursor itself + // stays put: the slots below it are untouched by the shift and + // are exactly the ones still to be visited. + final int removed = next + 1; + size--; + System.arraycopy(elements, removed + 1, elements, removed, size - removed); + compact(); + } + + @Override + public IntIterator clone() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! 
+ ArraySet c = empty(); + if (!isEmpty()) { + c.elements = Arrays.copyOf(elements, elements.length); + c.size = size; + } + return c; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return toString(); + } + + /** + * Assures that the size of {@link #elements} is sufficient to contain + * {@link #size} elements. + */ + private void ensureCapacity() + { + int capacity = elements == null ? 0 : elements.length; + if (capacity >= size) { + return; + } + capacity = Math.max(capacity << 1, size); + + if (elements == null) { + // nothing to copy + elements = new int[capacity]; + return; + } + elements = Arrays.copyOf(elements, capacity); + } + + /** + * Removes unused allocated slots at the end of {@link #elements} only when + * they are more than twice of the needed space. An empty set drops the + * array altogether. + */ + private void compact() + { + if (size == 0) { + elements = null; + return; + } + if (elements != null && (size << 1) < elements.length) { + elements = Arrays.copyOf(elements, size); + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int element) + { + // append + if (isEmpty() || elements[size - 1] < element) { + size++; + ensureCapacity(); + elements[size - 1] = element; + return true; + } + + // insert + int pos = Arrays.binarySearch(elements, 0, size, element); + if (pos >= 0) { + return false; + } + + size++; + ensureCapacity(); + pos = -(pos + 1); + System.arraycopy(elements, pos, elements, pos + 1, size - pos - 1); + elements[pos] = element; + return true; + } + + /** + * {@inheritDoc} + * + * Negative values and an empty set are answered with false without + * searching. + */ + @Override + public boolean remove(int element) + { + // an empty set may have a null backing array, which binarySearch + // would reject with a NullPointerException + if (element < 0 || isEmpty()) { + return false; + } + + int pos = Arrays.binarySearch(elements, 0, size, element); + if (pos < 0) { + return false; + } + + size--; + System.arraycopy(elements, pos + 1, elements, pos, size - pos); + compact(); + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int element) + { + // first + if (isEmpty()) { + size++; + ensureCapacity(); + elements[size 
- 1] = element; + return; + } + + int pos = Arrays.binarySearch(elements, 0, size, element); + + // add + if (pos < 0) { + size++; + ensureCapacity(); + pos = -(pos + 1); + System.arraycopy(elements, pos, elements, pos + 1, size - pos - 1); + elements[pos] = element; + return; + } + + // remove + size--; + System.arraycopy(elements, pos + 1, elements, pos, size - pos); + compact(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int element) + { + if (isEmpty()) { + return false; + } + return Arrays.binarySearch(elements, 0, size, element) >= 0; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final ArraySet o = convert(c); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return false; + } + thisIndex++; + } + if (thisElements[thisIndex] > otherElements[otherIndex]) { + return false; + } + } + return otherIndex == otherSize - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet other) + { + if (other == null || other.isEmpty() || other == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final ArraySet o = convert(other); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex 
== otherSize - 1) { + return false; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return false; + } + thisIndex++; + } + } + return true; + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet other, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + if ((size >= 0 && size < minElements) || other == null || other.isEmpty() || isEmpty()) { + return false; + } + if (this == other) { + return size() >= minElements; + } + + final ArraySet o = convert(other); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int res = 0; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + return false; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return false; + } + thisIndex++; + } + } + res++; + if (res >= minElements) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + ArraySet res = union(c); + boolean r = !equals(res); + replaceWith(res); + return r; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + ArraySet res = intersection(c); + boolean r = !equals(res); + replaceWith(res); + return r; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + ArraySet res = difference(c); + boolean r = !equals(res); + 
replaceWith(res); + return r; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + if (isEmpty()) { + return 0; + } + final int[] thisElements = elements; // faster + int h = 1; + for (int i = 0; i < size; i++) { + h = (h << 5) - h + thisElements[i]; + } + return h; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof ArraySet)) { + return super.equals(obj); + } + final ArraySet other = (ArraySet) obj; + if (size != other.size) { + return false; + } + final int[] thisElements = elements; // faster + final int[] otherElements = other.elements; // faster + for (int i = 0; i < size; i++) { + if (thisElements[i] != otherElements[i]) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return size; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return size == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + elements = null; + size = 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return elements[0]; + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return elements[size - 1]; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return 0; + } + if (this == other) { + return size(); + } + + final ArraySet o = convert(other); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int res = 0; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) 
{ + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + return res; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return res; + } + thisIndex++; + } + } + res++; + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet intersection(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (this == other) { + return clone(); + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[Math.min(size, otherSize)]; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + thisIndex++; + } + } + resElements[resSize++] = thisElements[thisIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet union(IntSet other) + { + if (this == other || other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty()) { + ArraySet cloned = convert(other); + if (cloned 
== other) { + cloned = cloned.clone(); + } + return cloned; + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[size + otherSize]; +mainLoop: + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + resElements[resSize++] = otherElements[otherIndex]; + if (otherIndex == otherSize - 1) { + resElements[resSize++] = thisElements[thisIndex]; + break mainLoop; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + resElements[resSize++] = thisElements[thisIndex]; + if (thisIndex == size - 1) { + resElements[resSize++] = otherElements[otherIndex]; + break mainLoop; + } + thisIndex++; + } + } + resElements[resSize++] = thisElements[thisIndex]; + } + while (thisIndex < size - 1) { + resElements[resSize++] = thisElements[++thisIndex]; + } + while (otherIndex < otherSize - 1) { + resElements[resSize++] = otherElements[++otherIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet difference(IntSet other) + { + if (isEmpty() || this == other) { + return empty(); + } + if (other == null || other.isEmpty()) { + return clone(); + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[size]; +mainLoop: + while (thisIndex < (size - 1) && otherIndex < 
(otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + resElements[resSize++] = thisElements[thisIndex]; + break mainLoop; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + resElements[resSize++] = thisElements[thisIndex]; + if (thisIndex == size - 1) { + break mainLoop; + } + thisIndex++; + } + } + } + while (thisIndex < size - 1) { + resElements[resSize++] = thisElements[++thisIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + * + * A set combined with itself yields the empty set; a null or empty + * argument yields a copy of this set. + */ + @Override + public ArraySet symmetricDifference(IntSet other) + { + // every element of a set is shared with itself, so nothing survives + if (this == other) { + return empty(); + } + if (other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty()) { + return convert(other).clone(); + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[size + otherSize]; +mainLoop: + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + resElements[resSize++] = otherElements[otherIndex]; + if (otherIndex == otherSize - 1) { + resElements[resSize++] = thisElements[thisIndex]; + break mainLoop; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + resElements[resSize++] = thisElements[thisIndex]; + if (thisIndex == size - 1) { + resElements[resSize++] = 
otherElements[otherIndex]; + break mainLoop; + } + thisIndex++; + } + } + } + while (thisIndex < size - 1) { + resElements[resSize++] = thisElements[++thisIndex]; + } + while (otherIndex < otherSize - 1) { + resElements[resSize++] = otherElements[++otherIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + if (isEmpty()) { + return; + } + + IntIterator thisItr = clone().iterator(); // avoid concurrency + elements = new int[complementSize()]; + final int[] thisElements = elements; // faster + size = 0; + int u = -1; + while (thisItr.hasNext()) { + int c = thisItr.next(); + while (++u < c) { + thisElements[size++] = u; + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + if (from == to) { + add(from); + return; + } + + int[] thisElements = elements; // faster + + if (isEmpty()) { + size = to - from + 1; + ensureCapacity(); + thisElements = elements; + for (int i = 0; i < size; i++) { + thisElements[i] = from++; + } + return; + } + + // increase capacity, if necessary + int posFrom = Arrays.binarySearch(thisElements, 0, size, from); + boolean fromMissing = posFrom < 0; + if (fromMissing) { + posFrom = -posFrom - 1; + } + + int posTo = Arrays.binarySearch(thisElements, posFrom, size, to); + boolean toMissing = posTo < 0; + if (toMissing) { + posTo = -posTo - 1; + } + + int delta = 0; + if (toMissing || (fromMissing && (posFrom == posTo + 1))) { + delta = 1; + } + + int gap = to - from; + delta += gap - (posTo - posFrom); + if (delta > 0) { + size += delta; + ensureCapacity(); + thisElements = elements; + System.arraycopy(thisElements, posTo, thisElements, posTo + delta, size - delta - posTo); + posTo = posFrom + gap; + + // set values + for (int i = posFrom; i <= posTo; i++) { + 
thisElements[i] = from++; + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) + { + if (isEmpty()) { + return; + } + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + if (from == to) { + remove(from); + return; + } + + int posFrom = Arrays.binarySearch(elements, 0, size, from); + if (posFrom < 0) { + posFrom = -posFrom - 1; + } + if (posFrom >= size) { + return; + } + int posTo = Arrays.binarySearch(elements, posFrom, size, to); + if (posTo >= 0) { + posTo++; + } else { + posTo = -posTo - 1; + } + if (posFrom == posTo) { + return; + } + System.arraycopy(elements, posTo, elements, posFrom, size - posTo); + size -= posTo - posFrom; + } + + /** + * Convert a generic {@link IntSet} instance to an {@link ArraySet} instance + * + * @param c + * + * @return + */ + private ArraySet convert(IntSet c) + { + if (c instanceof ArraySet) { + return (ArraySet) c; + } + + int[] resElements = new int[c.size()]; + int resSize = 0; + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + resElements[resSize++] = itr.next(); + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet convert(int... 
a) + { + int[] resElements = null; + int resSize = 0; + int last = -1; + // a null or zero-length array (e.g. a no-argument varargs call) gives + // the empty set; there is no first element to range-check + if (a != null && a.length > 0) { + resElements = new int[a.length]; + a = Arrays.copyOf(a, a.length); + Arrays.sort(a); + if (a[0] < 0) { + throw new ArrayIndexOutOfBoundsException(Integer.toString(a[0])); + } + for (int i : a) { + if (last != i) { + resElements[resSize++] = last = i; + } + } + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + return res; + } + + /** + * {@inheritDoc} + * + * A null or empty collection gives the empty set. Duplicates are + * dropped. + */ + @Override + public ArraySet convert(Collection c) + { + Collection sorted; + int[] resElements = null; + int resSize = 0; + int last = -1; + // guard the empty case: the sorted copy below reads its element 0 + if (c != null && !c.isEmpty()) { + resElements = new int[c.size()]; + if (c instanceof SortedSet && ((SortedSet) c).comparator() == null) { + sorted = c; + } else { + sorted = new ArrayList(c); + Collections.sort((List) sorted); + int first = ((ArrayList) sorted).get(0).intValue(); + if (first < 0) { + throw new ArrayIndexOutOfBoundsException(Integer.toString(first)); + } + } + for (int i : sorted) { + if (last != i) { + resElements[resSize++] = last = i; + } + } + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet complemented() + { + ArraySet res = clone(); + res.complement(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) + { + if (i < 0 || i >= size) { + throw new IndexOutOfBoundsException(Integer.toString(i)); + } + return elements[i]; + } + + /** + * {@inheritDoc} + * + * Returns -1 when the element is not in the set, including when the set + * is empty. + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + // an empty set may have a null backing array, which binarySearch + // would reject with a NullPointerException + if (isEmpty()) { + return -1; + } + int pos = Arrays.binarySearch(elements, 0, size, e); + if (pos < 0) { + return -1; + } + return pos; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java new file mode 100755 index 
000000000000..b3f3d87bc0c4 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java @@ -0,0 +1,3178 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.intset; + + +import io.druid.extendedset.utilities.BitCount; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.ConcurrentModificationException; +import java.util.Formatter; +import java.util.List; +import java.util.Locale; +import java.util.NoSuchElementException; +import java.util.SortedSet; + +/** + * This is CONCISE: COmpressed 'N' Composable Integer SEt. + *

 + * This class is an instance of {@link IntSet} internally represented by + * compressed bitmaps through an RLE (Run-Length Encoding) compression algorithm. + * See http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf for more + * details. + *

+ * Notice that the iterator by {@link #iterator()} is fail-fast, + * similar to most {@link Collection}-derived classes. If the set is + * structurally modified at any time after the iterator is created, the iterator + * will throw a {@link ConcurrentModificationException}. Thus, in the face of + * concurrent modification, the iterator fails quickly and cleanly, rather than + * risking arbitrary, non-deterministic behavior at an undetermined time in the + * future. The iterator throws a {@link ConcurrentModificationException} on a + * best-effort basis. Therefore, it would be wrong to write a program that + * depended on this exception for its correctness: the fail-fast behavior of + * iterators should be used only to detect bugs. + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class ConciseSet extends AbstractIntSet implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 560068054685367266L; + /** + * true if the class must simulate the behavior of WAH + */ + private final boolean simulateWAH; + /** + * User for fail-fast iterator. It counts the number of operations + * that do modify {@link #words} + */ + protected transient volatile int modCount = 0; + /** + * This is the compressed bitmap, that is a collection of words. For each + * word: + *

    + *
  • 1* (0x80000000) means that it is a 31-bit literal. + *
  • 00* (0x00000000) indicates a sequence made up of at + * most one set bit in the first 31 bits, and followed by blocks of 31 0's. + * The following 5 bits (00xxxxx*) indicates which is the set bit ( + * 00000 = no set bit, 00001 = LSB, 11111 = MSB), + * while the remaining 25 bits indicate the number of following 0's blocks. + *
  • 01* (0x40000000) indicates a sequence made up of at + * most one unset bit in the first 31 bits, and followed by blocks of + * 31 1's. (see the 00* case above). + *
+ *

+ * Note that literal words 0xFFFFFFFF and 0x80000000 are allowed, thus + * zero-length sequences (i.e., such that getSequenceCount() == 0) cannot + * exists. + */ + private int[] words; + /** + * Most significant set bit within the uncompressed bit string. + */ + private transient int last; + /** + * Cached cardinality of the bit-set. Defined for efficient {@link #size()} + * calls. When -1, the cache is invalid. + */ + private transient int size; + /** + * Index of the last word in {@link #words} + */ + private transient int lastWordIndex; + + /** + * Creates an empty integer set + */ + public ConciseSet() + { + this(false); + } + + /** + * Creates an empty integer set + * + * @param simulateWAH true if the class must simulate the behavior of + * WAH + */ + public ConciseSet(boolean simulateWAH) + { + this.simulateWAH = simulateWAH; + reset(); + } + + public ConciseSet(int[] words, boolean simulateWAH) + { + this.words = words; + this.lastWordIndex = isEmpty() ? -1 : words.length - 1; + this.size = -1; + updateLast(); + this.simulateWAH = simulateWAH; + } + + /** + * Calculates the modulus division by 31 in a faster way than using n % 31 + *

+ * This method of finding modulus division by an integer that is one less + * than a power of 2 takes at most O(lg(32)) time. The number of operations + * is at most 12 + 9 * ceil(lg(32)). + *

+ * See http://graphics.stanford.edu/~seander/bithacks.html + * + * @param n number to divide + * + * @return n % 31 + */ + private static int maxLiteralLengthModulus(int n) + { + int m = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F); + m = (m >>> 15) + (m & 0x00007FFF); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + return m == 31 ? 0 : m; + } + + /** + * Calculates the multiplication by 31 in a faster way than using n * 31 + * + * @param n number to multiply + * + * @return n * 31 + */ + private static int maxLiteralLengthMultiplication(int n) + { + return (n << 5) - n; + } + + /** + * Calculates the division by 31 + * + * @param n number to divide + * + * @return n / 31 + */ + private static int maxLiteralLengthDivision(int n) + { + return n / 31; + } + + /** + * Checks whether a word is a literal one + * + * @param word word to check + * + * @return true if the given word is a literal word + */ + private static boolean isLiteral(int word) + { + // "word" must be 1* + // NOTE: this is faster than "return (word & 0x80000000) == 0x80000000" + return (word & 0x80000000) != 0; + } + + /** + * Checks whether a word contains a sequence of 1's + * + * @param word word to check + * + * @return true if the given word is a sequence of 1's + */ + private static boolean isOneSequence(int word) + { + // "word" must be 01* + return (word & 0xC0000000) == ConciseSetUtils.SEQUENCE_BIT; + } + + /** + * Checks whether a word contains a sequence of 0's + * + * @param word word to check + * + * @return true if the given word is a sequence of 0's + */ + private static boolean isZeroSequence(int word) + { + // "word" must 
be 00* + return (word & 0xC0000000) == 0; + } + + /** + * Checks whether a word contains a sequence of 0's with no set bit, or 1's + * with no unset bit. + *

+ * NOTE: when {@link #simulateWAH} is true, it is + * equivalent to (and as fast as) !{@link #isLiteral(int)} + * + * @param word word to check + * + * @return true if the given word is a sequence of 0's or 1's + * but with no (un)set bit + */ + private static boolean isSequenceWithNoBits(int word) + { + // "word" must be 0?00000* + return (word & 0xBE000000) == 0x00000000; + } + + /** + * Gets the number of blocks of 1's or 0's stored in a sequence word + * + * @param word word to check + * + * @return the number of blocks that follow the first block of 31 bits + */ + private static int getSequenceCount(int word) + { + // get the 25 LSB bits + return word & 0x01FFFFFF; + } + + /** + * Clears the (un)set bit in a sequence + * + * @param word word to check + * + * @return the sequence corresponding to the given sequence and with no + * (un)set bits + */ + private static int getSequenceWithNoBits(int word) + { + // clear 29 to 25 LSB bits + return (word & 0xC1FFFFFF); + } + + /** + * Gets the position of the flipped bit within a sequence word. If the + * sequence has no set/unset bit, returns -1. + *

+ * Note that the parameter must a sequence word, otherwise the + * result is meaningless. + * + * @param word sequence word to check + * + * @return the position of the set bit, from 0 to 31. If the sequence has no + * set/unset bit, returns -1. + */ + private static int getFlippedBit(int word) + { + // get bits from 30 to 26 + // NOTE: "-1" is required since 00000 represents no bits and 00001 the LSB bit set + return ((word >>> 25) & 0x0000001F) - 1; + } + + /** + * Gets the number of set bits within the literal word + * + * @param word literal word + * + * @return the number of set bits within the literal word + */ + private static int getLiteralBitCount(int word) + { + return BitCount.count(getLiteralBits(word)); + } + + /** + * Gets the bits contained within the literal word + * + * @param word literal word + * + * @return the literal word with the most significant bit cleared + */ + private static int getLiteralBits(int word) + { + return ConciseSetUtils.ALL_ONES_WITHOUT_MSB & word; + } + + /** + * Returns true when the given 31-bit literal string (namely, + * with MSB set) contains only one set bit + * + * @param literal literal word (namely, with MSB unset) + * + * @return true when the given literal contains only one set + * bit + */ + private static boolean containsOnlyOneBit(int literal) + { + return (literal & (literal - 1)) == 0; + } + + /** + * Generates the 32-bit binary representation of a given word (debug only) + * + * @param word word to represent + * + * @return 32-character string that represents the given word + */ + private static String toBinaryString(int word) + { + String lsb = Integer.toBinaryString(word); + StringBuilder pad = new StringBuilder(); + for (int i = lsb.length(); i < 32; i++) { + pad.append('0'); + } + return pad.append(lsb).toString(); + } + + /** + * Resets to an empty set + * + * @see #ConciseSet() + * {@link #clear()} + */ + private void reset() + { + modCount++; + words = null; + last = -1; + size = 0; + lastWordIndex = 
-1; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet clone() + { + if (isEmpty()) { + return empty(); + } + + // NOTE: do not use super.clone() since it is 10 times slower! + ConciseSet res = empty(); + res.last = last; + res.lastWordIndex = lastWordIndex; + res.modCount = 0; + res.size = size; + res.words = Arrays.copyOf(words, lastWordIndex + 1); + return res; + } + + /** + * Gets the literal word that represents the first 31 bits of the given the + * word (i.e. the first block of a sequence word, or the bits of a literal word). + *

+ * If the word is a literal, it returns the unmodified word. In case of a + * sequence, it returns a literal that represents the first 31 bits of the + * given sequence word. + * + * @param word word to check + * + * @return the literal contained within the given word, with the most + * significant bit set to 1. + */ + private /*static*/ int getLiteral(int word) + { + if (isLiteral(word)) { + return word; + } + + if (simulateWAH) { + return isZeroSequence(word) ? ConciseSetUtils.ALL_ZEROS_LITERAL : ConciseSetUtils.ALL_ONES_LITERAL; + } + + // get bits from 30 to 26 and use them to set the corresponding bit + // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent + // NOTE: ">>> 1" is required since 00000 represents no bits and 00001 the LSB bit set + int literal = (1 << (word >>> 25)) >>> 1; + return isZeroSequence(word) + ? (ConciseSetUtils.ALL_ZEROS_LITERAL | literal) + : (ConciseSetUtils.ALL_ONES_LITERAL & ~literal); + } + + /** + * Clears bits from MSB (excluded, since it indicates the word type) to the + * specified bit (excluded). Last word is supposed to be a literal one. + * + * @param lastSetBit leftmost bit to preserve + */ + private void clearBitsAfterInLastWord(int lastSetBit) + { + words[lastWordIndex] &= ConciseSetUtils.ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)); + } + + /** + * Assures that the length of {@link #words} is sufficient to contain + * the given index. + */ + private void ensureCapacity(int index) + { + int capacity = words == null ? 
0 : words.length; + if (capacity > index) { + return; + } + capacity = Math.max(capacity << 1, index + 1); + + if (words == null) { + // nothing to copy + words = new int[capacity]; + return; + } + words = Arrays.copyOf(words, capacity); + } + + /** + * Removes unused allocated words at the end of {@link #words} only when they + * are more than twice of the needed space + */ + private void compact() + { + if (words != null && ((lastWordIndex + 1) << 1) < words.length) { + words = Arrays.copyOf(words, lastWordIndex + 1); + } + } + + /** + * Sets the bit at the given absolute position within the uncompressed bit + * string. The bit must be appendable, that is it must represent an + * integer that is strictly greater than the maximum integer in the set. + * Note that the parameter range check is performed by the public method + * {@link #add(Integer)} and not in this method. + *

+ * NOTE: This method assumes that the last element of {@link #words} + * (i.e. getLastWord()) must be one of the + * following: + *

    + *
  • a literal word with at least one set bit; + *
  • a sequence of ones. + *
+ * Hence, the last word in {@link #words} cannot be: + *
    + *
  • a literal word containing only zeros; + *
  • a sequence of zeros. + *
+ * + * @param i the absolute position of the bit to set (i.e., the integer to add) + */ + private void append(int i) + { + // special case of empty set + if (isEmpty()) { + int zeroBlocks = maxLiteralLengthDivision(i); + if (zeroBlocks == 0) { + words = new int[1]; + lastWordIndex = 0; + } else if (zeroBlocks == 1) { + words = new int[2]; + lastWordIndex = 1; + words[0] = ConciseSetUtils.ALL_ZEROS_LITERAL; + } else { + words = new int[2]; + lastWordIndex = 1; + words[0] = zeroBlocks - 1; + } + last = i; + size = 1; + words[lastWordIndex] = ConciseSetUtils.ALL_ZEROS_LITERAL | (1 << maxLiteralLengthModulus(i)); + return; + } + + // position of the next bit to set within the current literal + int bit = maxLiteralLengthModulus(last) + i - last; + + // if we are outside the current literal, add zeros in + // between the current word and the new 1-bit literal word + if (bit >= ConciseSetUtils.MAX_LITERAL_LENGTH) { + int zeroBlocks = maxLiteralLengthDivision(bit) - 1; + bit = maxLiteralLengthModulus(bit); + if (zeroBlocks == 0) { + ensureCapacity(lastWordIndex + 1); + } else { + ensureCapacity(lastWordIndex + 2); + appendFill(zeroBlocks, 0); + } + appendLiteral(ConciseSetUtils.ALL_ZEROS_LITERAL | 1 << bit); + } else { + words[lastWordIndex] |= 1 << bit; + if (words[lastWordIndex] == ConciseSetUtils.ALL_ONES_LITERAL) { + lastWordIndex--; + appendLiteral(ConciseSetUtils.ALL_ONES_LITERAL); + } + } + + // update other info + last = i; + if (size >= 0) { + size++; + } + } + + /** + * Append a literal word after the last word + * + * @param word the new literal word. Note that the leftmost bit must + * be set to 1. + */ + private void appendLiteral(int word) + { + // when we have a zero sequence of the maximum lenght (that is, + // 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen + // that we try to append a zero literal because the result of the given operation must be an + // empty set. 
Whitout the following test, we would have increased the + // counter of the zero sequence, thus obtaining 0x02000000 that + // represents a sequence with the first bit set! + if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) { + return; + } + + // first addition + if (lastWordIndex < 0) { + words[lastWordIndex = 0] = word; + return; + } + + final int lastWord = words[lastWordIndex]; + if (word == ConciseSetUtils.ALL_ZEROS_LITERAL) { + if (lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) { + words[lastWordIndex] = 1; + } else if (isZeroSequence(lastWord)) { + words[lastWordIndex]++; + } else if (!simulateWAH && containsOnlyOneBit(getLiteralBits(lastWord))) { + words[lastWordIndex] = 1 | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25); + } else { + words[++lastWordIndex] = word; + } + } else if (word == ConciseSetUtils.ALL_ONES_LITERAL) { + if (lastWord == ConciseSetUtils.ALL_ONES_LITERAL) { + words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1; + } else if (isOneSequence(lastWord)) { + words[lastWordIndex]++; + } else if (!simulateWAH && containsOnlyOneBit(~lastWord)) { + words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1 | ((1 + Integer.numberOfTrailingZeros(~lastWord)) + << 25); + } else { + words[++lastWordIndex] = word; + } + } else { + words[++lastWordIndex] = word; + } + } + + /** + * Append a sequence word after the last word + * + * @param length sequence length + * @param fillType sequence word with a count that equals 0 + */ + private void appendFill(int length, int fillType) + { + assert length > 0; + assert lastWordIndex >= -1; + + fillType &= ConciseSetUtils.SEQUENCE_BIT; + + // it is actually a literal... + if (length == 1) { + appendLiteral(fillType == 0 ? 
ConciseSetUtils.ALL_ZEROS_LITERAL : ConciseSetUtils.ALL_ONES_LITERAL); + return; + } + + // empty set + if (lastWordIndex < 0) { + words[lastWordIndex = 0] = fillType | (length - 1); + return; + } + + final int lastWord = words[lastWordIndex]; + if (isLiteral(lastWord)) { + if (fillType == 0 && lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) { + words[lastWordIndex] = length; + } else if (fillType == ConciseSetUtils.SEQUENCE_BIT && lastWord == ConciseSetUtils.ALL_ONES_LITERAL) { + words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length; + } else if (!simulateWAH) { + if (fillType == 0 && containsOnlyOneBit(getLiteralBits(lastWord))) { + words[lastWordIndex] = length | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25); + } else if (fillType == ConciseSetUtils.SEQUENCE_BIT && containsOnlyOneBit(~lastWord)) { + words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length | ((1 + Integer.numberOfTrailingZeros(~lastWord)) + << 25); + } else { + words[++lastWordIndex] = fillType | (length - 1); + } + } else { + words[++lastWordIndex] = fillType | (length - 1); + } + } else { + if ((lastWord & 0xC0000000) == fillType) { + words[lastWordIndex] += length; + } else { + words[++lastWordIndex] = fillType | (length - 1); + } + } + } + + /** + * Recalculate a fresh value for {@link ConciseSet#last} + */ + private void updateLast() + { + if (isEmpty()) { + last = -1; + return; + } + + last = 0; + for (int i = 0; i <= lastWordIndex; i++) { + int w = words[i]; + if (isLiteral(w)) { + last += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + last += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); + } + } + + int w = words[lastWordIndex]; + if (isLiteral(w)) { + last -= Integer.numberOfLeadingZeros(getLiteralBits(w)); + } else { + last--; + } + } + + /** + * Performs the given operation over the bit-sets + * + * @param other {@link ConciseSet} instance that represents the right + * operand + * @param operator operator + * + * @return the result of the operation + */ + 
private ConciseSet performOperation(ConciseSet other, Operator operator) + { + // non-empty arguments + if (this.isEmpty() || other.isEmpty()) { + return operator.combineEmptySets(this, other); + } + + // if the two operands are disjoint, the operation is faster + ConciseSet res = operator.combineDisjointSets(this, other); + if (res != null) { + return res; + } + + // Allocate a sufficient number of words to contain all possible results. + // NOTE: since lastWordIndex is the index of the last used word in "words", + // we require "+2" to have the actual maximum required space. + // In any case, we do not allocate more than the maximum space required + // for the uncompressed representation. + // Another "+1" is required to allows for the addition of the last word + // before compacting. + res = empty(); + res.words = new int[1 + Math.min( + this.lastWordIndex + other.lastWordIndex + 2, + maxLiteralLengthDivision(Math.max(this.last, other.last)) << (simulateWAH ? 1 : 0) + )]; + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while (true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + res.appendFill(minCount, operator.combineLiterals(thisItr.word, otherItr.word)); + if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || + { + break; + } + } else { + res.appendLiteral(operator.combineLiterals(thisItr.toLiteral(), otherItr.word)); + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } else if (!otherItr.isLiteral) { + res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.toLiteral())); + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + break; + } + } else { + res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.word)); + if (!thisItr.prepareNext() | 
!otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } + + // invalidate the size + res.size = -1; + boolean invalidLast = true; + + // if one bit string is greater than the other one, we add the remaining + // bits depending on the given operation. + switch (operator) { + case AND: + break; + case OR: + res.last = Math.max(this.last, other.last); + invalidLast = false; + invalidLast |= thisItr.flush(res); + invalidLast |= otherItr.flush(res); + break; + case XOR: + if (this.last != other.last) { + res.last = Math.max(this.last, other.last); + invalidLast = false; + } + invalidLast |= thisItr.flush(res); + invalidLast |= otherItr.flush(res); + break; + case ANDNOT: + if (this.last > other.last) { + res.last = this.last; + invalidLast = false; + } + invalidLast |= thisItr.flush(res); + break; + } + + // remove trailing zeros + res.trimZeros(); + if (res.isEmpty()) { + return res; + } + + // compute the greatest element + if (invalidLast) { + res.updateLast(); + } + + // compact the memory + res.compact(); + + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet o) + { + // special cases + if (isEmpty() || o == null || o.isEmpty()) { + return 0; + } + if (this == o) { + return size(); + } + + final ConciseSet other = convert(o); + + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(this.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { + if (isZeroSequence(this.words[0])) { + return 0; + } + return other.size(); + } + if (isSequenceWithNoBits(other.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { + if (isZeroSequence(other.words[0])) { + return 0; + } + return this.size(); + } + + int res = 0; + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while 
(true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { + res += maxLiteralLengthMultiplication(minCount); + } + if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || + { + break; + } + } else { + res += getLiteralBitCount(thisItr.toLiteral() & otherItr.word); + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } else if (!otherItr.isLiteral) { + res += getLiteralBitCount(thisItr.word & otherItr.toLiteral()); + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + break; + } + } else { + res += getLiteralBitCount(thisItr.word & otherItr.word); + if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } + + return res; + } + + /** + * {@inheritDoc} + */ + public ByteBuffer toByteBuffer() + { + ByteBuffer buffer = ByteBuffer.allocate((lastWordIndex + 1) * 4); + buffer.asIntBuffer().put(Arrays.copyOf(words, lastWordIndex + 1)); + return buffer; + } + + /** + * {@inheritDoc} + */ + public int[] getWords() + { + if (words == null) { + return new int[]{}; + } + return Arrays.copyOf(words, lastWordIndex + 1); + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) + { + if (i < 0) { + throw new IndexOutOfBoundsException(); + } + + // initialize data + int firstSetBitInWord = 0; + int position = i; + int setBitsInCurrentWord = 0; + for (int j = 0; j <= lastWordIndex; j++) { + int w = words[j]; + if (isLiteral(w)) { + // number of bits in the current word + setBitsInCurrentWord = getLiteralBitCount(w); + + // check if the desired bit is in the current word + if (position < setBitsInCurrentWord) { + int currSetBitInWord = -1; + for (; position >= 0; position--) { + currSetBitInWord = Integer.numberOfTrailingZeros(w & (0xFFFFFFFF << 
(currSetBitInWord + 1))); + } + return firstSetBitInWord + currSetBitInWord; + } + + // skip the 31-bit block + firstSetBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + // number of involved bits (31 * blocks) + int sequenceLength = maxLiteralLengthMultiplication(getSequenceCount(w) + 1); + + // check the sequence type + if (isOneSequence(w)) { + if (simulateWAH || isSequenceWithNoBits(w)) { + setBitsInCurrentWord = sequenceLength; + if (position < setBitsInCurrentWord) { + return firstSetBitInWord + position; + } + } else { + setBitsInCurrentWord = sequenceLength - 1; + if (position < setBitsInCurrentWord) + // check whether the desired set bit is after the + // flipped bit (or after the first block) + { + return firstSetBitInWord + position + (position < getFlippedBit(w) ? 0 : 1); + } + } + } else { + if (simulateWAH || isSequenceWithNoBits(w)) { + setBitsInCurrentWord = 0; + } else { + setBitsInCurrentWord = 1; + if (position == 0) { + return firstSetBitInWord + getFlippedBit(w); + } + } + } + + // skip the 31-bit blocks + firstSetBitInWord += sequenceLength; + } + + // update the number of found set bits + position -= setBitsInCurrentWord; + } + + throw new IndexOutOfBoundsException(Integer.toString(i)); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + if (isEmpty()) { + return -1; + } + + // returned value + int index = 0; + + int blockIndex = maxLiteralLengthDivision(e); + int bitPosition = maxLiteralLengthModulus(e); + for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { + int w = words[i]; + if (isLiteral(w)) { + // check if the current literal word is the "right" one + if (blockIndex == 0) { + if ((w & (1 << bitPosition)) == 0) { + return -1; + } + return index + BitCount.count(w & ~(0xFFFFFFFF << bitPosition)); + } + blockIndex--; + index += getLiteralBitCount(w); + } else { + if (simulateWAH) 
{ + if (isOneSequence(w) && blockIndex <= getSequenceCount(w)) { + return index + maxLiteralLengthMultiplication(blockIndex) + bitPosition; + } + } else { + // if we are at the beginning of a sequence, and it is + // a set bit, the bit already exists + if (blockIndex == 0) { + int l = getLiteral(w); + if ((l & (1 << bitPosition)) == 0) { + return -1; + } + return index + BitCount.count(l & ~(0xFFFFFFFF << bitPosition)); + } + + // if we are in the middle of a sequence of 1's, the bit already exist + if (blockIndex > 0 + && blockIndex <= getSequenceCount(w) + && isOneSequence(w)) { + return index + maxLiteralLengthMultiplication(blockIndex) + bitPosition - (isSequenceWithNoBits(w) ? 0 : 1); + } + } + + // next word + int blocks = getSequenceCount(w) + 1; + blockIndex -= blocks; + if (isZeroSequence(w)) { + if (!simulateWAH && !isSequenceWithNoBits(w)) { + index++; + } + } else { + index += maxLiteralLengthMultiplication(blocks); + if (!simulateWAH && !isSequenceWithNoBits(w)) { + index--; + } + } + } + } + + // not found + return -1; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet intersection(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (other == this) { + return clone(); + } + return performOperation(convert(other), Operator.AND); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet union(IntSet other) + { + if (other == null || other.isEmpty() || other == this) { + return clone(); + } + return performOperation(convert(other), Operator.OR); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet difference(IntSet other) + { + if (other == this) { + return empty(); + } + if (other == null || other.isEmpty()) { + return clone(); + } + return performOperation(convert(other), Operator.ANDNOT); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet symmetricDifference(IntSet other) + { + if (other == this) { + return empty(); + } + if (other == null || 
other.isEmpty()) { + return clone(); + } + return performOperation(convert(other), Operator.XOR); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet complemented() + { + ConciseSet cloned = clone(); + cloned.complement(); + return cloned; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + modCount++; + + if (isEmpty()) { + return; + } + + if (last == ConciseSetUtils.MIN_ALLOWED_SET_BIT) { + clear(); + return; + } + + // update size + if (size >= 0) { + size = last - size + 1; + } + + // complement each word + for (int i = 0; i <= lastWordIndex; i++) { + int w = words[i]; + if (isLiteral(w)) + // negate the bits and set the most significant bit to 1 + { + words[i] = ConciseSetUtils.ALL_ZEROS_LITERAL | ~w; + } else + // switch the sequence type + { + words[i] ^= ConciseSetUtils.SEQUENCE_BIT; + } + } + + // do not complement after the last element + if (isLiteral(words[lastWordIndex])) { + clearBitsAfterInLastWord(maxLiteralLengthModulus(last)); + } + + // remove trailing zeros + trimZeros(); + if (isEmpty()) { + return; + } + + // calculate the maximal element + last = 0; + int w = 0; + for (int i = 0; i <= lastWordIndex; i++) { + w = words[i]; + if (isLiteral(w)) { + last += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + last += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); + } + } + + // manage the last word (that must be a literal or a sequence of 1's) + if (isLiteral(w)) { + last -= Integer.numberOfLeadingZeros(getLiteralBits(w)); + } else { + last--; + } + } + + /** + * Removes trailing zeros + */ + private void trimZeros() + { + // loop over ALL_ZEROS_LITERAL words + int w; + do { + w = words[lastWordIndex]; + if (w == ConciseSetUtils.ALL_ZEROS_LITERAL) { + lastWordIndex--; + } else if (isZeroSequence(w)) { + if (simulateWAH || isSequenceWithNoBits(w)) { + lastWordIndex--; + } else { + // convert the sequence in a 1-bit literal word + words[lastWordIndex] = getLiteral(w); + return; + } + } else { + // 
one sequence or literal + return; + } + if (lastWordIndex < 0) { + reset(); + return; + } + } while (true); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + if (isEmpty()) { + return new IntIterator() + { + @Override + public void skipAllBefore(int element) {/*empty*/} + + @Override + public boolean hasNext() {return false;} + + @Override + public int next() {throw new NoSuchElementException();} + + @Override + public void remove() {throw new UnsupportedOperationException();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + return new BitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() + { + if (isEmpty()) { + return new IntIterator() + { + @Override + public void skipAllBefore(int element) {/*empty*/} + + @Override + public boolean hasNext() {return false;} + + @Override + public int next() {throw new NoSuchElementException();} + + @Override + public void remove() {throw new UnsupportedOperationException();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + return new ReverseBitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + reset(); + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return last; + } + + /** + * Convert a given collection to a {@link ConciseSet} instance + */ + private ConciseSet convert(IntSet c) + { + if (c instanceof ConciseSet && simulateWAH == ((ConciseSet) c).simulateWAH) { + return (ConciseSet) c; + } + if (c == null) { + return empty(); + } + + ConciseSet res = empty(); + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + res.add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet convert(int... 
a) + { + ConciseSet res = empty(); + if (a != null) { + a = Arrays.copyOf(a, a.length); + Arrays.sort(a); + for (int i : a) { + if (res.last != i) { + res.add(i); + } + } + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet convert(Collection c) + { + ConciseSet res = empty(); + Collection sorted; + if (c != null) { + if (c instanceof SortedSet && ((SortedSet) c).comparator() == null) { + sorted = c; + } else { + sorted = new ArrayList(c); + Collections.sort((List) sorted); + } + for (int i : sorted) { + if (res.last != i) { + res.add(i); + } + } + } + return res; + } + + /** + * Replace the current instance with another {@link ConciseSet} instance. It + * also returns true if the given set is actually different + * from the current one + * + * @param other {@link ConciseSet} instance to use to replace the current one + * + * @return true if the given set is different from the current + * set + */ + private boolean replaceWith(ConciseSet other) + { + if (this == other) { + return false; + } + + boolean isSimilar = (this.lastWordIndex == other.lastWordIndex) + && (this.last == other.last); + for (int i = 0; isSimilar && (i <= lastWordIndex); i++) { + isSimilar &= this.words[i] == other.words[i]; + } + + if (isSimilar) { + if (other.size >= 0) { + this.size = other.size; + } + return false; + } + + this.words = other.words; + this.size = other.size; + this.last = other.last; + this.lastWordIndex = other.lastWordIndex; + this.modCount++; + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int e) + { + modCount++; + + // range check + if (e < ConciseSetUtils.MIN_ALLOWED_SET_BIT || e > ConciseSetUtils.MAX_ALLOWED_INTEGER) { + throw new IndexOutOfBoundsException(String.valueOf(e)); + } + + // the element can be simply appended + if (e > last) { + append(e); + return true; + } + + if (e == last) { + return false; + } + + // check if the element can be put in a literal word + int blockIndex = 
maxLiteralLengthDivision(e); + int bitPosition = maxLiteralLengthModulus(e); + for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { + int w = words[i]; + if (isLiteral(w)) { + // check if the current literal word is the "right" one + if (blockIndex == 0) { + // bit already set + if ((w & (1 << bitPosition)) != 0) { + return false; + } + + // By adding the bit we potentially create a sequence: + // -- If the literal is made up of all zeros, it definitely + // cannot be part of a sequence (otherwise it would not have + // been created). Thus, we can create a 1-bit literal word + // -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding + // the new one we potentially allow for a 1's sequence + // together with the successive word + // -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding + // the new one we potentially allow for a 1's sequence + // together with the successive and/or the preceding words + if (!simulateWAH) { + int bitCount = getLiteralBitCount(w); + if (bitCount >= ConciseSetUtils.MAX_LITERAL_LENGTH - 2) { + break; + } + } else { + if (containsOnlyOneBit(~w) || w == ConciseSetUtils.ALL_ONES_LITERAL) { + break; + } + } + + // set the bit + words[i] |= 1 << bitPosition; + if (size >= 0) { + size++; + } + return true; + } + + blockIndex--; + } else { + if (simulateWAH) { + if (isOneSequence(w) && blockIndex <= getSequenceCount(w)) { + return false; + } + } else { + // if we are at the beginning of a sequence, and it is + // a set bit, the bit already exists + if (blockIndex == 0 + && (getLiteral(w) & (1 << bitPosition)) != 0) { + return false; + } + + // if we are in the middle of a sequence of 1's, the bit already exist + if (blockIndex > 0 + && blockIndex <= getSequenceCount(w) + && isOneSequence(w)) { + return false; + } + } + + // next word + blockIndex -= getSequenceCount(w) + 1; + } + } + + // the bit is in the middle of a sequence or it may cause a literal to + // become a sequence, thus the "easiest" way to add it is by ORing + 
return replaceWith(performOperation(convert(e), Operator.OR)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(int o) + { + modCount++; + + if (isEmpty()) { + return false; + } + + // the element cannot exist + if (o > last) { + return false; + } + + // check if the element can be removed from a literal word + int blockIndex = maxLiteralLengthDivision(o); + int bitPosition = maxLiteralLengthModulus(o); + for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { + final int w = words[i]; + if (isLiteral(w)) { + // check if the current literal word is the "right" one + if (blockIndex == 0) { + // the bit is already unset + if ((w & (1 << bitPosition)) == 0) { + return false; + } + + // By removing the bit we potentially create a sequence: + // -- If the literal is made up of all ones, it definitely + // cannot be part of a sequence (otherwise it would not have + // been created). Thus, we can create a 30-bit literal word + // -- If there are 2 set bits, by removing the specified + // one we potentially allow for a 1's sequence together with + // the successive word + // -- If there is 1 set bit, by removing the new one we + // potentially allow for a 0's sequence + // together with the successive and/or the preceding words + if (!simulateWAH) { + int bitCount = getLiteralBitCount(w); + if (bitCount <= 2) { + break; + } + } else { + final int l = getLiteralBits(w); + if (l == 0 || containsOnlyOneBit(l)) { + break; + } + } + + // unset the bit + words[i] &= ~(1 << bitPosition); + if (size >= 0) { + size--; + } + + // if the bit is the maximal element, update it + if (o == last) { + last -= maxLiteralLengthModulus(last) - (ConciseSetUtils.MAX_LITERAL_LENGTH + - Integer.numberOfLeadingZeros(getLiteralBits(words[i]))); + } + return true; + } + + blockIndex--; + } else { + if (simulateWAH) { + if (isZeroSequence(w) && blockIndex <= getSequenceCount(w)) { + return false; + } + } else { + // if we are at the beginning of a sequence, and it is + // 
an unset bit, the bit does not exist + if (blockIndex == 0 + && (getLiteral(w) & (1 << bitPosition)) == 0) { + return false; + } + + // if we are in the middle of a sequence of 0's, the bit does not exist + if (blockIndex > 0 + && blockIndex <= getSequenceCount(w) + && isZeroSequence(w)) { + return false; + } + } + + // next word + blockIndex -= getSequenceCount(w) + 1; + } + } + + // the bit is in the middle of a sequence or it may cause a literal to + // become a sequence, thus the "easiest" way to remove it by ANDNOTing + return replaceWith(performOperation(convert(o), Operator.ANDNOT)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int o) + { + if (isEmpty() || o > last || o < 0) { + return false; + } + + // check if the element is within a literal word + int block = maxLiteralLengthDivision(o); + int bit = maxLiteralLengthModulus(o); + for (int i = 0; i <= lastWordIndex; i++) { + final int w = words[i]; + final int t = w & 0xC0000000; // the first two bits... 
+ switch (t) { + case 0x80000000: // LITERAL + case 0xC0000000: // LITERAL + // check if the current literal word is the "right" one + if (block == 0) { + return (w & (1 << bit)) != 0; + } + block--; + break; + case 0x00000000: // ZERO SEQUENCE + if (!simulateWAH) { + if (block == 0 && ((w >> 25) - 1) == bit) { + return true; + } + } + block -= getSequenceCount(w) + 1; + if (block < 0) { + return false; + } + break; + case 0x40000000: // ONE SEQUENCE + if (!simulateWAH) { + if (block == 0 && (0x0000001F & (w >> 25) - 1) == bit) { + return false; + } + } + block -= getSequenceCount(w) + 1; + if (block < 0) { + return true; + } + break; + } + } + + // no more words + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final ConciseSet other = convert(c); + if (other.last > last) { + return false; + } + if (size >= 0 && other.size > size) { + return false; + } + if (other.size == 1) { + return contains(other.last); + } + + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(this.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { + if (isZeroSequence(this.words[0])) { + return false; + } + return true; + } + if (isSequenceWithNoBits(other.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { + return false; + } + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while (true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word) == 0 + && (ConciseSetUtils.SEQUENCE_BIT & otherItr.word) != 0) { + return false; + } + if 
(!otherItr.prepareNext(minCount)) { + return true; + } + if (!thisItr.prepareNext(minCount)) { + return false; + } + } else { + if ((thisItr.toLiteral() & otherItr.word) != otherItr.word) { + return false; + } + thisItr.word--; + if (!otherItr.prepareNext()) { + return true; + } + if (!thisItr.prepareNext(1)) { + return false; + } + } + } else if (!otherItr.isLiteral) { + int o = otherItr.toLiteral(); + if ((thisItr.word & otherItr.toLiteral()) != o) { + return false; + } + otherItr.word--; + if (!otherItr.prepareNext(1)) { + return true; + } + if (!thisItr.prepareNext()) { + return false; + } + } else { + if ((thisItr.word & otherItr.word) != otherItr.word) { + return false; + } + if (!otherItr.prepareNext()) { + return true; + } + if (!thisItr.prepareNext()) { + return false; + } + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final ConciseSet other = convert(c); + if (other.size == 1) { + return contains(other.last); + } + + // disjoint sets + if (isSequenceWithNoBits(this.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { + if (isZeroSequence(this.words[0])) { + return false; + } + return true; + } + if (isSequenceWithNoBits(other.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { + if (isZeroSequence(other.words[0])) { + return false; + } + return true; + } + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while (true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { + return true; + } + if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || + { + return false; + } 
+ } else { + if ((thisItr.toLiteral() & otherItr.word) != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return true; + } + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } else if (!otherItr.isLiteral) { + if ((thisItr.word & otherItr.toLiteral()) != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return true; + } + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + return false; + } + } else { + if ((thisItr.word & otherItr.word) != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return true; + } + if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + if ((size >= 0 && size < minElements) || c == null || c.isEmpty() || isEmpty()) { + return false; + } + if (this == c) { + return size() >= minElements; + } + + // convert the other set in order to perform a more complex intersection + ConciseSet other = convert(c); + if (other.size >= 0 && other.size < minElements) { + return false; + } + if (minElements == 1 && other.size == 1) { + return contains(other.last); + } + if (minElements == 1 && size == 1) { + return other.contains(last); + } + + // disjoint sets + if (isSequenceWithNoBits(this.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { + if (isZeroSequence(this.words[0])) { + return false; + } + return true; + } + if (isSequenceWithNoBits(other.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { + if (isZeroSequence(other.words[0])) { + return false; + } + return true; + } + + // resulting size + int res = 0; + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while 
(true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { + res += maxLiteralLengthMultiplication(minCount); + if (res >= minElements) { + return true; + } + } + if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || + { + return false; + } + } else { + res += getLiteralBitCount(thisItr.toLiteral() & otherItr.word); + if (res >= minElements) { + return true; + } + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } else if (!otherItr.isLiteral) { + res += getLiteralBitCount(thisItr.word & otherItr.toLiteral()); + if (res >= minElements) { + return true; + } + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + return false; + } + } else { + res += getLiteralBitCount(thisItr.word & otherItr.word); + if (res >= minElements) { + return true; + } + if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return words == null; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + modCount++; + + if (isEmpty() || c == this) { + return false; + } + if (c == null || c.isEmpty()) { + clear(); + return true; + } + + ConciseSet other = convert(c); + if (other.size == 1) { + if (contains(other.last)) { + if (size == 1) { + return false; + } + return replaceWith(convert(other.last)); + } + clear(); + return true; + } + + return replaceWith(performOperation(other, Operator.AND)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + modCount++; + if (c == null || c.isEmpty() || this == c) { + return false; + } + + ConciseSet other = convert(c); + if (other.size == 1) { + return add(other.last); + } + + 
return replaceWith(performOperation(convert(c), Operator.OR)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + modCount++; + + if (c == null || c.isEmpty() || isEmpty()) { + return false; + } + if (c == this) { + clear(); + return true; + } + + ConciseSet other = convert(c); + if (other.size == 1) { + return remove(other.last); + } + + return replaceWith(performOperation(convert(c), Operator.ANDNOT)); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + if (size < 0) { + size = 0; + for (int i = 0; i <= lastWordIndex; i++) { + int w = words[i]; + if (isLiteral(w)) { + size += getLiteralBitCount(w); + } else { + if (isZeroSequence(w)) { + if (!isSequenceWithNoBits(w)) { + size++; + } + } else { + size += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); + if (!isSequenceWithNoBits(w)) { + size--; + } + } + } + } + } + return size; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet empty() + { + return new ConciseSet(simulateWAH); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + int h = 1; + for (int i = 0; i <= lastWordIndex; i++) { + h = (h << 5) - h + words[i]; + } + return h; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof ConciseSet)) { + return super.equals(obj); + } + + final ConciseSet other = (ConciseSet) obj; + if (simulateWAH != other.simulateWAH) { + return super.equals(obj); + } + + if (size() != other.size()) { + return false; + } + if (isEmpty()) { + return true; + } + if (last != other.last) { + return false; + } + for (int i = 0; i <= lastWordIndex; i++) { + if (words[i] != other.words[i]) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(IntSet o) + { + // empty set cases + if (this.isEmpty() && o.isEmpty()) { + return 0; + } + if (this.isEmpty()) { + return -1; + } + if 
(o.isEmpty()) { + return 1; + } + + final ConciseSet other = convert(o); + + // the word at the end must be the same + int res = this.last - other.last; + if (res != 0) { + return res < 0 ? -1 : 1; + } + + // scan words from MSB to LSB + int thisIndex = this.lastWordIndex; + int otherIndex = other.lastWordIndex; + int thisWord = this.words[thisIndex]; + int otherWord = other.words[otherIndex]; + while (thisIndex >= 0 && otherIndex >= 0) { + if (!isLiteral(thisWord)) { + if (!isLiteral(otherWord)) { + // compare two sequences + // note that they are made up of at least two blocks, and we + // start comparing from the end, that is at blocks with no + // (un)set bits + if (isZeroSequence(thisWord)) { + if (isOneSequence(otherWord)) + // zeros < ones + { + return -1; + } + // compare two sequences of zeros + res = getSequenceCount(otherWord) - getSequenceCount(thisWord); + if (res != 0) { + return res < 0 ? -1 : 1; + } + } else { + if (isZeroSequence(otherWord)) + // ones > zeros + { + return 1; + } + // compare two sequences of ones + res = getSequenceCount(thisWord) - getSequenceCount(otherWord); + if (res != 0) { + return res < 0 ? 
-1 : 1; + } + } + // if the sequences are the same (both zeros or both ones) + // and have the same length, compare the first blocks in the + // next loop since such blocks might contain (un)set bits + thisWord = getLiteral(thisWord); + otherWord = getLiteral(otherWord); + } else { + // zeros < literal --> -1 + // ones > literal --> +1 + // note that the sequence is made up of at least two blocks, + // and we start comparing from the end, that is at a block + // with no (un)set bits + if (isZeroSequence(thisWord)) { + if (otherWord != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return -1; + } + } else { + if (otherWord != ConciseSetUtils.ALL_ONES_LITERAL) { + return 1; + } + } + if (getSequenceCount(thisWord) == 1) { + thisWord = getLiteral(thisWord); + } else { + thisWord--; + } + if (--otherIndex >= 0) { + otherWord = other.words[otherIndex]; + } + } + } else if (!isLiteral(otherWord)) { + // literal > zeros --> +1 + // literal < ones --> -1 + // note that the sequence is made up of at least two blocks, + // and we start comparing from the end, that is at a block + // with no (un)set bits + if (isZeroSequence(otherWord)) { + if (thisWord != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return 1; + } + } else { + if (thisWord != ConciseSetUtils.ALL_ONES_LITERAL) { + return -1; + } + } + if (--thisIndex >= 0) { + thisWord = this.words[thisIndex]; + } + if (getSequenceCount(otherWord) == 1) { + otherWord = getLiteral(otherWord); + } else { + otherWord--; + } + } else { + res = thisWord - otherWord; // equals getLiteralBits(thisWord) - getLiteralBits(otherWord) + if (res != 0) { + return res < 0 ? -1 : 1; + } + if (--thisIndex >= 0) { + thisWord = this.words[thisIndex]; + } + if (--otherIndex >= 0) { + otherWord = other.words[otherIndex]; + } + } + } + return thisIndex >= 0 ? 1 : (otherIndex >= 0 ? 
-1 : 0); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) + { + ConciseSet toRemove = empty(); + toRemove.fill(from, to); + this.removeAll(toRemove); + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) + { + ConciseSet toAdd = empty(); + toAdd.add(to); + toAdd.complement(); + toAdd.add(to); + + ConciseSet toRemove = empty(); + toRemove.add(from); + toRemove.complement(); + + toAdd.removeAll(toRemove); + + this.addAll(toAdd); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) + { + if (!add(e)) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return (lastWordIndex + 1) / Math.ceil((1 + last) / 32D); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return (double) (lastWordIndex + 1) / size(); + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + final StringBuilder s = new StringBuilder("INTERNAL REPRESENTATION:\n"); + final Formatter f = new Formatter(s, Locale.ENGLISH); + + if (isEmpty()) { + return s.append("null\n").toString(); + } + + f.format("Elements: %s\n", toString()); + + // elements + int firstBitInWord = 0; + for (int i = 0; i <= lastWordIndex; i++) { + // raw representation of words[i] + f.format("words[%d] = ", i); + String ws = toBinaryString(words[i]); + if (isLiteral(words[i])) { + s.append(ws.substring(0, 1)); + s.append("--"); + s.append(ws.substring(1)); + } else { + s.append(ws.substring(0, 2)); + s.append('-'); + if (simulateWAH) { + s.append("xxxxx"); + } else { + s.append(ws.substring(2, 7)); + } + s.append('-'); + s.append(ws.substring(7)); + } + s.append(" --> "); + + // decode words[i] + if (isLiteral(words[i])) { + // literal + s.append("literal: "); + s.append(toBinaryString(words[i]).substring(1)); + f.format(" ---> [from %d to %d] ", 
firstBitInWord, firstBitInWord + ConciseSetUtils.MAX_LITERAL_LENGTH - 1); + firstBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + // sequence + if (isOneSequence(words[i])) { + s.append('1'); + } else { + s.append('0'); + } + s.append(" block: "); + s.append(toBinaryString(getLiteralBits(getLiteral(words[i]))).substring(1)); + if (!simulateWAH) { + s.append(" (bit="); + int bit = (words[i] & 0x3E000000) >>> 25; + if (bit == 0) { + s.append("none"); + } else { + s.append(String.format("%4d", bit - 1)); + } + s.append(')'); + } + int count = getSequenceCount(words[i]); + f.format( + " followed by %d blocks (%d bits)", + getSequenceCount(words[i]), + maxLiteralLengthMultiplication(count) + ); + f.format( + " ---> [from %d to %d] ", + firstBitInWord, + firstBitInWord + (count + 1) * ConciseSetUtils.MAX_LITERAL_LENGTH - 1 + ); + firstBitInWord += (count + 1) * ConciseSetUtils.MAX_LITERAL_LENGTH; + } + s.append('\n'); + } + + // object attributes + f.format("simulateWAH: %b\n", simulateWAH); + f.format("last: %d\n", last); + f.format("size: %s\n", (size == -1 ? 
"invalid" : Integer.toString(size))); + f.format("words.length: %d\n", words.length); + f.format("lastWordIndex: %d\n", lastWordIndex); + + // compression + f.format("bitmap compression: %.2f%%\n", 100D * bitmapCompressionRatio()); + f.format("collection compression: %.2f%%\n", 100D * collectionCompressionRatio()); + + return s.toString(); + } + + /** + * Save the state of the instance to a stream + */ + private void writeObject(ObjectOutputStream s) throws IOException + { + if (words != null && lastWordIndex < words.length - 1) + // compact before serializing + { + words = Arrays.copyOf(words, lastWordIndex + 1); + } + s.defaultWriteObject(); + } + + /** + * Reconstruct the instance from a stream + */ + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException + { + s.defaultReadObject(); + if (words == null) { + reset(); + return; + } + lastWordIndex = words.length - 1; + updateLast(); + size = -1; + } + + /** + * Possible operations + */ + private enum Operator + { + /** + * @uml.property name="aND" + * @uml.associationEnd + */ + AND { + @Override + public int combineLiterals(int literal1, int literal2) + { + return literal1 & literal2; + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + return op1.empty(); + } + + /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ + private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op2 is completely hidden by op1 + if (isZeroSequence(op1.words[0])) { + return op1.empty(); + } + // op2 is left unchanged, but the rest of op1 is hidden + return op2.clone(); + } + return null; + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + 
{ + ConciseSet res = oneWayCombineDisjointSets(op1, op2); + if (res == null) { + res = oneWayCombineDisjointSets(op2, op1); + } + return res; + } + }, + + /** + * @uml.property name="oR" + * @uml.associationEnd + */ + OR { + @Override + public int combineLiterals(int literal1, int literal2) + { + return literal1 | literal2; + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + if (!op1.isEmpty()) { + return op1.clone(); + } + if (!op2.isEmpty()) { + return op2.clone(); + } + return op1.empty(); + } + + /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ + private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op2 is completely hidden by op1 + if (isOneSequence(op1.words[0])) { + return op1.clone(); + } + // op2 is left unchanged, but the rest of op1 must be appended... + + // ... first, allocate sufficient space for the result + ConciseSet res = op1.empty(); + res.words = new int[op1.lastWordIndex + op2.lastWordIndex + 3]; + res.lastWordIndex = op2.lastWordIndex; + + // ... then, copy op2 + System.arraycopy(op2.words, 0, res.words, 0, op2.lastWordIndex + 1); + + // ... 
finally, append op1 + WordIterator wordIterator = op1.new WordIterator(); + wordIterator.prepareNext(maxLiteralLengthDivision(op2.last) + 1); + wordIterator.flush(res); + if (op1.size < 0 || op2.size < 0) { + res.size = -1; + } else { + res.size = op1.size + op2.size; + } + res.last = op1.last; + res.compact(); + return res; + } + return null; + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + { + ConciseSet res = oneWayCombineDisjointSets(op1, op2); + if (res == null) { + res = oneWayCombineDisjointSets(op2, op1); + } + return res; + } + }, + + /** + * @uml.property name="xOR" + * @uml.associationEnd + */ + XOR { + @Override + public int combineLiterals(int literal1, int literal2) + { + return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 ^ literal2); + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + if (!op1.isEmpty()) { + return op1.clone(); + } + if (!op2.isEmpty()) { + return op2.clone(); + } + return op1.empty(); + } + + /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ + private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op2 is left unchanged by op1 + if (isZeroSequence(op1.words[0])) { + return OR.combineDisjointSets(op1, op2); + } + // op2 must be complemented, then op1 must be appended + // it is better to perform it normally... 
+ return null; + } + return null; + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + { + ConciseSet res = oneWayCombineDisjointSets(op1, op2); + if (res == null) { + res = oneWayCombineDisjointSets(op2, op1); + } + return res; + } + }, + + /** + * @uml.property name="aNDNOT" + * @uml.associationEnd + */ + ANDNOT { + @Override + public int combineLiterals(int literal1, int literal2) + { + return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 & (~literal2)); + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + if (!op1.isEmpty()) { + return op1.clone(); + } + return op1.empty(); + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op1 is left unchanged by op2 + if (isZeroSequence(op1.words[0])) { + return op1.clone(); + } + // op2 must be complemented, then op1 must be appended + // it is better to perform it normally... + return null; + } + // check whether the second operator starts with a sequence that + // completely "covers" the first operator + if (isSequenceWithNoBits(op2.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op2.words[0]) + 1) > op1.last) { + // op1 is left unchanged by op2 + if (isZeroSequence(op2.words[0])) { + return op1.clone(); + } + // op1 is cleared by op2 + return op1.empty(); + } + return null; + } + },; + + /** + * Performs the operation on the given literals + * + * @param literal1 left operand + * @param literal2 right operand + * + * @return literal representing the result of the specified operation + */ + public abstract int combineLiterals(int literal1, int literal2); + + /** + * Performs the operation when one or both operands are empty set + *

/**
 * Iterator over the bits of a single literal/fill word.
 * <p>
 * An expander is loaded with one word through
 * {@link #reset(int, int, boolean)} and then yields the integers encoded by
 * that word, in either direction.
 */
private interface WordExpander
{
  /**
   * @return true if {@link #next()} can return another integer
   */
  public boolean hasNext();

  /**
   * @return true if {@link #previous()} can return another integer
   */
  public boolean hasPrevious();

  /**
   * @return the next integer in ascending order
   */
  public int next();

  /**
   * @return the next integer in descending order
   */
  public int previous();

  /**
   * Moves the cursor backwards so that a following {@link #previous()} does
   * not return integers greater than the given one.
   *
   * @param i upper bound for the integers still to be returned by {@link #previous()}
   */
  public void skipAllAfter(int i);

  /**
   * Moves the cursor forwards so that a following {@link #next()} does not
   * return integers less than the given one.
   *
   * @param i lower bound for the integers still to be returned by {@link #next()}
   */
  public void skipAllBefore(int i);

  /**
   * Loads a new word.
   *
   * @param offset        integer corresponding to the first bit of the word
   * @param word          word to expand
   * @param fromBeginning true to place the cursor before the first
   *                      integer, false to place it after the last one
   */
  public void reset(int offset, int word, boolean fromBeginning);
}

/**
 * Iterates over words, from the rightmost (LSB) to the leftmost (MSB).
 * <p>
 * When {@link ConciseSet#simulateWAH} is false, mixed
 * sequences are "broken" into a literal (i.e., the first block is coded
 * with a literal in {@link #word}) and a "pure" sequence (i.e., the
 * remaining blocks are coded with a sequence with no bits in {@link #word})
 */
private class WordIterator
{
  /**
   * copy of the current word
   */
  int word;

  /**
   * current word index
   */
  int index;

  /**
   * true if {@link #word} is a literal
   */
  boolean isLiteral;

  /**
   * number of blocks in the current word (1 for literals, > 1 for sequences).
   * While the first block of a mixed sequence is being exposed as a literal,
   * this still holds the block count of the whole sequence.
   */
  int count;

  /**
   * Initialize data: positions the iterator on the first word.
   */
  WordIterator()
  {
    isLiteral = false;
    index = -1;
    prepareNext();
  }

  /**
   * @return true if there is no current word
   */
  boolean exhausted()
  {
    return index > lastWordIndex;
  }

  /**
   * Prepare the next value for {@link #word} after skipping a given
   * number of 31-bit blocks in the current sequence.
   * <p>
   * NOTE: it works only when the current word is within a
   * sequence, namely a literal cannot be skipped. Moreover, the number of
   * blocks to skip must not exceed the remaining blocks in the current
   * sequence.
   *
   * @param c number of 31-bit "blocks" to skip
   *
   * @return false if the next word does not exist
   */
  boolean prepareNext(int c)
  {
    assert c <= count;
    count -= c;
    if (count == 0) {
      // the current word has been consumed entirely: move to the following one
      return prepareNext();
    }
    return true;
  }

  /**
   * Prepare the next value for {@link #word}
   *
   * @return false if the next word does not exist
   */
  boolean prepareNext()
  {
    // second half of a mixed sequence: its first block was exposed as a literal,
    // now expose the remaining blocks as a sequence with no flagged bit
    if (!simulateWAH && isLiteral && count > 1) {
      count--;
      isLiteral = false;
      word = getSequenceWithNoBits(words[index]) - 1;
      return true;
    }

    index++;
    if (index > lastWordIndex) {
      return false;
    }
    word = words[index];
    isLiteral = isLiteral(word);
    if (!isLiteral) {
      count = getSequenceCount(word) + 1;
      if (!simulateWAH && !isSequenceWithNoBits(word)) {
        // first half of a mixed sequence: expose its first block as a literal
        isLiteral = true;
        // bits 25-29 hold (position + 1) of the flagged bit. Java uses only the low five
        // bits of an int shift distance, so the sequence-type bits in (word >>> 25) are ignored
        int bit = (1 << (word >>> 25)) >>> 1;
        word = isZeroSequence(word)
               ? (ConciseSetUtils.ALL_ZEROS_LITERAL | bit)
               : (ConciseSetUtils.ALL_ONES_LITERAL & ~bit);
      }
    } else {
      count = 1;
    }
    return true;
  }

  /**
   * @return the literal word corresponding to each block contained in the
   * current sequence word. Not to be used with literal words!
   */
  int toLiteral()
  {
    assert !isLiteral;
    // (word << 1) moves the sequence-type bit into the sign position and the arithmetic
    // right shift replicates it over the whole word: all 1's for ones, all 0's for zeros
    return ConciseSetUtils.ALL_ZEROS_LITERAL | ((word << 1) >> ConciseSetUtils.MAX_LITERAL_LENGTH);
  }

  /**
   * Copies all the remaining words in the given set
   *
   * @param s set where the words must be copied
   *
   * @return false if there are no words to copy
   */
  private boolean flush(ConciseSet s)
  {
    // nothing to flush
    if (exhausted()) {
      return false;
    }

    // try to "compress" the first few words: keep appending through the set's own
    // append methods until the word just stored is the same as the current one
    do {
      if (isLiteral) {
        s.appendLiteral(word);
      } else {
        s.appendFill(count, word);
      }
    } while (prepareNext() && s.words[s.lastWordIndex] != word);

    // copy remaining words "as-is"
    int delta = lastWordIndex - index + 1;
    System.arraycopy(words, index, s.words, s.lastWordIndex + 1, delta);
    s.lastWordIndex += delta;
    s.last = last;
    return true;
  }
}
{ + buffer[len++] = offset + i; + } + } + current = fromBeginning ? 0 : len; + } else { + if (isZeroSequence(word)) { + if (simulateWAH || isSequenceWithNoBits(word)) { + len = 0; + current = 0; + } else { + len = 1; + buffer[0] = offset + ((0x3FFFFFFF & word) >>> 25) - 1; + current = fromBeginning ? 0 : 1; + } + } else { + throw new RuntimeException("sequence of ones!"); + } + } + } + } + + /** + * Iterator over the bits of one-fill words + */ + private class OneFillExpander implements WordExpander + { + int firstInt = 1; + int lastInt = -1; + int current = 0; + int exception = -1; + + @Override + public boolean hasNext() + { + return current < lastInt; + } + + @Override + public boolean hasPrevious() + { + return current > firstInt; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + current++; + if (!simulateWAH && current == exception) { + current++; + } + return current; + } + + @Override + public int previous() + { + if (!hasPrevious()) { + throw new NoSuchElementException(); + } + current--; + if (!simulateWAH && current == exception) { + current--; + } + return current; + } + + @Override + public void skipAllAfter(int i) + { + if (i >= current) { + return; + } + current = i + 1; + } + + @Override + public void skipAllBefore(int i) + { + if (i <= current) { + return; + } + current = i - 1; + } + + @Override + public void reset(int offset, int word, boolean fromBeginning) + { + if (!isOneSequence(word)) { + throw new RuntimeException("NOT a sequence of ones!"); + } + firstInt = offset; + lastInt = offset + maxLiteralLengthMultiplication(getSequenceCount(word) + 1) - 1; + if (!simulateWAH) { + exception = offset + ((0x3FFFFFFF & word) >>> 25) - 1; + if (exception == firstInt) { + firstInt++; + } + if (exception == lastInt) { + lastInt--; + } + } + current = fromBeginning ? 
(firstInt - 1) : (lastInt + 1); + } + } + + /** + * Iterator for all the integers of a {@link ConciseSet} instance + */ + private class BitIterator implements IntIterator + { + /** + * @uml.property name="litExp" + * @uml.associationEnd + */ + final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander(); + /** + * @uml.property name="oneExp" + * @uml.associationEnd + */ + final OneFillExpander oneExp = new OneFillExpander(); + /** + * @uml.property name="exp" + * @uml.associationEnd + */ + WordExpander exp; + int nextIndex = 0; + int nextOffset = 0; + + private BitIterator() + { + nextWord(); + } + + private void nextWord() + { + final int word = words[nextIndex++]; + exp = isOneSequence(word) ? oneExp : litExp; + exp.reset(nextOffset, word, true); + + // prepare next offset + if (isLiteral(word)) { + nextOffset += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + nextOffset += maxLiteralLengthMultiplication(getSequenceCount(word) + 1); + } + } + + @Override + public boolean hasNext() + { + return nextIndex <= lastWordIndex || exp.hasNext(); + } + + @Override + public int next() + { + while (!exp.hasNext()) { + if (nextIndex > lastWordIndex) { + throw new NoSuchElementException(); + } + nextWord(); + } + return exp.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public void skipAllBefore(int element) + { + while (true) { + exp.skipAllBefore(element); + if (exp.hasNext() || nextIndex > lastWordIndex) { + return; + } + nextWord(); + } + } + + @Override + public IntIterator clone() + { + BitIterator retVal = new BitIterator(); + retVal.exp = exp; + retVal.nextIndex = nextIndex; + retVal.nextOffset = nextOffset; + return retVal; + } + } + + /** + * @author alessandrocolantonio + */ + private class ReverseBitIterator implements IntIterator + { + /** + * @uml.property name="litExp" + * @uml.associationEnd + */ + final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander(); + 
/** + * @uml.property name="oneExp" + * @uml.associationEnd + */ + final OneFillExpander oneExp = new OneFillExpander(); + /** + * @uml.property name="exp" + * @uml.associationEnd + */ + WordExpander exp; + int nextIndex = lastWordIndex; + int nextOffset = maxLiteralLengthMultiplication(maxLiteralLengthDivision(last) + 1); + int firstIndex; // first non-zero block + + ReverseBitIterator() + { + // identify the first non-zero block + if ((isSequenceWithNoBits(words[0]) && isZeroSequence(words[0])) || (isLiteral(words[0]) + && words[0] + == ConciseSetUtils.ALL_ZEROS_LITERAL)) { + firstIndex = 1; + } else { + firstIndex = 0; + } + previousWord(); + } + + void previousWord() + { + final int word = words[nextIndex--]; + exp = isOneSequence(word) ? oneExp : litExp; + if (isLiteral(word)) { + nextOffset -= ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + nextOffset -= maxLiteralLengthMultiplication(getSequenceCount(word) + 1); + } + exp.reset(nextOffset, word, false); + } + + @Override + public boolean hasNext() + { + return nextIndex >= firstIndex || exp.hasPrevious(); + } + + @Override + public int next() + { + while (!exp.hasPrevious()) { + if (nextIndex < firstIndex) { + throw new NoSuchElementException(); + } + previousWord(); + } + return exp.previous(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public void skipAllBefore(int element) + { + while (true) { + exp.skipAllAfter(element); + if (exp.hasPrevious() || nextIndex < firstIndex) { + return; + } + previousWord(); + } + } + + @Override + public IntIterator clone() + { + ReverseBitIterator retVal = new ReverseBitIterator(); + retVal.exp = exp; + retVal.nextIndex = nextIndex; + retVal.nextOffset = nextOffset; + retVal.firstIndex = firstIndex; + return retVal; + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java new file mode 
/**
 * Static helpers for reading and building the 32-bit words of a CONCISE
 * compressed bitmap, plus the word expanders used to enumerate the integers
 * encoded by a single word.
 * <p>
 * Word layout, as implemented by the methods below:
 * <ul>
 * <li>bit 31 set: <i>literal</i> word, bits 30..0 are 31 payload bits;</li>
 * <li>bits 31..30 equal to <code>00</code>: sequence (fill) of 0's;</li>
 * <li>bits 31..30 equal to <code>01</code>: sequence (fill) of 1's;</li>
 * <li>in a sequence, bits 29..25 hold <code>position + 1</code> of the single
 * flipped bit inside the first block (<code>0</code> means "no flipped bit")
 * and bits 24..0 hold the number of 31-bit blocks that follow the first one.</li>
 * </ul>
 */
public class ConciseSetUtils
{
  /**
   * The highest representable integer.
   * <p>
   * A sequence word keeps 25 bits for its block counter (32 bits minus 2 bits
   * for the word type and 5 bits for the flipped-bit position), so the
   * longest run of 0's spans <code>31 * (1 &lt;&lt; 25)</code> bits. The
   * greatest element is that run followed by a literal whose highest payload
   * bit (bit 30) is set.
   */
  public final static int MAX_ALLOWED_INTEGER = 31 * (1 << 25) + 30; // 1040187422

  /**
   * The lowest representable integer.
   */
  public final static int MIN_ALLOWED_SET_BIT = 0;

  /**
   * Maximum number of representable bits within a literal
   */
  public final static int MAX_LITERAL_LENGTH = 31;

  /**
   * Literal that represents all bits set to 1 (and MSB = 1)
   */
  public final static int ALL_ONES_LITERAL = 0xFFFFFFFF;

  /**
   * Literal that represents all bits set to 0 (and MSB = 1)
   */
  public final static int ALL_ZEROS_LITERAL = 0x80000000;

  /**
   * All bits set to 1 and MSB = 0
   */
  public final static int ALL_ONES_WITHOUT_MSB = 0x7FFFFFFF;

  /**
   * Sequence bit
   */
  public final static int SEQUENCE_BIT = 0x40000000;

  /**
   * Calculates <code>n % 31</code> without a division.
   * <p>
   * Since <code>32 % 31 == 1</code>, the sum of the base-32 digits of a number
   * is congruent to the number itself modulo 31. The digits are first added
   * pairwise in parallel, then the partial sums are folded until the value
   * fits in five bits.
   * <p>
   * See <a href="http://graphics.stanford.edu/~seander/bithacks.html">http://graphics.stanford.edu/~seander/bithacks.html</a>
   *
   * @param n number to divide (callers pass non-negative bit indices)
   *
   * @return <code>n % 31</code>
   */
  public static int maxLiteralLengthModulus(int n)
  {
    int m = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F);
    m = (m >>> 15) + (m & 0x00007FFF);
    // up to five "check, then fold the upper bits onto the lower five" rounds
    for (int round = 0; round < 5; round++) {
      if (m <= 31) {
        return m == 31 ? 0 : m;
      }
      m = (m >>> 5) + (m & 0x0000001F);
    }
    return m == 31 ? 0 : m;
  }

  /**
   * Calculates the multiplication by 31 through a shift and a subtraction.
   *
   * @param n number to multiply
   *
   * @return <code>n * 31</code>
   */
  public static int maxLiteralLengthMultiplication(int n)
  {
    return (n << 5) - n;
  }

  /**
   * Calculates the division by 31
   *
   * @param n number to divide
   *
   * @return <code>n / 31</code>
   */
  public static int maxLiteralLengthDivision(int n)
  {
    return n / 31;
  }

  /**
   * Checks whether a word is a literal one
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a literal word
   */
  public static boolean isLiteral(int word)
  {
    // "word" must be 1*
    return (word & 0x80000000) != 0;
  }

  /**
   * Checks whether a word contains a sequence of 1's
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 1's
   */
  public static boolean isOneSequence(int word)
  {
    // "word" must be 01*
    return (word & 0xC0000000) == SEQUENCE_BIT;
  }

  /**
   * Checks whether a word contains a sequence of 0's
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 0's
   */
  public static boolean isZeroSequence(int word)
  {
    // "word" must be 00*
    return (word & 0xC0000000) == 0;
  }

  /**
   * Checks whether a word is a sequence (of 0's or of 1's) whose flipped-bit
   * field is empty, i.e. a pure fill.
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 0's or 1's
   * but with no (un)set bit
   */
  public static boolean isSequenceWithNoBits(int word)
  {
    // "word" must be 0?00000*
    return (word & 0xBE000000) == 0x00000000;
  }

  /**
   * Gets the number of blocks of 1's or 0's stored in a sequence word
   *
   * @param word word to check
   *
   * @return the number of blocks that follow the first block of 31 bits
   */
  public static int getSequenceCount(int word)
  {
    // get the 25 LSB bits
    return word & 0x01FFFFFF;
  }

  /**
   * Gets the total number of 31-bit blocks covered by a sequence word.
   *
   * @param word sequence word to check
   *
   * @return {@link #getSequenceCount(int)} plus one (the first block)
   */
  public static int getSequenceNumWords(int word)
  {
    return getSequenceCount(word) + 1;
  }

  /**
   * Clears the (un)set bit in a sequence
   *
   * @param word word to check
   *
   * @return the sequence corresponding to the given sequence and with no
   * (un)set bits
   */
  public static int getSequenceWithNoBits(int word)
  {
    // clear bits 29 to 25 (the flipped-bit field)
    return (word & 0xC1FFFFFF);
  }

  /**
   * Gets the literal word that represents the first 31 bits of the given
   * word (i.e. the first block of a sequence word, or the bits of a literal word).
   * <p>
   * If the word is a literal, it returns the unmodified word. In case of a
   * sequence, it returns a literal that represents the first 31 bits of the
   * given sequence word.
   *
   * @param word        word to check
   * @param simulateWAH when <code>true</code> the flipped-bit field of a
   *                    sequence is ignored and a plain all-zeros / all-ones
   *                    literal is returned
   *
   * @return the literal contained within the given word, with the most
   * significant bit set to 1.
   */
  public static int getLiteral(int word, boolean simulateWAH)
  {
    if (isLiteral(word)) {
      return word;
    }

    if (simulateWAH) {
      return isZeroSequence(word) ? ALL_ZEROS_LITERAL : ALL_ONES_LITERAL;
    }

    // get bits from 30 to 26 and use them to set the corresponding bit
    // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent
    // NOTE: ">>> 1" is required since 00000 represents no bits and 00001 the LSB bit set
    int literal = (1 << (word >>> 25)) >>> 1;
    return isZeroSequence(word)
           ? (ALL_ZEROS_LITERAL | literal)
           : (ALL_ONES_LITERAL & ~literal);
  }

  /**
   * Builds the literal for the first block of a zero sequence: all payload
   * bits are 0 except the flipped bit, if the word has one.
   *
   * @param word zero-sequence word
   *
   * @return the literal describing the first block
   */
  public static int getLiteralFromZeroSeqFlipBit(int word)
  {
    int flipBit = getFlippedBit(word);
    if (flipBit > -1) {
      return ALL_ZEROS_LITERAL | flipBitAsBinaryString(flipBit);
    }
    return ALL_ZEROS_LITERAL;
  }

  /**
   * Builds the literal for the first block of a one sequence: all payload
   * bits are 1 except the flipped bit, if the word has one.
   *
   * @param word one-sequence word
   *
   * @return the literal describing the first block
   */
  public static int getLiteralFromOneSeqFlipBit(int word)
  {
    int flipBit = getFlippedBit(word);
    if (flipBit > -1) {
      return ALL_ONES_LITERAL ^ flipBitAsBinaryString(flipBit);
    }
    return ALL_ONES_LITERAL;
  }

  /**
   * Gets the position of the flipped bit within a sequence word. If the
   * sequence has no set/unset bit, returns -1.
   * <p>
   * Note that the parameter <i>must</i> be a sequence word, otherwise the
   * result is meaningless.
   *
   * @param word sequence word to check
   *
   * @return the position of the flipped bit, from 0 to 30. If the sequence
   * has no set/unset bit, returns -1.
   */
  public static int getFlippedBit(int word)
  {
    // get bits from 29 to 25
    // NOTE: "-1" is required since 00000 represents no bits and 00001 the LSB bit set
    return ((word >>> 25) & 0x0000001F) - 1;
  }

  /**
   * Returns the word that has only the bit at the given position set.
   * <p>
   * Computed with an integer shift rather than floating-point
   * exponentiation. Positions outside <code>0..30</code> (never produced by
   * {@link #getFlippedBit(int)} except for <code>-1</code>) keep the values
   * the floating-point computation yielded: <code>0</code> for negative
   * positions and {@link Integer#MAX_VALUE} for positions above 30.
   *
   * @param flipBit bit position
   *
   * @return <code>2<sup>flipBit</sup></code> as an <code>int</code>
   */
  public static int flipBitAsBinaryString(int flipBit)
  {
    if (flipBit < 0) {
      return 0;
    }
    if (flipBit > 30) {
      return Integer.MAX_VALUE;
    }
    return 1 << flipBit;
  }

  /**
   * Gets the number of set bits within the literal word
   *
   * @param word literal word
   *
   * @return the number of set bits within the literal word
   */
  public static int getLiteralBitCount(int word)
  {
    // same JDK population count the single-bit predicates of this class use
    return Integer.bitCount(getLiteralBits(word));
  }

  /**
   * Gets the bits contained within the literal word
   *
   * @param word literal word
   *
   * @return the literal word with the most significant bit cleared
   */
  public static int getLiteralBits(int word)
  {
    return ALL_ONES_WITHOUT_MSB & word;
  }

  /**
   * @param word word to check
   *
   * @return <code>true</code> if the word equals {@link #ALL_ONES_LITERAL}
   */
  public static boolean isAllOnesLiteral(int word)
  {
    return word == ALL_ONES_LITERAL;
  }

  /**
   * Checks whether no payload bit of the word is set.
   * <p>
   * Only bits 30..0 are inspected, so besides {@link #ALL_ZEROS_LITERAL} the
   * word <code>0</code> is accepted as well.
   *
   * @param word word to check
   *
   * @return <code>true</code> if bits 30..0 of the word are all 0
   */
  public static boolean isAllZerosLiteral(int word)
  {
    return (word | 0x80000000) == 0x80000000;
  }

  /**
   * @param word word to check
   *
   * @return <code>true</code> if the word is a literal with exactly one payload bit cleared
   */
  public static boolean isLiteralWithSingleZeroBit(int word)
  {
    return isLiteral(word) && (Integer.bitCount(~word) == 1);
  }

  /**
   * @param word word to check
   *
   * @return <code>true</code> if the word is a literal with exactly one payload bit set
   */
  public static boolean isLiteralWithSingleOneBit(int word)
  {
    // two set bits in total: the literal marker (bit 31) plus one payload bit
    return isLiteral(word) && (Integer.bitCount(word) == 2);
  }

  /**
   * Clears every payload bit above the given position, keeping the literal
   * marker bit.
   *
   * @param lastWord   literal word to trim
   * @param lastSetBit position (0-based) of the highest bit to keep
   *
   * @return the word restricted to bit 31 and bits <code>lastSetBit..0</code>
   */
  public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
  {
    final int keepMask = ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit));
    return lastWord & keepMask;
  }

  /**
   * Builds a literal whose payload bits below the given position are all set.
   *
   * @param bit number of low payload bits to set
   *
   * @return literal with bits <code>bit - 1 .. 0</code> set
   */
  public static int onesUntil(int bit)
  {
    return 0x80000000 | ((1 << bit) - 1);
  }

  /**
   * @return a fresh expander for literal and zero-fill words
   */
  public static LiteralAndZeroFillExpander newLiteralAndZeroFillExpander()
  {
    return new LiteralAndZeroFillExpander();
  }

  /**
   * @return a fresh expander for one-fill words
   */
  public static OneFillExpander newOneFillExpander()
  {
    return new OneFillExpander();
  }

  /**
   * Bidirectional cursor over the integers encoded by a single word.
   */
  public interface WordExpander
  {
    boolean hasNext();

    boolean hasPrevious();

    int next();

    int previous();

    /** Moves the cursor so that {@link #previous()} does not return integers greater than <code>i</code>. */
    void skipAllAfter(int i);

    /** Moves the cursor so that {@link #next()} does not return integers smaller than <code>i</code>. */
    void skipAllBefore(int i);

    /**
     * Loads a new word.
     *
     * @param offset        integer represented by the first bit of the word
     * @param word          word to expand
     * @param fromBeginning <code>true</code> to place the cursor before the
     *                      first integer, <code>false</code> to place it after the last one
     */
    void reset(int offset, int word, boolean fromBeginning);

    WordExpander clone();
  }

  /**
   * Iterator over the bits of literal and zero-fill words
   */
  public static class LiteralAndZeroFillExpander implements WordExpander
  {
    /** Integers of the current word, in ascending order; only the first {@link #len} entries are valid. */
    final int[] buffer = new int[MAX_LITERAL_LENGTH];
    int len = 0;
    /** Cursor: index of the entry {@link #next()} returns. */
    int current = 0;

    @Override
    public boolean hasNext()
    {
      return current < len;
    }

    @Override
    public boolean hasPrevious()
    {
      return current > 0;
    }

    @Override
    public int next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      return buffer[current++];
    }

    @Override
    public int previous()
    {
      if (!hasPrevious()) {
        throw new NoSuchElementException();
      }
      return buffer[--current];
    }

    @Override
    public void skipAllAfter(int i)
    {
      while (hasPrevious() && buffer[current - 1] > i) {
        current--;
      }
    }

    @Override
    public void skipAllBefore(int i)
    {
      while (hasNext() && buffer[current] < i) {
        current++;
      }
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if the word is a sequence of 1's
     */
    @Override
    public void reset(int offset, int word, boolean fromBeginning)
    {
      if (isLiteral(word)) {
        // visit only the set payload bits, lowest first
        len = 0;
        for (int bits = getLiteralBits(word); bits != 0; bits &= bits - 1) {
          buffer[len++] = offset + Integer.numberOfTrailingZeros(bits);
        }
        current = fromBeginning ? 0 : len;
        return;
      }

      if (!isZeroSequence(word)) {
        throw new IllegalArgumentException("sequence of ones!");
      }

      // a zero fill holds at most one integer: its flipped bit
      final int flipped = getFlippedBit(word);
      if (flipped < 0) {
        len = 0;
        current = 0;
      } else {
        len = 1;
        buffer[0] = offset + flipped;
        current = fromBeginning ? 0 : 1;
      }
    }

    @Override
    public WordExpander clone()
    {
      LiteralAndZeroFillExpander retVal = new LiteralAndZeroFillExpander();
      System.arraycopy(buffer, 0, retVal.buffer, 0, buffer.length);
      retVal.len = len;
      retVal.current = current;
      return retVal;
    }
  }

  /**
   * Iterator over the bits of one-fill words
   */
  public static class OneFillExpander implements WordExpander
  {
    /** First integer of the run (after excluding the flipped bit when it sits on the edge). */
    int firstInt = 1;
    /** Last integer of the run (after excluding the flipped bit when it sits on the edge). */
    int lastInt = -1;
    /** Cursor: the integer returned most recently, or one step outside the run right after a reset. */
    int current = 0;
    /** The integer missing from the run (flipped bit); <code>offset - 1</code> when there is none. */
    int exception = -1;

    @Override
    public boolean hasNext()
    {
      return current < lastInt;
    }

    @Override
    public boolean hasPrevious()
    {
      return current > firstInt;
    }

    @Override
    public int next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      current++;
      if (current == exception) {
        current++;
      }
      return current;
    }

    @Override
    public int previous()
    {
      if (!hasPrevious()) {
        throw new NoSuchElementException();
      }
      current--;
      if (current == exception) {
        current--;
      }
      return current;
    }

    @Override
    public void skipAllAfter(int i)
    {
      if (i >= current) {
        return;
      }
      current = i + 1;
    }

    @Override
    public void skipAllBefore(int i)
    {
      if (i <= current) {
        return;
      }
      current = i - 1;
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if the word is not a sequence of 1's
     */
    @Override
    public void reset(int offset, int word, boolean fromBeginning)
    {
      if (!isOneSequence(word)) {
        throw new IllegalArgumentException("NOT a sequence of ones!");
      }
      firstInt = offset;
      lastInt = offset + maxLiteralLengthMultiplication(getSequenceCount(word) + 1) - 1;

      // without a flipped bit this is offset - 1, which lies outside the run
      exception = offset + getFlippedBit(word);
      if (exception == firstInt) {
        firstInt++;
      }
      if (exception == lastInt) {
        lastInt--;
      }

      current = fromBeginning ? (firstInt - 1) : (lastInt + 1);
    }

    @Override
    public WordExpander clone()
    {
      OneFillExpander retVal = new OneFillExpander();
      retVal.firstInt = firstInt;
      retVal.lastInt = lastInt;
      retVal.current = current;
      retVal.exception = exception;
      return retVal;
    }
  }
}

+ * It is modeled on {@link BitSet} rather than being a subclass of it. More specifically, union and + * intersection operations are mainly derived from the code of {@link BitSet} to + * provide bitwise "or" and "and". + *

+ * The iterator implemented for this class allows for modifications during the + * iteration, that is it is possible to add/remove elements through + * {@link #add(int)}, {@link #remove(int)}, {@link #addAll(IntSet)}, + * {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, + * {@link IntIterator#next()} returns the first integral greater than the last + * visited one. + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class FastSet extends AbstractIntSet implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 6519808981110513440L; + + /** + * number of bits within each word + */ + private final static int WORD_SIZE = 32; + + /** + * 32-bit string of all 1's + */ + private static final int ALL_ONES_WORD = 0xFFFFFFFF; + + /** + * all bits, grouped in blocks of length 32 + */ + private int[] words; + + /** + * index of the first empty word, that is the number of words in the logical + * size of this {@link FastSet} + */ + private transient int firstEmptyWord; + + /** + * cached set size (only for fast size() call). When -1, the cache is invalid + */ + private transient int size; + + /** + * Creates a new, empty set. + */ + public FastSet() + { + clear(); + } + + /** + * Creates a new, empty set. It preallocates the space for + * maxWordsInUse words. 
+ */ + private FastSet(int wordsToAllocate) + { + firstEmptyWord = 0; + size = 0; + words = new int[wordsToAllocate]; + } + + /** + * Given a number, it returns the multiplication by the number of bits for each block + */ + private static int multiplyByWordSize(int i) + { + return i << 5; // i * WORD_SIZE; + } + + /** + * Given a bit index, it returns the index of the word containing it + */ + private static int wordIndex(int bitIndex) + { + if (bitIndex < 0) { + throw new IndexOutOfBoundsException("index < 0: " + bitIndex); + } + return bitIndex >> 5; + } + + /** + * Given a bit index, it returns the index of the word containing it + */ + private static int wordIndexNoCheck(int bitIndex) + { + return bitIndex >> 5; + } + + /** + * Generates the 32-bit binary representation of a given word (debug only) + * + * @param word word to represent + * + * @return 32-character string that represents the given word + */ + private static String toBinaryString(int word) + { + String lsb = Integer.toBinaryString(word); + StringBuilder pad = new StringBuilder(); + for (int i = lsb.length(); i < 32; i++) { + pad.append('0'); + } + return pad.append(lsb).toString(); + } + + /** + * Sets the field {@link #firstEmptyWord} with the logical size in words of the + * bit set. + */ + private void fixFirstEmptyWord() + { + int i = firstEmptyWord - 1; + final int[] localWords = words; // faster + while (i >= 0 && localWords[i] == 0) { + i--; + } + firstEmptyWord = i + 1; + } + + /** + * Ensures that the {@link FastSet} can hold enough words. + * + * @param wordsRequired the minimum acceptable number of words. + */ + private void ensureCapacity(int wordsRequired) + { + if (words.length >= wordsRequired) { + return; + } + int newLength = Math.max(words.length << 1, wordsRequired); + words = Arrays.copyOf(words, newLength); + } + + /** + * Ensures that the {@link FastSet} can accommodate a given word index + * + * @param wordIndex the index to be accommodated. 
+ */ + private void expandTo(int wordIndex) + { + int wordsRequired = wordIndex + 1; + if (firstEmptyWord < wordsRequired) { + ensureCapacity(wordsRequired); + firstEmptyWord = wordsRequired; + } + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! + FastSet res = new FastSet(); + res.firstEmptyWord = firstEmptyWord; + res.size = size; + res.words = Arrays.copyOf(words, firstEmptyWord); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + int h = 1; + final int[] localWords = words; // faster + for (int i = 0; i < firstEmptyWord; i++) { + h = (h << 5) - h + localWords[i]; + } + return h; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof FastSet)) { + return super.equals(obj); + } + + final FastSet other = (FastSet) obj; + if (firstEmptyWord != other.firstEmptyWord) { + return false; + } + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + for (int i = 0; i < firstEmptyWord; i++) { + if (localWords[i] != localOtherWords[i]) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return firstEmptyWord == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + // check if the cached size is invalid + if (size < 0) { + size = BitCount.count(words, firstEmptyWord); + } + return size; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int i) + { + int wordIndex = wordIndex(i); + expandTo(wordIndex); + int before = words[wordIndex]; + words[wordIndex] |= (1 << i); + if (before != words[wordIndex]) { + if (size >= 0) { + size++; + } + return true; + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(int i) + { + if (i < 0) { + return false; + } + + int wordIndex = 
wordIndex(i); + if (wordIndex >= firstEmptyWord) { + return false; + } + int before = words[wordIndex]; + words[wordIndex] &= ~(1 << i); + if (before != words[wordIndex]) { + if (size >= 0) { + size--; + } + fixFirstEmptyWord(); + return true; + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + if (c == null || c.isEmpty() || this == c) { + return false; + } + + final FastSet other = convert(c); + + int wordsInCommon = Math.min(firstEmptyWord, other.firstEmptyWord); + + boolean modified = false; + if (firstEmptyWord < other.firstEmptyWord) { + modified = true; + ensureCapacity(other.firstEmptyWord); + firstEmptyWord = other.firstEmptyWord; + } + + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + // Perform logical OR on words in common + for (int i = 0; i < wordsInCommon; i++) { + int before = localWords[i]; + localWords[i] |= localOtherWords[i]; + modified = modified || before != localWords[i]; + } + + // Copy any remaining words + if (wordsInCommon < other.firstEmptyWord) { + modified = true; + System.arraycopy( + other.words, wordsInCommon, words, + wordsInCommon, firstEmptyWord - wordsInCommon + ); + } + if (modified) { + size = -1; + } + return modified; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + if (c == null || c.isEmpty() || isEmpty()) { + return false; + } + if (c == this) { + clear(); + return true; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + // Perform logical (a & !b) on words in common + boolean modified = false; + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + int before = localWords[i]; + localWords[i] &= ~localOtherWords[i]; + modified = modified || before != localWords[i]; + } + if (modified) { + fixFirstEmptyWord(); + size = -1; + } + return modified; + } + + /** 
+ * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + if (isEmpty() || c == this) { + return false; + } + if (c == null || c.isEmpty()) { + clear(); + return true; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + boolean modified = false; + if (firstEmptyWord > other.firstEmptyWord) { + modified = true; + while (firstEmptyWord > other.firstEmptyWord) { + localWords[--firstEmptyWord] = 0; + } + } + + // Perform logical AND on words in common + for (int i = 0; i < firstEmptyWord; i++) { + int before = localWords[i]; + localWords[i] &= localOtherWords[i]; + modified = modified || before != localWords[i]; + } + if (modified) { + fixFirstEmptyWord(); + size = -1; + } + return modified; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + words = new int[10]; + firstEmptyWord = 0; + size = 0; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int i) + { + if (isEmpty() || i < 0) { + return false; + } + int wordIndex = wordIndexNoCheck(i); + return (wordIndex < firstEmptyWord) + && ((words[wordIndex] & (1 << i)) != 0); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final FastSet other = convert(c); + + if (other.firstEmptyWord > firstEmptyWord) { + return false; + } + + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + for (int i = 0; i < other.firstEmptyWord; i++) { + int o = localOtherWords[i]; + if ((localWords[i] & o) != o) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + if ((size >= 0 && size < minElements) || c == null || c.isEmpty() || 
isEmpty()) { + return false; + } + if (this == c) { + return size() >= minElements; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + int count = 0; + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + count += BitCount.count(localWords[i] & localOtherWords[i]); + if (count >= minElements) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + if ((localWords[i] & localOtherWords[i]) != 0) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet c) + { + if (c == null || c.isEmpty()) { + return 0; + } + if (c == this) { + return size(); + } + if (isEmpty()) { + return 0; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + int count = 0; + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + count += BitCount.count(localWords[i] & localOtherWords[i]); + } + return count; + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + return new BitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() + { + return new ReverseBitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return multiplyByWordSize(firstEmptyWord - 1) + + (WORD_SIZE - Integer.numberOfLeadingZeros(words[firstEmptyWord - 1])) - 1; 
+ } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + if (isEmpty()) { + return; + } + if (size > 0) { + size = last() - size + 1; + } + int lastWordMask = ALL_ONES_WORD >>> Integer.numberOfLeadingZeros(words[firstEmptyWord - 1]); + final int[] localWords = words; // faster + for (int i = 0; i < firstEmptyWord - 1; i++) { + localWords[i] ^= ALL_ONES_WORD; + } + localWords[firstEmptyWord - 1] ^= lastWordMask; + fixFirstEmptyWord(); + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet complemented() + { + FastSet clone = clone(); + clone.complement(); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet empty() + { + return new FastSet(); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return 1D; + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return (double) firstEmptyWord / size(); + } + + /** + * Convert a given collection to a {@link FastSet} instance + */ + private FastSet convert(IntSet c) + { + if (c instanceof FastSet) { + return (FastSet) c; + } + if (c == null) { + return new FastSet(); + } + + FastSet res = new FastSet(); + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + res.add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet convert(Collection c) + { + FastSet res = empty(); + if (c != null) { + for (int i : c) { + res.add(i); + } + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet convert(int... 
a) + { + FastSet res = new FastSet(); + if (a != null) { + for (int i : a) { + res.add(i); + } + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int fromIndex, int toIndex) + { + if (fromIndex > toIndex) { + throw new IndexOutOfBoundsException( + "fromIndex: " + fromIndex + + " > toIndex: " + toIndex + ); + } + if (fromIndex == toIndex) { + add(fromIndex); + return; + } + + // Increase capacity if necessary + int startWordIndex = wordIndex(fromIndex); + int endWordIndex = wordIndex(toIndex); + expandTo(endWordIndex); + + final int[] localWords = words; // faster + + boolean modified = false; + int firstWordMask = ALL_ONES_WORD << fromIndex; + int lastWordMask = ALL_ONES_WORD >>> -(toIndex + 1); + if (startWordIndex == endWordIndex) { + // Case 1: One word + int before = localWords[startWordIndex]; + localWords[startWordIndex] |= (firstWordMask & lastWordMask); + modified = localWords[startWordIndex] != before; + } else { + // Case 2: Multiple words + // Handle first word + int before = localWords[startWordIndex]; + localWords[startWordIndex] |= firstWordMask; + modified = localWords[startWordIndex] != before; + + // Handle intermediate words, if any + for (int i = startWordIndex + 1; i < endWordIndex; i++) { + modified = modified || localWords[i] != ALL_ONES_WORD; + localWords[i] = ALL_ONES_WORD; + } + + // Handle last word + before = localWords[endWordIndex]; + localWords[endWordIndex] |= lastWordMask; + modified = modified || localWords[endWordIndex] != before; + } + if (modified) { + size = -1; + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int fromIndex, int toIndex) + { + if (fromIndex > toIndex) { + throw new IndexOutOfBoundsException( + "fromIndex: " + fromIndex + + " > toIndex: " + toIndex + ); + } + if (fromIndex == toIndex) { + remove(fromIndex); + return; + } + + int startWordIndex = wordIndex(fromIndex); + if (startWordIndex >= firstEmptyWord) { + return; + } + + int endWordIndex = 
wordIndex(toIndex); + if (endWordIndex >= firstEmptyWord) { + toIndex = last(); + endWordIndex = firstEmptyWord - 1; + } + + final int[] localWords = words; // faster + + boolean modified = false; + int firstWordMask = ALL_ONES_WORD << fromIndex; + int lastWordMask = ALL_ONES_WORD >>> -(toIndex + 1); + if (startWordIndex == endWordIndex) { + // Case 1: One word + int before = localWords[startWordIndex]; + localWords[startWordIndex] &= ~(firstWordMask & lastWordMask); + modified = localWords[startWordIndex] != before; + } else { + // Case 2: Multiple words + // Handle first word + int before = localWords[startWordIndex]; + localWords[startWordIndex] &= ~firstWordMask; + modified = localWords[startWordIndex] != before; + + // Handle intermediate words, if any + for (int i = startWordIndex + 1; i < endWordIndex; i++) { + modified = modified || localWords[i] != 0; + localWords[i] = 0; + } + + // Handle last word + before = localWords[endWordIndex]; + localWords[endWordIndex] &= ~lastWordMask; + modified = modified || localWords[endWordIndex] != before; + } + if (modified) { + fixFirstEmptyWord(); + size = -1; + } + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) + { + int wordIndex = wordIndex(e); + expandTo(wordIndex); + int mask = (1 << e); + words[wordIndex] ^= mask; + fixFirstEmptyWord(); + if (size >= 0) { + if ((words[wordIndex] & mask) == 0) { + size--; + } else { + size++; + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(IntSet o) + { + // empty set cases + if (this.isEmpty() && o.isEmpty()) { + return 0; + } + if (this.isEmpty()) { + return -1; + } + if (o.isEmpty()) { + return 1; + } + + final FastSet other = convert(o); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + if (firstEmptyWord > other.firstEmptyWord) { + return 1; + } + if (firstEmptyWord < other.firstEmptyWord) { + return -1; + } + for (int i = firstEmptyWord - 1; i >= 0; i--) { + long w1 = 
localWords[i] & 0xFFFFFFFFL; + long w2 = localOtherWords[i] & 0xFFFFFFFFL; + int res = w1 < w2 ? -1 : (w1 > w2 ? 1 : 0); + if (res != 0) { + return res; + } + } + return 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int index) + { + if (index < 0) { + throw new IndexOutOfBoundsException(); + } + + int count = 0; + final int[] localWords = words; // faster + for (int j = 0; j < firstEmptyWord; j++) { + int w = localWords[j]; + int current = BitCount.count(w); + if (index < count + current) { + int bit = -1; + for (int skip = index - count; skip >= 0; skip--) { + bit = Integer.numberOfTrailingZeros(w & (ALL_ONES_WORD << (bit + 1))); + } + return multiplyByWordSize(j) + bit; + } + count += current; + } + throw new NoSuchElementException(); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + if (isEmpty()) { + return -1; + } + + int index = wordIndex(e); + if (index >= firstEmptyWord || (words[index] & (1 << e)) == 0) { + return -1; + } + int count = BitCount.count(words, index); + count += BitCount.count(words[index] & ~(ALL_ONES_WORD << e)); + return count; + + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet intersection(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (other == this) { + return clone(); + } + + final FastSet o = convert(other); + FastSet res = new FastSet(Math.min(firstEmptyWord, o.firstEmptyWord)); + res.firstEmptyWord = res.words.length; + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + for (int i = 0; i < res.firstEmptyWord; i++) { + localResWords[i] = localWords[i] & localOtherWords[i]; + } + res.fixFirstEmptyWord(); + res.size = -1; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet union(IntSet 
other) + { + if (other == null || other.isEmpty() || this == other) { + return clone(); + } + + final FastSet o = convert(other); + if (isEmpty()) { + return o.clone(); + } + + FastSet res = new FastSet(Math.max(firstEmptyWord, o.firstEmptyWord)); + res.firstEmptyWord = res.words.length; + final int wordsInCommon = Math.min(firstEmptyWord, o.firstEmptyWord); + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + for (int i = 0; i < wordsInCommon; i++) { + localResWords[i] = localWords[i] | localOtherWords[i]; + } + + if (wordsInCommon < firstEmptyWord) { + System.arraycopy( + localWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } + if (wordsInCommon < o.firstEmptyWord) { + System.arraycopy( + localOtherWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } + res.size = -1; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet difference(IntSet other) + { + if (other == null || other.isEmpty()) { + return clone(); + } + if (other == this || isEmpty()) { + return empty(); + } + + final FastSet o = convert(other); + FastSet res = new FastSet(firstEmptyWord); + res.firstEmptyWord = firstEmptyWord; + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + int i = 0; + final int m = Math.min(firstEmptyWord, o.firstEmptyWord); + for (; i < m; i++) { + localResWords[i] = localWords[i] & ~localOtherWords[i]; + } + if (i < firstEmptyWord) { + System.arraycopy(localWords, i, localResWords, i, firstEmptyWord - i); + } else { + res.fixFirstEmptyWord(); + } + res.size = -1; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet symmetricDifference(IntSet other) + { + if (other == null || other.isEmpty()) { + return clone(); + } + if (other == this) { + 
return empty(); + } + + final FastSet o = convert(other); + if (isEmpty()) { + return o.clone(); + } + + FastSet res = new FastSet(Math.max(firstEmptyWord, o.firstEmptyWord)); + res.firstEmptyWord = res.words.length; + final int wordsInCommon = Math.min(firstEmptyWord, o.firstEmptyWord); + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + for (int i = 0; i < wordsInCommon; i++) { + localResWords[i] = localWords[i] ^ localOtherWords[i]; + } + + if (wordsInCommon < firstEmptyWord) { + System.arraycopy( + localWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } else if (wordsInCommon < o.firstEmptyWord) { + System.arraycopy( + localOtherWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } else { + res.fixFirstEmptyWord(); + } + res.size = -1; + return res; + } + + /** + * Save the state of the {@link ConciseSet}instance to a stream + */ + private void writeObject(ObjectOutputStream s) throws IOException + { + assert words != null; + if (firstEmptyWord < words.length) { + words = Arrays.copyOf(words, firstEmptyWord); + } + s.defaultWriteObject(); + } + + /** + * Reconstruct the {@link ConciseSet} instance from a stream + */ + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException + { + s.defaultReadObject(); + firstEmptyWord = words.length; + size = -1; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + final StringBuilder s = new StringBuilder("INTERNAL REPRESENTATION:\n"); + final Formatter f = new Formatter(s, Locale.ENGLISH); + + if (isEmpty()) { + return s.append("null\n").toString(); + } + + // elements + f.format("Elements: %s\n", toString()); + + // raw representation of words + for (int i = 0; i < firstEmptyWord; i++) { + f.format( + "words[%d] = %s (from %d to %d)\n", + Integer.valueOf(i), + 
toBinaryString(words[i]), + Integer.valueOf(multiplyByWordSize(i)), + Integer.valueOf(multiplyByWordSize(i + 1) - 1) + ); + } + + // object attributes + f.format("wordsInUse: %d\n", firstEmptyWord); + f.format("size: %s\n", (size == -1 ? "invalid" : Integer.toString(size))); + f.format("words.length: %d\n", words.length); + + // compression + f.format("bitmap compression: %.2f%%\n", 100D * bitmapCompressionRatio()); + f.format("collection compression: %.2f%%\n", 100D * collectionCompressionRatio()); + + return s.toString(); + } + + /** + * Iterates over bits + *

+ * This iterator allows for modifications during the iteration, that is it + * is possible to add/remove elements through {@link #add(int)}, + * {@link #remove(int)}, {@link #addAll(IntSet)}, {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, + * {@link IntIterator#next()} returns the first integral greater than the + * last visited one. + */ + private class BitIterator implements IntIterator + { + private int nextIndex; + private int nextBit; + private int last; + + /** + * identify the first bit + */ + private BitIterator() + { + nextIndex = 0; + if (isEmpty()) { + return; + } + + last = -1; // unused! + + // find the first non-empty word + while (words[nextIndex] == 0) { + nextIndex++; + } + + // find the first set bit + nextBit = Integer.numberOfTrailingZeros(words[nextIndex]); + } + + /** + * find the first set bit after nextIndex + nextBit + */ + void prepareNext() + { + // find the next set bit within the current word + int w = words[nextIndex]; + while ((++nextBit < WORD_SIZE)) { + if ((w & (1 << nextBit)) != 0) { + return; + } + } + + // find the first non-empty word + do { + if (++nextIndex == firstEmptyWord) { + return; + } + } while ((w = words[nextIndex]) == 0); + nextBit = Integer.numberOfTrailingZeros(w); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean hasNext() + { + return nextIndex < firstEmptyWord; + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + last = multiplyByWordSize(nextIndex) + nextBit; + prepareNext(); + return last; + } + + /** + * {@inheritDoc} + */ + @Override + public void skipAllBefore(int element) + { + if (element <= 0 || element <= last) { + return; + } + + // identify where the element is + int newNextIndex = wordIndexNoCheck(element); + int newNextBit = element & (WORD_SIZE - 1); + if (newNextIndex < nextIndex || (newNextIndex == nextIndex && newNextBit <= nextBit)) { + return; + } + + // "element" is 
the next item to return, unless it does not exist + nextIndex = newNextIndex; + if (nextIndex >= firstEmptyWord) { + return; + } + nextBit = newNextBit; + if ((words[nextIndex] & (1 << nextBit)) == 0) { + prepareNext(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void remove() + { + FastSet.this.remove(last); + } + + @Override + public IntIterator clone() + { + BitIterator retVal = new BitIterator(); + retVal.nextIndex = nextIndex; + retVal.nextBit = nextBit; + retVal.last = last; + return retVal; + } + } + + /** + * Iterates over bits in reverse order + *

+ * This iterator allows for modifications during the iteration, that is it + * is possible to add/remove elements through {@link #add(int)}, + * {@link #remove(int)}, {@link #addAll(IntSet)}, {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, + * {@link IntIterator#next()} returns the first integral greater than the + * last visited one. + */ + private class ReverseBitIterator implements IntIterator + { + private int nextIndex; + private int nextBit; + private int last; + + /** + * identify the first bit + */ + private ReverseBitIterator() + { + nextIndex = firstEmptyWord - 1; + if (isEmpty()) { + return; + } + + last = Integer.MAX_VALUE; // unused! + nextBit = WORD_SIZE - Integer.numberOfLeadingZeros(words[nextIndex]) - 1; + } + + /** + * find the first set bit after nextIndex + nextBit + */ + void prepareNext() + { + // find the next set bit within the current word + int w = words[nextIndex]; + while ((--nextBit >= 0)) { + if ((w & (1 << nextBit)) != 0) { + return; + } + } + + // find the first non-empty word + do { + if (--nextIndex == -1) { + return; + } + } while ((w = words[nextIndex]) == 0); + nextBit = WORD_SIZE - Integer.numberOfLeadingZeros(w) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean hasNext() + { + return nextIndex >= 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + last = multiplyByWordSize(nextIndex) + nextBit; + prepareNext(); + return last; + } + + /** + * {@inheritDoc} + */ + @Override + public void skipAllBefore(int element) + { + if (element < 0) { + nextIndex = -1; + return; + } + if (element >= last) { + return; + } + + // identify where the element is + int newNextIndex = wordIndexNoCheck(element); + int newNextBit = element & (WORD_SIZE - 1); + if (newNextIndex > nextIndex || (newNextIndex == nextIndex && newNextBit >= nextBit)) { + return; + } + + // "element" is the next item to return, unless it does 
not exist + nextIndex = newNextIndex; + nextBit = newNextBit; + if ((words[nextIndex] & (1 << nextBit)) == 0) { + prepareNext(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void remove() + { + FastSet.this.remove(last); + } + + @Override + public IntIterator clone() + { + ReverseBitIterator retVal = new ReverseBitIterator(); /* keep the copy descending; a BitIterator would walk upward */ + retVal.nextIndex = nextIndex; + retVal.nextBit = nextBit; + retVal.last = last; + return retVal; + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java new file mode 100755 index 000000000000..e81434a890bb --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java @@ -0,0 +1,1012 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.intset; + +// update CompactIdentityHashSet.java, UniqueSet.java and +// SoftHashMapIndex.java accordingly. + +import io.druid.extendedset.utilities.IntHashCode; + +import java.util.Arrays; +import java.util.Collection; +import java.util.ConcurrentModificationException; +import java.util.NoSuchElementException; + +/** + * Implements a fast hash-set. + *

+ * Inspired by http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ + * ontopia/utils/CompactHashSet.java + * + * @author Alessandro Colantonio + * @version $Id: HashIntSet.java 156 2011-09-01 00:13:57Z cocciasik $ + */ +public class HashIntSet extends AbstractIntSet +{ + protected final static int INITIAL_SIZE = 3; + protected final static double LOAD_FACTOR = 0.75D; + + /** + * empty cell + */ + protected final static int EMPTY = -1; + + /** + * When an object is deleted this object is put into the hashtable in its + * place, so that other objects with the same key (collisions) further down + * the hashtable are not lost after we delete an object in the collision + * chain. + */ + protected final static int REMOVED = -2; + + /** + * number of elements + */ + protected int size; + + /** + * This is the number of empty cells. It's not necessarily the same as + * objects.length - elements, because some cells may contain REMOVED. + */ + protected int freecells; + + /** + * cells + */ + protected int[] cells; + + /** + * concurrent modification during iteration + */ + protected int modCount; + + /** + * Constructs a new, empty set. + */ + public HashIntSet() + { + this(INITIAL_SIZE); + } + + /** + * Constructs a new, empty set. + * + * @param initialSize + */ + public HashIntSet(int initialSize) + { + if (initialSize <= 0) { + throw new IllegalArgumentException(); + } + cells = new int[initialSize]; + modCount = 0; + clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + return new SortedIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() + { + return new DescendingSortedIterator(); + } + + /** + * Similar to {@link #iterator()}, but with no particular order + * + * @return iterator with no sorting + */ + public IntIterator unsortedIterator() + { + return new UnsortedIterator(); + } + + /** + * Returns the number of elements in this set (its cardinality). 
+ */ + @Override + public int size() + { + return size; + } + + /** + * Returns true if this set contains no elements. + */ + @Override + public boolean isEmpty() + { + return size == 0; + } + + /** + * Compute the index of the element + * + * @param o element to search + * + * @return index of the element in {@link #cells} + */ + private final int toIndex(int o) + { + return (o & 0x7FFFFFFF) % cells.length; + } + + /** + * Find position of the integer in {@link #cells}. If not found, returns the + * first empty cell. + * + * @param element element to search + * + * @return if returned value >=0, it returns the index of the + * element; if returned value <0, the index of the + * first empty cell is -(returned value - 1) + */ + private int findElementOrEmpty(int element) + { + assert element >= 0; + int index = toIndex(IntHashCode.hashCode(element)); + int offset = 1; + + while (cells[index] != EMPTY) { + // element found! + if (cells[index] == element) { + return index; + } + + // compute the next index to check + index = toIndex(index + offset); + offset <<= 1; + offset++; + if (offset < 0) { + offset = 2; + } + } + + // element not found! + return -(index + 1); + } + + /** + * Find position of the integer in {@link #cells}. If not found, returns the + * first removed cell. + * + * @param element element to search + * + * @return if returned value >=0, it returns the index of the + * element; if returned value <0, the index of the + * first empty cell is -(returned value - 1) + */ + private int findElementOrRemoved(int element) + { + assert element >= 0; + int index = toIndex(IntHashCode.hashCode(element)); + int offset = 1; + int removed = -1; + + while (cells[index] != EMPTY) { + // element found! 
+ if (cells[index] == element) { + return index; + } + + // remember the last removed cell if we don't find the element + if (cells[index] == REMOVED) { + removed = index; + } + + index = toIndex(index + offset); + offset <<= 1; + offset++; + if (offset < 0) { + offset = 2; + } + } + if (removed >= 0) { + return -(removed + 1); + } + return index; + } + + /** + * Returns true if this set contains the specified element. + * + * @param element element whose presence in this set is to be tested. + * + * @return true if this set contains the specified element. + */ + @Override + public boolean contains(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + if (isEmpty()) { + return false; + } + return findElementOrEmpty(element) >= 0; + } + + /** + * Adds the specified element to this set if it is not already present. + * + * @param element element to be added to this set. + * + * @return true if the set did not already contain the specified + * element. + */ + @Override + public boolean add(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + int index = findElementOrRemoved(element); + if (index >= 0) { + if (cells[index] == element) { + return false; + } + freecells--; + } else { + index = -(index + 1); + } + + modCount++; + size++; + + // set the integer + cells[index] = element; + + // do we need to rehash? + if (1 - ((double) freecells / cells.length) > LOAD_FACTOR) { + rehash(); + } + return true; + } + + /** + * Removes the specified element from the set. + */ + @Override + public boolean remove(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + int index = findElementOrEmpty(element); + if (index < 0) { + return false; + } + + cells[index] = REMOVED; + modCount++; + size--; + return true; + } + + /** + * Removes all of the elements from this set. 
+ */ + @Override + public void clear() + { + size = 0; + Arrays.fill(cells, EMPTY); + freecells = cells.length; + modCount++; + } + + /** + * Figures out correct size for rehashed set, then does the rehash. + */ + protected void rehash() + { + // do we need to increase capacity, or are there so many + // deleted objects hanging around that rehashing to the same + // size is sufficient? if 5% (arbitrarily chosen number) of + // cells can be freed up by a rehash, we do it. + + int gargagecells = cells.length - (size + freecells); + if ((double) gargagecells / cells.length > 0.05D) + // rehash with same size + { + rehash(cells.length); + } else + // rehash with increased capacity + { + rehash((cells.length << 1) + 1); + } + } + + /** + * Rehashes to a bigger size. + */ + protected void rehash(int newCapacity) + { + HashIntSet rehashed = new HashIntSet(newCapacity); + @SuppressWarnings("hiding") + int[] cells = rehashed.cells; + for (int element : this.cells) { + if (element < 0) + // removed or empty + { + continue; + } + + // add the element + cells[-(rehashed.findElementOrEmpty(element) + 1)] = element; + } + this.cells = cells; + freecells = newCapacity - size; + modCount++; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = false; + while (itr.hasNext()) { + res |= add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = false; + while (itr.hasNext()) { + res |= remove(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean 
retainAll(IntSet c) + { + if (c == null) { /* an empty, non-null argument falls through so that every element is dropped */ + return false; + } + boolean res = false; + for (int i = 0; i < cells.length; i++) { + if (cells[i] >= 0 && !c.contains(cells[i])) { + cells[i] = REMOVED; + res = true; + size--; + } + } + if (res) { + modCount++; + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet clone() + { + HashIntSet cloned = new HashIntSet(cells.length); + System.arraycopy(cells, 0, cloned.cells, 0, cells.length); + cloned.freecells = freecells; + cloned.size = size; + cloned.modCount = 0; + return cloned; + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return cells.length / Math.ceil((last() + 1L) / 32D); /* 32-bit words needed for bits 0..last(); never zero */ + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return isEmpty() ? 0D : (double) cells.length / size(); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet complemented() + { + return (HashIntSet) super.complemented(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = true; + while (res && itr.hasNext()) { + res &= contains(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet c) + { + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = true; + while (res && itr.hasNext()) { + if (contains(itr.next())) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + while (minElements > 0 && 
itr.hasNext()) { + if (contains(itr.next())) { + minElements--; + } + } + return minElements == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet convert(int... a) + { + HashIntSet res = new HashIntSet((int) (a.length / LOAD_FACTOR) + 1); + for (int e : a) { + res.add(e); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet convert(Collection c) + { + HashIntSet res = new HashIntSet((int) (c.size() / LOAD_FACTOR) + 1); + for (int e : c) { + res.add(e); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return "size: " + size + ", freecells: " + freecells + ", " + + Arrays.toString(cells); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet symmetricDifference(IntSet c) + { + HashIntSet res = clone(); + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + while (itr.hasNext()) { + res.flip(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet union(IntSet other) + { + return (HashIntSet) super.union(other); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet difference(IntSet other) + { + return (HashIntSet) super.difference(other); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet intersection(IntSet other) + { + return (HashIntSet) super.intersection(other); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet empty() + { + return new HashIntSet(); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + modCount++; + int index = findElementOrRemoved(element); + if (index >= 0) { + // REMOVE + if (cells[index] == element) { + cells[index] = REMOVED; + size--; + return; + } + freecells--; + } else { + index = -(index + 1); + } + + // ADD + cells[index] = element; + 
size++; + + // do we need to rehash? + if (1 - ((double) freecells / cells.length) > LOAD_FACTOR) { + rehash(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) + { + return toArray()[i]; + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + return isEmpty() ? -1 : Math.max(-1, Arrays.binarySearch(toArray(), e)); /* absent elements yield -1, not a negative insertion point */ + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet c) + { + int res = 0; + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + while (itr.hasNext()) { + if (contains(itr.next())) { + res++; + } + } + return res; + + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + int max = 0; + for (int element : cells) { + if (max < element) { + max = element; + } + } + return max; + } + + /** + * {@inheritDoc} + */ + @Override + public int first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + int min = Integer.MAX_VALUE; + for (int element : cells) { + if (element >= 0 && min > element) { + min = element; + } + } + return min; + } + + /** + * {@inheritDoc} + */ + @Override + public int[] toArray(int[] a) + { + if (a.length < size) { + throw new IllegalArgumentException(); + } + if (isEmpty()) { + return a; + } + int i = 0; + for (int element : this.cells) { + if (element < 0) + // removed or empty + { + continue; + } + + // copy the element + a[i++] = element; + } + Arrays.sort(a, 0, size); + return a; + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return Arrays.toString(toArray()); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + if (isEmpty()) { + return 0; + } + int h = 1; + for (int e : cells) { + if (e >= 0) { + h ^= IntHashCode.hashCode(e); + } + } + return h;
+ } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof HashIntSet)) { + return super.equals(obj); + } + final HashIntSet other = (HashIntSet) obj; + if (size != other.size) { + return false; + } + for (int e : other.cells) { + if (e >= 0 && !contains(e)) { + return false; + } + } + return true; + } + + /** + * Iterates over the hashset, with no sorting + */ + private class UnsortedIterator implements IntIterator + { + private int nextIndex = 0; + private int current = -1; + private int expectedModCount = modCount; + + public UnsortedIterator() + { + nextIndex = 0; + skipEmpty(); + expectedModCount = modCount; + } + + void skipEmpty() + { + while (nextIndex < cells.length + && (cells[nextIndex] == EMPTY || cells[nextIndex] == REMOVED)) { + nextIndex++; + } + } + + @Override + public boolean hasNext() + { + return nextIndex < cells.length; + } + + @Override + public int next() + { + if (modCount != expectedModCount) { + throw new ConcurrentModificationException(); + } + if (nextIndex >= cells.length) { + throw new NoSuchElementException(); + } + + current = nextIndex; + nextIndex++; + skipEmpty(); + return cells[current]; + } + + @Override + public void remove() + { + if (modCount != expectedModCount) { + throw new ConcurrentModificationException(); + } + if (current < 0) { + throw new IllegalStateException(); + } + // delete object + cells[current] = REMOVED; + size--; + modCount++; + expectedModCount = modCount; // this is expected! 
+ current = -1; + } + + @Override + public void skipAllBefore(int element) + { + throw new UnsupportedOperationException(); + } + + @Override + public IntIterator clone() + { + UnsortedIterator retVal = new UnsortedIterator(); + retVal.nextIndex = nextIndex; + retVal.current = current; + retVal.expectedModCount = expectedModCount; + return retVal; + } + } + + /** + * Iterates in ascending order over the array returned by toArray() when the iterator is created, starting at index 0. NOTE(review): hasNext() compares against the live size, which remove() decrements - confirm iteration is not cut short after removals. + */ + private class SortedIterator implements IntIterator + { + int[] elements = toArray(); + int next = 0; + + @Override + public boolean hasNext() + { + return next < size; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return elements[next++]; + } + + @Override + public void remove() + { + if (elements[next - 1] == REMOVED) { + throw new IllegalStateException(); + } + HashIntSet.this.remove(elements[next - 1]); + elements[next - 1] = REMOVED; + } + + @Override + public void skipAllBefore(int element) + { + if (element <= elements[next]) { + return; + } + next = Arrays.binarySearch(elements, next + 1, size, element); + if (next < 0) { + next = -(next + 1); + } + } + + @Override + public IntIterator clone() + { + SortedIterator retVal = new SortedIterator(); + retVal.next = next; + retVal.elements = elements.clone(); + return retVal; + } + } + + /** + * Iterates in descending order over the array returned by toArray() when the iterator is created, starting at index size - 1 and moving toward index 0. + */ + private class DescendingSortedIterator implements IntIterator + { + int[] elements = toArray(); + int next = size - 1; + + @Override + public boolean hasNext() + { + return next >= 0; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return elements[next--]; + } + + @Override + public void remove() + { + if (elements[next + 1] == REMOVED) { + throw new IllegalStateException(); + } + HashIntSet.this.remove(elements[next + 1]); + elements[next + 1] = REMOVED; + } + + @Override + public void skipAllBefore(int element) + { + if (element >= 
elements[next]) { + return; + } + next = Arrays.binarySearch(elements, 0, next, element); + if (next < 0) { + next = -(next + 1) - 1; + } + } + + @Override + public IntIterator clone() + { + DescendingSortedIterator retVal = new DescendingSortedIterator(); + retVal.elements = elements.clone(); + retVal.next = next; + return retVal; + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java new file mode 100755 index 000000000000..6dbf8bb1d0d8 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java @@ -0,0 +1,1157 @@ +/* +* Copyright 2012 Metamarkets Group Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package io.druid.extendedset.intset; + + +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import com.google.common.collect.MinMaxPriorityQueue; +import com.google.common.collect.UnmodifiableIterator; +import com.google.common.primitives.Ints; +import io.druid.extendedset.utilities.IntList; + +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +public class ImmutableConciseSet +{ + private final static int CHUNK_SIZE = 10000; + private final IntBuffer words; + private final int lastWordIndex; + private final int size; + + public ImmutableConciseSet() + { + this.words = null; + this.lastWordIndex = -1; + this.size = 0; + } + + public ImmutableConciseSet(ByteBuffer byteBuffer) + { + this.words = byteBuffer.asIntBuffer(); + this.lastWordIndex = words.capacity() - 1; + this.size = calcSize(); + } + + public ImmutableConciseSet(IntBuffer buffer) + { + this.words = buffer; + this.lastWordIndex = (words == null || buffer.capacity() == 0) ? -1 : words.capacity() - 1; + this.size = calcSize(); + } + + public static ImmutableConciseSet newImmutableFromMutable(ConciseSet conciseSet) + { + if (conciseSet == null || conciseSet.isEmpty()) { + return new ImmutableConciseSet(); + } + return new ImmutableConciseSet(IntBuffer.wrap(conciseSet.getWords())); + } + + public static int compareInts(int x, int y) + { + return (x < y) ? -1 : ((x == y) ? 0 : 1); + } + + public static ImmutableConciseSet union(ImmutableConciseSet... 
sets) + { + return union(Arrays.asList(sets)); + } + + public static ImmutableConciseSet union(Iterable sets) + { + return union(sets.iterator()); + } + + public static ImmutableConciseSet union(Iterator sets) + { + ImmutableConciseSet partialResults = doUnion(Iterators.limit(sets, CHUNK_SIZE)); + while (sets.hasNext()) { + final UnmodifiableIterator partialIter = Iterators.singletonIterator(partialResults); + partialResults = doUnion(Iterators.concat(partialIter, Iterators.limit(sets, CHUNK_SIZE))); + } + return partialResults; + } + + public static ImmutableConciseSet intersection(ImmutableConciseSet... sets) + { + return intersection(Arrays.asList(sets)); + } + + public static ImmutableConciseSet intersection(Iterable sets) + { + return intersection(sets.iterator()); + } + + public static ImmutableConciseSet intersection(Iterator sets) + { + ImmutableConciseSet partialResults = doIntersection(Iterators.limit(sets, CHUNK_SIZE)); + while (sets.hasNext()) { + final UnmodifiableIterator partialIter = Iterators.singletonIterator(partialResults); + partialResults = doIntersection( + Iterators.concat(Iterators.limit(sets, CHUNK_SIZE), partialIter) + ); + } + return partialResults; + } + + public static ImmutableConciseSet complement(ImmutableConciseSet set) + { + return doComplement(set); + } + + public static ImmutableConciseSet complement(ImmutableConciseSet set, int length) + { + if (length <= 0) { + return new ImmutableConciseSet(); + } + + // special case when the set is empty and we need a concise set of ones + if (set == null || set.isEmpty()) { + final int leftoverBits = length % 31; + final int onesBlocks = length / 31; + final int[] words; + if (onesBlocks > 0) { + if (leftoverBits > 0) { + words = new int[]{ + ConciseSetUtils.SEQUENCE_BIT | (onesBlocks - 1), + ConciseSetUtils.onesUntil(leftoverBits) + }; + } else { + words = new int[]{ + ConciseSetUtils.SEQUENCE_BIT | (onesBlocks - 1) + }; + } + } else { + if (leftoverBits > 0) { + words = new 
int[]{ConciseSetUtils.onesUntil(leftoverBits)}; + } else { + words = new int[]{}; + } + } + ConciseSet newSet = new ConciseSet(words, false); + return ImmutableConciseSet.newImmutableFromMutable(newSet); + } + + IntList retVal = new IntList(); + int endIndex = length - 1; + + int wordsWalked = 0; + int last = 0; + + WordIterator iter = set.newWordIterator(); + + while (iter.hasNext()) { + int word = iter.next(); + wordsWalked = iter.wordsWalked; + if (ConciseSetUtils.isLiteral(word)) { + retVal.add(ConciseSetUtils.ALL_ZEROS_LITERAL | ~word); + } else { + retVal.add(ConciseSetUtils.SEQUENCE_BIT ^ word); + } + } + + last = set.getLast(); + + int distFromLastWordBoundary = ConciseSetUtils.maxLiteralLengthModulus(last); + int distToNextWordBoundary = ConciseSetUtils.MAX_LITERAL_LENGTH - distFromLastWordBoundary - 1; + last = (last < 0) ? 0 : last + distToNextWordBoundary; + + int diff = endIndex - last; + // only append a new literal when the end index is beyond the current word + if (diff > 0) { + // first check if the difference can be represented in 31 bits + if (diff <= ConciseSetUtils.MAX_LITERAL_LENGTH) { + retVal.add(ConciseSetUtils.ALL_ONES_LITERAL); + } else { + // create a fill from last set bit to endIndex for number of 31 bit blocks minus one + int endIndexWordCount = ConciseSetUtils.maxLiteralLengthDivision(endIndex); + retVal.add(ConciseSetUtils.SEQUENCE_BIT | (endIndexWordCount - wordsWalked - 1)); + retVal.add(ConciseSetUtils.ALL_ONES_LITERAL); + } + } + + // clear bits after last set value + int lastWord = retVal.get(retVal.length() - 1); + if (ConciseSetUtils.isLiteral(lastWord)) { + lastWord = ConciseSetUtils.clearBitsAfterInLastWord( + lastWord, + ConciseSetUtils.maxLiteralLengthModulus(endIndex) + ); + } + + retVal.set(retVal.length() - 1, lastWord); + trimZeros(retVal); + + if (retVal.isEmpty()) { + return new ImmutableConciseSet(); + } + return compact(new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()))); + } + + public static 
ImmutableConciseSet compact(ImmutableConciseSet set) + { + IntList retVal = new IntList(); + WordIterator itr = set.newWordIterator(); + while (itr.hasNext()) { + addAndCompact(retVal, itr.next()); + } + return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); + } + + private static void addAndCompact(IntList set, int wordToAdd) + { + int length = set.length(); + if (set.isEmpty()) { + set.add(wordToAdd); + return; + } + + int last = set.get(length - 1); + + int newWord = 0; + if (ConciseSetUtils.isAllOnesLiteral(last)) { + if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) { + newWord = 0x40000001; + } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { + newWord = wordToAdd + 1; + } + } else if (ConciseSetUtils.isOneSequence(last)) { + if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) { + newWord = last + 1; + } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { + newWord = last + ConciseSetUtils.getSequenceNumWords(wordToAdd); + } + } else if (ConciseSetUtils.isAllZerosLiteral(last)) { + if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) { + newWord = 0x00000001; + } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { + newWord = wordToAdd + 1; + } + } else if (ConciseSetUtils.isZeroSequence(last)) { + if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) { + newWord = last + 1; + } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { + newWord = last + ConciseSetUtils.getSequenceNumWords(wordToAdd); + } + } else if (ConciseSetUtils.isLiteralWithSingleOneBit(last)) { + int position = Integer.numberOfTrailingZeros(last) + 1; + if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) { + newWord = 0x00000001 | (position << 25); + } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { + newWord = (wordToAdd + 1) | 
(position << 25); + } + } else if (ConciseSetUtils.isLiteralWithSingleZeroBit(last)) { + int position = Integer.numberOfTrailingZeros(~last) + 1; + if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) { + newWord = 0x40000001 | (position << 25); + } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { + newWord = (wordToAdd + 1) | (position << 25); + } + } + + if (newWord != 0) { + set.set(length - 1, newWord); + } else { + set.add(wordToAdd); + } + } + + private static ImmutableConciseSet doUnion(Iterator sets) + { + IntList retVal = new IntList(); + + // lhs = current word position, rhs = the iterator + // Comparison is first by index, then one fills > literals > zero fills + // one fills are sorted by length (longer one fills have priority) + // similarily, shorter zero fills have priority + MinMaxPriorityQueue theQ = MinMaxPriorityQueue.orderedBy( + new Comparator() + { + @Override + public int compare(WordHolder h1, WordHolder h2) + { + int w1 = h1.getWord(); + int w2 = h2.getWord(); + int s1 = h1.getIterator().startIndex; + int s2 = h2.getIterator().startIndex; + + if (s1 != s2) { + return compareInts(s1, s2); + } + + if (ConciseSetUtils.isOneSequence(w1)) { + if (ConciseSetUtils.isOneSequence(w2)) { + return -compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); + } + return -1; + } else if (ConciseSetUtils.isLiteral(w1)) { + if (ConciseSetUtils.isOneSequence(w2)) { + return 1; + } else if (ConciseSetUtils.isLiteral(w2)) { + return 0; + } + return -1; + } else { + if (!ConciseSetUtils.isZeroSequence(w2)) { + return 1; + } + return compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); + } + } + } + ).create(); + + // populate priority queue + while (sets.hasNext()) { + ImmutableConciseSet set = sets.next(); + + if (set != null && !set.isEmpty()) { + WordIterator itr = set.newWordIterator(); + theQ.add(new WordHolder(itr.next(), 
itr)); + } + } + + int currIndex = 0; + + while (!theQ.isEmpty()) { + // create a temp list to hold everything that will get pushed back into the priority queue after each run + List wordsToAdd = Lists.newArrayList(); + + // grab the top element from the priority queue + WordHolder curr = theQ.poll(); + int word = curr.getWord(); + WordIterator itr = curr.getIterator(); + + // if the next word in the queue starts at a different point than where we ended off we need to create a zero gap + // to fill the space + if (currIndex < itr.startIndex) { + addAndCompact(retVal, itr.startIndex - currIndex - 1); + currIndex = itr.startIndex; + } + + if (ConciseSetUtils.isOneSequence(word)) { + // extract a literal from the flip bits of the one sequence + int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word); + + // advance everything past the longest ones sequence + WordHolder nextVal = theQ.peek(); + while (nextVal != null && + nextVal.getIterator().startIndex < itr.wordsWalked) { + WordHolder entry = theQ.poll(); + int w = entry.getWord(); + WordIterator i = entry.getIterator(); + + if (i.startIndex == itr.startIndex) { + // if a literal was created from a flip bit, OR it with other literals or literals from flip bits in the same + // position + if (ConciseSetUtils.isOneSequence(w)) { + flipBitLiteral |= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); + } else if (ConciseSetUtils.isLiteral(w)) { + flipBitLiteral |= w; + } else { + flipBitLiteral |= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); + } + } + + i.advanceTo(itr.wordsWalked); + if (i.hasNext()) { + wordsToAdd.add(new WordHolder(i.next(), i)); + } + nextVal = theQ.peek(); + } + + // advance longest one literal forward and push result back to priority queue + // if a flip bit is still needed, put it in the correct position + int newWord = word & 0xC1FFFFFF; + if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { + flipBitLiteral ^= ConciseSetUtils.ALL_ONES_LITERAL; + int position = 
Integer.numberOfTrailingZeros(flipBitLiteral) + 1; + newWord |= (position << 25); + } + addAndCompact(retVal, newWord); + currIndex = itr.wordsWalked; + + if (itr.hasNext()) { + wordsToAdd.add(new WordHolder(itr.next(), itr)); + } + } else if (ConciseSetUtils.isLiteral(word)) { + // advance all other literals + WordHolder nextVal = theQ.peek(); + while (nextVal != null && + nextVal.getIterator().startIndex == itr.startIndex) { + + WordHolder entry = theQ.poll(); + int w = entry.getWord(); + WordIterator i = entry.getIterator(); + + // if we still have zero fills with flipped bits, OR them here + if (ConciseSetUtils.isLiteral(w)) { + word |= w; + } else { + int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); + if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) { + word |= flipBitLiteral; + i.advanceTo(itr.wordsWalked); + } + } + + if (i.hasNext()) { + wordsToAdd.add(new WordHolder(i.next(), i)); + } + + nextVal = theQ.peek(); + } + + // advance the set with the current literal forward and push result back to priority queue + addAndCompact(retVal, word); + currIndex++; + + if (itr.hasNext()) { + wordsToAdd.add(new WordHolder(itr.next(), itr)); + } + } else { // zero fills + int flipBitLiteral; + WordHolder nextVal = theQ.peek(); + + while (nextVal != null && + nextVal.getIterator().startIndex == itr.startIndex) { + // check if literal can be created flip bits of other zero sequences + WordHolder entry = theQ.poll(); + int w = entry.getWord(); + WordIterator i = entry.getIterator(); + + flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); + if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) { + wordsToAdd.add(new WordHolder(flipBitLiteral, i)); + } else if (i.hasNext()) { + wordsToAdd.add(new WordHolder(i.next(), i)); + } + nextVal = theQ.peek(); + } + + // check if a literal needs to be created from the flipped bits of this sequence + flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word); + if (flipBitLiteral 
!= ConciseSetUtils.ALL_ZEROS_LITERAL) { + wordsToAdd.add(new WordHolder(flipBitLiteral, itr)); + } else if (itr.hasNext()) { + wordsToAdd.add(new WordHolder(itr.next(), itr)); + } + } + + theQ.addAll(wordsToAdd); + } + + if (retVal.isEmpty()) { + return new ImmutableConciseSet(); + } + return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); + } + + public static ImmutableConciseSet doIntersection(Iterator sets) + { + IntList retVal = new IntList(); + + // lhs = current word position, rhs = the iterator + // Comparison is first by index, then zero fills > literals > one fills + // zero fills are sorted by length (longer zero fills have priority) + // similarily, shorter one fills have priority + MinMaxPriorityQueue theQ = MinMaxPriorityQueue.orderedBy( + new Comparator() + { + @Override + public int compare(WordHolder h1, WordHolder h2) + { + int w1 = h1.getWord(); + int w2 = h2.getWord(); + int s1 = h1.getIterator().startIndex; + int s2 = h2.getIterator().startIndex; + + if (s1 != s2) { + return compareInts(s1, s2); + } + + if (ConciseSetUtils.isZeroSequence(w1)) { + if (ConciseSetUtils.isZeroSequence(w2)) { + return -compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); + } + return -1; + } else if (ConciseSetUtils.isLiteral(w1)) { + if (ConciseSetUtils.isZeroSequence(w2)) { + return 1; + } else if (ConciseSetUtils.isLiteral(w2)) { + return 0; + } + return -1; + } else { + if (!ConciseSetUtils.isOneSequence(w2)) { + return 1; + } + return compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); + } + } + } + ).create(); + + // populate priority queue + while (sets.hasNext()) { + ImmutableConciseSet set = sets.next(); + + if (set == null || set.isEmpty()) { + return new ImmutableConciseSet(); + } + + WordIterator itr = set.newWordIterator(); + theQ.add(new WordHolder(itr.next(), itr)); + } + + int currIndex = 0; + int wordsWalkedAtSequenceEnd = Integer.MAX_VALUE; + + while 
(!theQ.isEmpty()) { + // create a temp list to hold everything that will get pushed back into the priority queue after each run + List wordsToAdd = Lists.newArrayList(); + + // grab the top element from the priority queue + WordHolder curr = theQ.poll(); + int word = curr.getWord(); + WordIterator itr = curr.getIterator(); + + // if a sequence has ended, we can break out because of Boolean logic + if (itr.startIndex >= wordsWalkedAtSequenceEnd) { + break; + } + + // if the next word in the queue starts at a different point than where we ended off we need to create a one gap + // to fill the space + if (currIndex < itr.startIndex) { + // number of 31 bit blocks that compromise the fill minus one + addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (itr.startIndex - currIndex - 1))); + currIndex = itr.startIndex; + } + + if (ConciseSetUtils.isZeroSequence(word)) { + // extract a literal from the flip bits of the zero sequence + int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word); + + // advance everything past the longest zero sequence + WordHolder nextVal = theQ.peek(); + while (nextVal != null && + nextVal.getIterator().startIndex < itr.wordsWalked) { + WordHolder entry = theQ.poll(); + int w = entry.getWord(); + WordIterator i = entry.getIterator(); + + if (i.startIndex == itr.startIndex) { + // if a literal was created from a flip bit, AND it with other literals or literals from flip bits in the same + // position + if (ConciseSetUtils.isZeroSequence(w)) { + flipBitLiteral &= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); + } else if (ConciseSetUtils.isLiteral(w)) { + flipBitLiteral &= w; + } else { + flipBitLiteral &= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); + } + } + + i.advanceTo(itr.wordsWalked); + if (i.hasNext()) { + wordsToAdd.add(new WordHolder(i.next(), i)); + } else { + wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd); + } + nextVal = theQ.peek(); + } + + // advance longest zero literal forward 
and push result back to priority queue + // if a flip bit is still needed, put it in the correct position + int newWord = word & 0xC1FFFFFF; + if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) { + int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1; + newWord = (word & 0xC1FFFFFF) | (position << 25); + } + addAndCompact(retVal, newWord); + currIndex = itr.wordsWalked; + + if (itr.hasNext()) { + wordsToAdd.add(new WordHolder(itr.next(), itr)); + } else { + wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd); + } + } else if (ConciseSetUtils.isLiteral(word)) { + // advance all other literals + WordHolder nextVal = theQ.peek(); + while (nextVal != null && + nextVal.getIterator().startIndex == itr.startIndex) { + + WordHolder entry = theQ.poll(); + int w = entry.getWord(); + WordIterator i = entry.getIterator(); + + // if we still have one fills with flipped bits, AND them here + if (ConciseSetUtils.isLiteral(w)) { + word &= w; + } else { + int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); + if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { + word &= flipBitLiteral; + i.advanceTo(itr.wordsWalked); + } + } + + if (i.hasNext()) { + wordsToAdd.add(new WordHolder(i.next(), i)); + } else { + wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd); + } + + nextVal = theQ.peek(); + } + + // advance the set with the current literal forward and push result back to priority queue + addAndCompact(retVal, word); + currIndex++; + + if (itr.hasNext()) { + wordsToAdd.add(new WordHolder(itr.next(), itr)); + } else { + wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd); + } + } else { // one fills + int flipBitLiteral; + WordHolder nextVal = theQ.peek(); + + while (nextVal != null && + nextVal.getIterator().startIndex == itr.startIndex) { + // check if literal can be created flip bits of other one sequences + WordHolder entry = theQ.poll(); + int w = 
entry.getWord(); + WordIterator i = entry.getIterator(); + + flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); + if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { + wordsToAdd.add(new WordHolder(flipBitLiteral, i)); + } else if (i.hasNext()) { + wordsToAdd.add(new WordHolder(i.next(), i)); + } else { + wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd); + } + + nextVal = theQ.peek(); + } + + // check if a literal needs to be created from the flipped bits of this sequence + flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word); + if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { + wordsToAdd.add(new WordHolder(flipBitLiteral, itr)); + } else if (itr.hasNext()) { + wordsToAdd.add(new WordHolder(itr.next(), itr)); + } else { + wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd); + } + } + + theQ.addAll(wordsToAdd); + } + + // fill in any missing one sequences + if (currIndex < wordsWalkedAtSequenceEnd) { + addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (wordsWalkedAtSequenceEnd - currIndex - 1))); + } + + if (retVal.isEmpty()) { + return new ImmutableConciseSet(); + } + return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); + } + + public static ImmutableConciseSet doComplement(ImmutableConciseSet set) + { + if (set == null || set.isEmpty()) { + return new ImmutableConciseSet(); + } + + IntList retVal = new IntList(); + WordIterator iter = set.newWordIterator(); + while (iter.hasNext()) { + int word = iter.next(); + if (ConciseSetUtils.isLiteral(word)) { + retVal.add(ConciseSetUtils.ALL_ZEROS_LITERAL | ~word); + } else { + retVal.add(ConciseSetUtils.SEQUENCE_BIT ^ word); + } + } + // do not complement after the last element + int lastWord = retVal.get(retVal.length() - 1); + if (ConciseSetUtils.isLiteral(lastWord)) { + lastWord = ConciseSetUtils.clearBitsAfterInLastWord( + lastWord, + ConciseSetUtils.maxLiteralLengthModulus(set.getLast()) + ); + } + + 
retVal.set(retVal.length() - 1, lastWord); + + trimZeros(retVal); + + if (retVal.isEmpty()) { + return new ImmutableConciseSet(); + } + return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); + } + + // Based on the ConciseSet implementation by Alessandro Colantonio + private static void trimZeros(IntList set) + { + // loop over ALL_ZEROS_LITERAL words + int w; + int last = set.length() - 1; + do { + w = set.get(last); + if (w == ConciseSetUtils.ALL_ZEROS_LITERAL) { + set.set(last, 0); + last--; + } else if (ConciseSetUtils.isZeroSequence(w)) { + if (ConciseSetUtils.isSequenceWithNoBits(w)) { + set.set(last, 0); + last--; + } else { + // convert the sequence in a 1-bit literal word + set.set(last, ConciseSetUtils.getLiteral(w, false)); + return; + } + } else { + // one sequence or literal + return; + } + if (set.isEmpty() || last == -1) { + return; + } + } while (true); + } + + public byte[] toBytes() + { + if (words == null) { + return new byte[]{}; + } + ByteBuffer buf = ByteBuffer.allocate(words.capacity() * Ints.BYTES); + buf.asIntBuffer().put(words.asReadOnlyBuffer()); + return buf.array(); + } + + public int getLastWordIndex() + { + return lastWordIndex; + } + + // Based on the ConciseSet implementation by Alessandro Colantonio + private int calcSize() + { + int retVal = 0; + for (int i = 0; i <= lastWordIndex; i++) { + int w = words.get(i); + if (ConciseSetUtils.isLiteral(w)) { + retVal += ConciseSetUtils.getLiteralBitCount(w); + } else { + if (ConciseSetUtils.isZeroSequence(w)) { + if (!ConciseSetUtils.isSequenceWithNoBits(w)) { + retVal++; + } + } else { + retVal += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1); + if (!ConciseSetUtils.isSequenceWithNoBits(w)) { + retVal--; + } + } + } + } + + return retVal; + } + + public int size() + { + return size; + } + + // Based on the ConciseSet implementation by Alessandro Colantonio + public int getLast() + { + if (isEmpty()) { + return -1; + } + + int last = 0; 
+ for (int i = 0; i <= lastWordIndex; i++) { + int w = words.get(i); + if (ConciseSetUtils.isLiteral(w)) { + last += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + last += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1); + } + } + + int w = words.get(lastWordIndex); + if (ConciseSetUtils.isLiteral(w)) { + last -= Integer.numberOfLeadingZeros(ConciseSetUtils.getLiteralBits(w)); + } else { + last--; + } + return last; + } + + public boolean contains(final int integer) + { + if (isEmpty()) { + return false; + } + final IntSet.IntIterator intIterator = iterator(); + intIterator.skipAllBefore(integer); + return intIterator.hasNext() && intIterator.next() == integer; + } + + // Based on the ConciseSet implementation by Alessandro Colantonio + public int get(int i) + { + if (i < 0) { + throw new IndexOutOfBoundsException(); + } + + // initialize data + int firstSetBitInWord = 0; + int position = i; + int setBitsInCurrentWord = 0; + for (int j = 0; j <= lastWordIndex; j++) { + int w = words.get(j); + if (ConciseSetUtils.isLiteral(w)) { + // number of bits in the current word + setBitsInCurrentWord = ConciseSetUtils.getLiteralBitCount(w); + + // check if the desired bit is in the current word + if (position < setBitsInCurrentWord) { + int currSetBitInWord = -1; + for (; position >= 0; position--) { + currSetBitInWord = Integer.numberOfTrailingZeros(w & (0xFFFFFFFF << (currSetBitInWord + 1))); + } + return firstSetBitInWord + currSetBitInWord; + } + + // skip the 31-bit block + firstSetBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + // number of involved bits (31 * blocks) + int sequenceLength = ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1); + + // check the sequence type + if (ConciseSetUtils.isOneSequence(w)) { + if (ConciseSetUtils.isSequenceWithNoBits(w)) { + setBitsInCurrentWord = sequenceLength; + if (position < setBitsInCurrentWord) { + return firstSetBitInWord + 
position; + } + } else { + setBitsInCurrentWord = sequenceLength - 1; + if (position < setBitsInCurrentWord) + // check whether the desired set bit is after the + // flipped bit (or after the first block) + { + return firstSetBitInWord + position + (position < ConciseSetUtils.getFlippedBit(w) ? 0 : 1); + } + } + } else { + if (ConciseSetUtils.isSequenceWithNoBits(w)) { + setBitsInCurrentWord = 0; + } else { + setBitsInCurrentWord = 1; + if (position == 0) { + return firstSetBitInWord + ConciseSetUtils.getFlippedBit(w); + } + } + } + + // skip the 31-bit blocks + firstSetBitInWord += sequenceLength; + } + + // update the number of found set bits + position -= setBitsInCurrentWord; + } + + throw new IndexOutOfBoundsException(Integer.toString(i)); + } + + public int compareTo(ImmutableConciseSet other) + { + return words.asReadOnlyBuffer().compareTo(other.words.asReadOnlyBuffer()); + } + + private boolean isEmpty() + { + return words == null || words.limit() == 0; + } + + @Override + // Based on the AbstractIntSet implementation by Alessandro Colantonio + public String toString() + { + IntSet.IntIterator itr = iterator(); + if (!itr.hasNext()) { + return "[]"; + } + + StringBuilder sb = new StringBuilder(); + sb.append('['); + for (; ; ) { + sb.append(itr.next()); + if (!itr.hasNext()) { + return sb.append(']').toString(); + } + sb.append(", "); + } + } + + // Based on the ConciseSet implementation by Alessandro Colantonio + public IntSet.IntIterator iterator() + { + if (isEmpty()) { + return new IntSet.IntIterator() + { + @Override + public void skipAllBefore(int element) {/*empty*/} + + @Override + public boolean hasNext() {return false;} + + @Override + public int next() {throw new NoSuchElementException();} + + @Override + public void remove() {throw new UnsupportedOperationException();} + + @Override + public IntSet.IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + return new BitIterator(); + } + + public WordIterator newWordIterator() + 
{ + return new WordIterator(); + } + + private static class WordHolder + { + private final int word; + private final WordIterator iterator; + + public WordHolder( + int word, + WordIterator iterator + ) + { + this.word = word; + this.iterator = iterator; + } + + public int getWord() + { + return word; + } + + public WordIterator getIterator() + { + return iterator; + } + } + + // Based on the ConciseSet implementation by Alessandro Colantonio + private class BitIterator implements IntSet.IntIterator + { + final ConciseSetUtils.LiteralAndZeroFillExpander litExp; + final ConciseSetUtils.OneFillExpander oneExp; + + ConciseSetUtils.WordExpander exp; + int nextIndex = 0; + int nextOffset = 0; + + private BitIterator() + { + litExp = ConciseSetUtils.newLiteralAndZeroFillExpander(); + oneExp = ConciseSetUtils.newOneFillExpander(); + + nextWord(); + } + + private BitIterator( + ConciseSetUtils.LiteralAndZeroFillExpander litExp, + ConciseSetUtils.OneFillExpander oneExp, + ConciseSetUtils.WordExpander exp, + int nextIndex, + int nextOffset + ) + { + this.litExp = litExp; + this.oneExp = oneExp; + this.exp = exp; + this.nextIndex = nextIndex; + this.nextOffset = nextOffset; + } + + @Override + public boolean hasNext() + { + while (!exp.hasNext()) { + if (nextIndex > lastWordIndex) { + return false; + } + nextWord(); + } + return true; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return exp.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public void skipAllBefore(int element) + { + while (true) { + exp.skipAllBefore(element); + if (exp.hasNext() || nextIndex > lastWordIndex) { + return; + } + nextWord(); + } + } + + @Override + public IntSet.IntIterator clone() + { + return new BitIterator( + (ConciseSetUtils.LiteralAndZeroFillExpander) litExp.clone(), + (ConciseSetUtils.OneFillExpander) oneExp.clone(), + exp.clone(), + nextIndex, + nextOffset + ); 
+ } + + private void nextWord() + { + final int word = words.get(nextIndex++); + exp = ConciseSetUtils.isOneSequence(word) ? oneExp : litExp; + exp.reset(nextOffset, word, true); + + // prepare next offset + if (ConciseSetUtils.isLiteral(word)) { + nextOffset += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + nextOffset += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(word) + 1); + } + } + } + + public class WordIterator implements Iterator + { + private int startIndex; + private int wordsWalked; + private int currWord; + private int nextWord; + private int currRow; + + private volatile boolean hasNextWord = false; + + WordIterator() + { + startIndex = -1; + wordsWalked = 0; + currRow = -1; + } + + public void advanceTo(int endCount) + { + while (hasNext() && wordsWalked < endCount) { + next(); + } + if (wordsWalked <= endCount) { + return; + } + + nextWord = (currWord & 0xC1000000) | (wordsWalked - endCount - 1); + startIndex = endCount; + hasNextWord = true; + } + + @Override + public boolean hasNext() + { + if (isEmpty()) { + return false; + } + if (hasNextWord) { + return true; + } + return currRow < (words.capacity() - 1); + } + + @Override + public Integer next() + { + if (hasNextWord) { + currWord = nextWord; + hasNextWord = false; + return new Integer(currWord); + } + + currWord = words.get(++currRow); + if (ConciseSetUtils.isLiteral(currWord)) { + startIndex = wordsWalked++; + } else { + startIndex = wordsWalked; + wordsWalked += ConciseSetUtils.getSequenceNumWords(currWord); + } + + return new Integer(currWord); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java new file mode 100755 index 000000000000..3f15daeb8289 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java @@ -0,0 +1,662 @@ +/* + * 
(c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.intset; + + +import io.druid.extendedset.ExtendedSet; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Very similar to {@link ExtendedSet} but for the primitive int + * type. + * + * @author Alessandro Colantonio + * @version $Id: IntSet.java 135 2011-01-04 15:54:48Z cocciasik $ + * @see ArraySet + * @see ConciseSet + * @see FastSet + * @see HashIntSet + */ +public interface IntSet extends Cloneable, Comparable +{ + /** + * Generates the intersection set + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #retainAll(IntSet) + */ + public IntSet intersection(IntSet other); + + /** + * Generates the union set + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #addAll(IntSet) + */ + public IntSet union(IntSet other); + + /** + * Generates the difference set + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #removeAll(IntSet) + */ + public IntSet difference(IntSet other); + + /** + * Generates the symmetric difference set + * + * @param 
other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #flip(int) + */ + public IntSet symmetricDifference(IntSet other); + + /** + * Generates the complement set. The returned set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. + * + * @return the complement set + * + * @see IntSet#complement() + */ + public IntSet complemented(); + + /** + * Complements the current set. The modified set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. + * + * @see IntSet#complemented() + */ + public void complement(); + + /** + * Returns true if the specified {@link IntSet} + * instance contains any elements that are also contained within this + * {@link IntSet} instance + * + * @param other {@link IntSet} to intersect with + * + * @return a boolean indicating whether this {@link IntSet} + * intersects the specified {@link IntSet}. + */ + public boolean containsAny(IntSet other); + + /** + * Returns true if the specified {@link IntSet} + * instance contains at least minElements elements that are + * also contained within this {@link IntSet} instance + * + * @param other {@link IntSet} instance to intersect with + * @param minElements minimum number of elements to be contained within this + * {@link IntSet} instance + * + * @return a boolean indicating whether this {@link IntSet} + * intersects the specified {@link IntSet}. + * + * @throws IllegalArgumentException if minElements < 1 + */ + public boolean containsAtLeast(IntSet other, int minElements); + + /** + * Computes the intersection set size. + *

+ * This is faster than calling {@link #intersection(IntSet)} and + * then {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int intersectionSize(IntSet other); + + /** + * Computes the union set size. + *

+ * This is faster than calling {@link #union(IntSet)} and then + * {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int unionSize(IntSet other); + + /** + * Computes the symmetric difference set size. + *

+ * This is faster than calling {@link #symmetricDifference(IntSet)} + * and then {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int symmetricDifferenceSize(IntSet other); + + /** + * Computes the difference set size. + *

+ * This is faster than calling {@link #difference(IntSet)} and then + * {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int differenceSize(IntSet other); + + /** + * Computes the complement set size. + *

+ * This is faster than calling {@link #complemented()} and then + * {@link #size()} + * + * @return the size + */ + public int complementSize(); + + /** + * Generates an empty set + * + * @return the empty set + */ + public IntSet empty(); + + /** + * See the clone() of {@link Object} + * + * @return cloned object + */ + public IntSet clone(); + + /** + * Computes the compression factor of the equivalent bitmap representation + * (1 means not compressed, namely a memory footprint similar to + * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) + * + * @return the compression factor + */ + public double bitmapCompressionRatio(); + + /** + * Computes the compression factor of the equivalent integer collection (1 + * means not compressed, namely a memory footprint similar to + * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) + * + * @return the compression factor + */ + public double collectionCompressionRatio(); + + /** + * @return a {@link IntIterator} instance to iterate over the set + */ + public IntIterator iterator(); + + /** + * @return a {@link IntIterator} instance to iterate over the set in + * descending order + */ + public IntIterator descendingIterator(); + + /** + * Prints debug info about the given {@link IntSet} implementation + * + * @return a string that describes the internal representation of the + * instance + */ + public String debugInfo(); + + /** + * Adds to the set all the elements between first and + * last, both included. + * + * @param from first element + * @param to last element + */ + public void fill(int from, int to); + + /** + * Removes from the set all the elements between first and + * last, both included. 
+ * + * @param from first element + * @param to last element + */ + public void clear(int from, int to); + + /** + * Adds the element if it does not exist, or removes it if it exists + * + * @param e element to flip + * + * @see #symmetricDifference(IntSet) + */ + public void flip(int e); + + /** + * Gets the i-th element of the set + * + * @param i position of the element in the sorted set + * + * @return the i-th element of the set + * + * @throws IndexOutOfBoundsException if i is less than zero, or greater than or equal to + * {@link #size()} + */ + public int get(int i); + + /** + * Provides the position of the element within the set. + *

+ * It returns -1 if the element does not exist within the set. + * + * @param e element of the set + * + * @return the element position + */ + public int indexOf(int e); + + /** + * Converts a given array into an instance of the current class. + * + * @param a array to use to generate the new instance + * + * @return the converted collection + */ + public IntSet convert(int... a); + + /** + * Converts a given collection into an instance of the current class. + * + * @param c array to use to generate the new instance + * + * @return the converted collection + */ + public IntSet convert(Collection c); + + /** + * Returns the first (lowest) element currently in this set. + * + * @return the first (lowest) element currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public int first(); + + /** + * Returns the last (highest) element currently in this set. + * + * @return the last (highest) element currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public int last(); + + /** + * @return the number of elements in this set (its cardinality) + */ + public int size(); + + /** + * @return true if this set contains no elements + */ + public boolean isEmpty(); + + /** + * Returns true if this set contains the specified element. + * + * @param i element whose presence in this set is to be tested + * + * @return true if this set contains the specified element + */ + public boolean contains(int i); + + /** + * Adds the specified element to this set if it is not already present. It + * ensures that sets never contain duplicate elements. + * + * @param i element to be added to this set + * + * @return true if this set did not already contain the specified + * element + * + * @throws IllegalArgumentException if some property of the specified element prevents it from + * being added to this set + */ + public boolean add(int i); + + /** + * Removes the specified element from this set if it is present. 
+ * + * @param i object to be removed from this set, if present + * + * @return true if this set contained the specified element + * + * @throws UnsupportedOperationException if the remove operation is not supported by this set + */ + public boolean remove(int i); + + /** + * Returns true if this set contains all of the elements of the + * specified collection. + * + * @param c collection to be checked for containment in this set + * + * @return true if this set contains all of the elements of the + * specified collection + * + * @throws NullPointerException if the specified collection contains one or more null + * elements and this set does not permit null elements + * (optional), or if the specified collection is null + * @see #contains(int) + */ + public boolean containsAll(IntSet c); + + /** + * Adds all of the elements in the specified collection to this set if + * they're not already present. + * + * @param c collection containing elements to be added to this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if the specified collection contains one or more null + * elements and this set does not permit null elements, or if + * the specified collection is null + * @throws IllegalArgumentException if some property of an element of the specified collection + * prevents it from being added to this set + * @see #add(int) + */ + public boolean addAll(IntSet c); + + /** + * Retains only the elements in this set that are contained in the specified + * collection. In other words, removes from this set all of its elements + * that are not contained in the specified collection. 
+ * + * @param c collection containing elements to be retained in this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if this set contains a null element and the specified + * collection does not permit null elements (optional), or if + * the specified collection is null + * @see #remove(int) + */ + public boolean retainAll(IntSet c); + + /** + * Removes from this set all of its elements that are contained in the + * specified collection. + * + * @param c collection containing elements to be removed from this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if this set contains a null element and the specified + * collection does not permit null elements (optional), or if + * the specified collection is null + * @see #remove(int) + * @see #contains(int) + */ + public boolean removeAll(IntSet c); + + /** + * Removes all of the elements from this set. The set will be empty after + * this call returns. + * + * @throws UnsupportedOperationException if the clear method is not supported by this set + */ + public void clear(); + + /** + * @return an array containing all the elements in this set, in the same + * order. + */ + public int[] toArray(); + + /** + * Returns an array containing all of the elements in this set. + *

+ * If this set fits in the specified array with room to spare (i.e., the + * array has more elements than this set), the elements in the array + * immediately following the end of the set are left unchanged. + * + * @param a the array into which the elements of this set are to be + * stored. + * + * @return the array containing all the elements in this set + * + * @throws NullPointerException if the specified array is null + * @throws IllegalArgumentException if this set does not fit in the specified array + */ + public int[] toArray(int[] a); + + /** + * Computes the power-set of the current set. + *

+ * It is a particular implementation of the algorithm Apriori (see: + * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + * Association Rules in Large Databases, in Proceedings of the + * 20th International Conference on Very Large Data Bases, + * p.487-499, 1994). The returned power-set does not contain the + * empty set. + *

+ * The subsets composing the powerset are returned in a list that is sorted + * according to the lexicographical order provided by the integer set. + * + * @return the power-set + * + * @see #powerSet(int, int) + * @see #powerSetSize() + */ + public List powerSet(); + + /** + * Computes a subset of the power-set of the current set, composed by those + * subsets that have cardinality between min and + * max. + *

+ * It is a particular implementation of the algorithm Apriori (see: + * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + * Association Rules in Large Databases, in Proceedings of the + * 20th International Conference on Very Large Data Bases, + * p.487-499, 1994). The power-set does not contain the empty set. + *

+ * The subsets composing the powerset are returned in a list that is sorted + * according to the lexicographical order provided by the integer set. + * + * @param min minimum subset size (greater than zero) + * @param max maximum subset size + * + * @return the power-set + * + * @see #powerSet() + * @see #powerSetSize(int, int) + */ + public List powerSet(int min, int max); + + /** + * Computes the power-set size of the current set. + *

+ * The power-set does not contain the empty set. + * + * @return the power-set size + * + * @see #powerSet() + */ + public int powerSetSize(); + + /** + * Computes the power-set size of the current set, composed of those subsets + * that have cardinality between min and max. + *

+ * The returned power-set does not contain the empty set. + * + * @param min minimum subset size (greater than zero) + * @param max maximum subset size + * + * @return the power-set size + * + * @see #powerSet(int, int) + */ + public int powerSetSize(int min, int max); + + /** + * Computes the Jaccard similarity coefficient between this set and the + * given set. + *

+ * The coefficient is defined as + * |A intersection B| / |A union B|. + * + * @param other the other set + * + * @return the Jaccard similarity coefficient + * + * @see #jaccardDistance(IntSet) + */ + public double jaccardSimilarity(IntSet other); + + /** + * Computes the Jaccard distance between this set and the given set. + *

+ * The coefficient is defined as + * 1 - {@link #jaccardSimilarity(IntSet)}. + * + * @param other the other set + * + * @return the Jaccard distance + * + * @see #jaccardSimilarity(IntSet) + */ + public double jaccardDistance(IntSet other); + + /** + * Computes the weighted version of the Jaccard similarity coefficient + * between this set and the given set. + *

+ * The coefficient is defined as + * sum of min(A_i, B_i) / sum of max(A_i, B_i). + * + * @param other the other set + * + * @return the weighted Jaccard similarity coefficient + * + * @see #weightedJaccardDistance(IntSet) + */ + public double weightedJaccardSimilarity(IntSet other); + + /** + * Computes the weighted version of the Jaccard distance between this set + * and the given set. + *

+ * The distance is defined as 1 - + * {@link #weightedJaccardSimilarity(IntSet)}. + * + * @param other the other set + * + * @return the weighted Jaccard distance + * + * @see #weightedJaccardSimilarity(IntSet) + */ + public double weightedJaccardDistance(IntSet other); + + /** + * An {@link Iterator}-like interface that allows the caller to "skip" some elements of + * the set + */ + public interface IntIterator + { + /** + * @return true if the iterator has more elements. + */ + boolean hasNext(); + + /** + * @return the next element in the iteration. + * + * @throws NoSuchElementException if the iteration has no more elements. + */ + int next(); + + /** + * Removes from the underlying collection the last element returned by + * the iterator (optional operation). This method can be called only + * once per call to next. The behavior of an iterator is + * unspecified if the underlying collection is modified while the + * iteration is in progress in any way other than by calling this + * method. + * + * @throws UnsupportedOperationException if the remove operation is not supported by + * this Iterator. + * @throws IllegalStateException if the next method has not yet been called, + * or the remove method has already been called + * after the last call to the next method. + */ + void remove(); + + /** + * Skips all the elements before the specified element, so that + * {@link #next()} gives the given element or, if it does not exist, the + * element immediately after according to the sorting provided by this + * set. + *

+ * If element is less than the next element, it does + * nothing + * + * @param element first element to not skip + */ + public void skipAllBefore(int element); + + /** + * Clone the iterator + * + * @return a clone of the IntIterator + */ + public IntIterator clone(); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java b/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java new file mode 100755 index 000000000000..3026fdf25907 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java @@ -0,0 +1,299 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.utilities; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; + +/** + * A {@link Map} backed by an array, where keys are the indices of the array, + * and values are the elements of the array. + *

+ * Modifications to the map (i.e., through {@link #put(Integer, Object)} and + * {@link java.util.Map.Entry#setValue(Object)}) are reflected to the original array. + * However, the map has a fixed length, that is the length of the array. + * + * @param the type of elements represented by columns + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class ArrayMap extends AbstractMap implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = -578029467093308343L; + + /** + * array backed by this map + */ + private final T[] array; + /** + * first index of the map + */ + private final int indexShift; + /** + * {@link Set} instance to iterate over #array + */ + private transient Set> entrySet; + + /** + * Initializes the map + * + * @param array array to manipulate + * @param indexShift first index of the map + */ + ArrayMap(T[] array, int indexShift) + { + this.array = array; + this.indexShift = indexShift; + entrySet = null; + } + + /** + * Initializes the map + * + * @param array array to manipulate + */ + ArrayMap(T[] array) + { + this(array, 0); + } + + /** + * Test + * + * @param args + */ + public static void main(String[] args) + { + ArrayMap am = new ArrayMap(new String[]{"Three", "Four", "Five"}, 3); + System.out.println(am); + am.put(5, "FIVE"); + System.out.println(am); + System.out.println(am.get(5)); + System.out.println(am.containsKey(2)); + System.out.println(am.containsKey(3)); + System.out.println(am.containsValue("THREE")); + System.out.println(am.keySet()); + System.out.println(am.values()); + } + + /** + * {@inheritDoc} + */ + @Override + public Set> entrySet() + { + if (entrySet == null) { + // create an entry for each element + final List entries = new ArrayList(array.length); + for (int i = 0; i < array.length; i++) { + entries.add(new SimpleEntry(i)); + } + + // create the Set instance + entrySet = new AbstractSet>() + { + @Override + public Iterator> iterator() + { + 
return new Iterator>() + { + int curr = 0; + + @Override + public boolean hasNext() + { + return curr < entries.size(); + } + + @Override + public Entry next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return entries.get(curr++); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); // the backing array has a fixed length, so entries cannot be removed + } + }; + } + + @Override + public int size() + { + return entries.size(); + } + }; + } + return entrySet; + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return array.length; + } + + /** + * {@inheritDoc} Only {@link Integer} keys that map to a valid index of the backing array are present. + */ + @Override + public boolean containsKey(Object key) + { + int index = (key instanceof Integer) ? (Integer) key - indexShift : -1; // null and non-Integer keys are never present + return (index >= 0) && (index < array.length); + } + + /** + * {@inheritDoc} Returns null, rather than throwing, when the key is not present. + */ + @Override + public T get(Object key) + { + return containsKey(key) ? array[(Integer) key - indexShift] : null; // Map contract: null for absent keys + } + + /** + * {@inheritDoc} + */ + @Override + public T put(Integer key, T value) + { + int actualIndex = key - indexShift; + T old = array[actualIndex]; + array[actualIndex] = value; + return old; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return Arrays.hashCode(array); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!super.equals(obj)) { + return false; + } + if (!(obj instanceof ArrayMap)) { + return false; + } + return Arrays.equals(array, ((ArrayMap) obj).array); + } + + /** + * Reconstruct the instance from a stream + */ + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException + { + s.defaultReadObject(); + entrySet = null; + } + + /** + * Entry of the map + */ + private class SimpleEntry implements Entry + { + /** + * index of {@link ArrayMap#array} + */ + final int actualIndex; + + /** + * Creates an entry + * + * @param index index of {@link ArrayMap#array} + */ + private SimpleEntry(int index) + { + this.actualIndex = index; + } + + /** + * {@inheritDoc} + */ + @Override +
public Integer getKey() + { + return actualIndex + indexShift; + } + + /** + * {@inheritDoc} + */ + @Override + public T getValue() + { + return array[actualIndex]; + } + + /** + * {@inheritDoc} + */ + @Override + public T setValue(T value) + { + T old = array[actualIndex]; + array[actualIndex] = value; + return old; + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return (actualIndex + indexShift) + "=" + array[actualIndex]; + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java b/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java new file mode 100755 index 000000000000..306a2e1e510c --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java @@ -0,0 +1,350 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.utilities; + +import java.util.Random; + +/** + * Population count (a.k.a. Hamming weight) of a bitmap represented by an + * array of int. + *

+ * Derived from + * http://dalkescientific.com/writings/diary/popcnt.c + * + * @author Alessandro Colantonio + * @version $Id: BitCount.java 157 2011-11-14 14:25:15Z cocciasik $ + */ +public class BitCount +{ + /** + * Population count + *

+ * It counts a single word + * + * @param word word to count + * + * @return population count + */ + public static int count(int word) + { + word -= ((word >>> 1) & 0x55555555); + word = (word & 0x33333333) + ((word >>> 2) & 0x33333333); + word = (word + (word >>> 4)) & 0x0F0F0F0F; + return (word * 0x01010101) >>> 24; + } + + /** + * Population count + * + * @param buffer array of int + * + * @return population count + */ + public static int count(int[] buffer) + { + return count(buffer, buffer.length); + } + + /** + * Population count + *

+ * It counts 24 words at a time, then 3 at a time, then 1 at a time + * + * @param buffer array of int + * @param n number of elements of buffer to count + * + * @return population count + */ + public static int count(int[] buffer, int n) + { + final int n1 = n - n % 24; + final int n2 = n - n % 3; + + int cnt = 0; + int i; + for (i = 0; i < n1; i += 24) { + cnt += merging3(buffer, i); + } + for (; i < n2; i += 3) { + cnt += merging2(buffer, i); + } + cnt += popcount_fbsd2(buffer, i, n); + return cnt; + } + + // used by count() + private static int merging3(int[] buffer, int x) + { + int cnt1; + int cnt2; + int cnt = 0; + for (int i = x; i < x + 24; i += 3) { + cnt1 = buffer[i]; + cnt2 = buffer[i + 1]; + final int w = buffer[i + 2]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt1 += (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt += (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + } + cnt = (cnt & 0x00FF00FF) + ((cnt >>> 8) & 0x00FF00FF); + cnt += cnt >>> 16; + return cnt & 0x00000FFFF; + } + + // used by count() + private static int merging2(int[] buffer, int x) + { + int cnt1 = buffer[x]; + int cnt2 = buffer[x + 1]; + final int w = buffer[x + 2]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt2 = (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt1 += cnt2; + cnt1 = (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + cnt1 += cnt1 >>> 8; + cnt1 += cnt1 >>> 16; + return cnt1 & 0x000000FF; + } + + // used by count() + private static int popcount_fbsd2(int[] data, int x, int n) + { + int cnt = 0; + for (; x < n; x++) { + cnt += count(data[x]); + } + return cnt; + } + + /** + * Population count, skipping words at even positions + * + * @param 
buffer array of int + * + * @return population count + */ + public static int count_2(int[] buffer) + { + return count_2(buffer, buffer.length); + } + + /** + * Population count, skipping words at even positions + *

+ * It counts 24 words at a time, then 3 at a time, then 1 at a time + * + * @param buffer array of int + * @param n number of elements of buffer to count + * + * @return population count + */ + public static int count_2(int[] buffer, int n) + { + final int n1 = n - n % 48; + final int n2 = n - n % 6; + + int cnt = 0; + int i; + for (i = 0; i < n1; i += 48) { + cnt += merging3_2(buffer, i); + } + for (; i < n2; i += 6) { + cnt += merging2_2(buffer, i); + } + cnt += popcount_fbsd2_2(buffer, i, n); + return cnt; + } + + // used by count_2() + private static int merging3_2(int[] buffer, int x) + { + int cnt1; + int cnt2; + int cnt = 0; + for (int i = x; i < x + 48; i += 6) { + cnt1 = buffer[i + 1]; + cnt2 = buffer[i + 3]; + final int w = buffer[i + 5]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt1 += (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt += (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + } + cnt = (cnt & 0x00FF00FF) + ((cnt >>> 8) & 0x00FF00FF); + cnt += cnt >>> 16; + return cnt & 0x00000FFFF; + } + + // used by count_2() + private static int merging2_2(int[] buffer, int x) + { + int cnt1 = buffer[x + 1]; + int cnt2 = buffer[x + 3]; + final int w = buffer[x + 5]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt2 = (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt1 += cnt2; + cnt1 = (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + cnt1 += cnt1 >>> 8; + cnt1 += cnt1 >>> 16; + return cnt1 & 0x000000FF; + } + + // used by count_2() + private static int popcount_fbsd2_2(int[] data, int x, int n) + { + int cnt = 0; + for (x++; x < n; x += 2) { + cnt += count(data[x]); + } + return cnt; + } + + /** + * Test + * + * @param args + */ + 
public static void main(String[] args) + { + final int trials = 10000; + final int maxLength = 10000; + + Random rnd = new Random(); + final int seed = rnd.nextInt(); + + System.out.print("Test correctness... "); + rnd = new Random(seed); + for (int i = 0; i < trials; i++) { + int[] x = new int[rnd.nextInt(maxLength)]; + for (int j = 0; j < x.length; j++) { + x[j] = rnd.nextInt(Integer.MAX_VALUE); + } + + int size1 = 0; + for (int j = 0; j < x.length; j++) { + size1 += count(x[j]); + } + int size2 = count(x); + + if (size1 != size2) { + System.out.println("i = " + i); + System.out.println("ERRORE!"); + System.out.println(size1 + ", " + size2); + for (int j = 0; j < x.length; j++) { + System.out.format("x[%d] = %d --> %d\n", j, x[j], count(x[j])); + } + return; + } + } + System.out.println("done!"); + + System.out.print("Test correctness II... "); + rnd = new Random(seed); + for (int i = 0; i < trials; i++) { + int[] x = new int[rnd.nextInt(maxLength << 1)]; + for (int j = 1; j < x.length; j += 2) { + x[j] = rnd.nextInt(Integer.MAX_VALUE); + } + + int size1 = 0; + for (int j = 1; j < x.length; j += 2) { + size1 += count(x[j]); + } + int size2 = count_2(x); + + if (size1 != size2) { + System.out.println("i = " + i); + System.out.println("ERRORE!"); + System.out.println(size1 + ", " + size2); + for (int j = 1; j < x.length; j += 2) { + System.out.format("x[%d] = %d --> %d\n", j, x[j], count(x[j])); + } + return; + } + } + System.out.println("done!"); + + System.out.print("Test time count(): "); + rnd = new Random(seed); + long t = System.currentTimeMillis(); + for (int i = 0; i < trials; i++) { + int[] x = new int[rnd.nextInt(maxLength)]; + for (int j = 0; j < x.length; j++) { + x[j] = rnd.nextInt(Integer.MAX_VALUE); + } + + @SuppressWarnings("unused") + int size = 0; + for (int j = 0; j < x.length; j++) { + size += count(x[j]); + } + } + System.out.println(System.currentTimeMillis() - t); + + System.out.print("Test time BitCount.count(): "); + rnd = new Random(seed); 
+ t = System.currentTimeMillis(); + for (int i = 0; i < trials; i++) { + int[] x = new int[rnd.nextInt(maxLength)]; + for (int j = 0; j < x.length; j++) { + x[j] = rnd.nextInt(Integer.MAX_VALUE); + } + count(x); + } + System.out.println(System.currentTimeMillis() - t); + + System.out.print("Test II time count(): "); + rnd = new Random(seed); + t = System.currentTimeMillis(); + for (int i = 0; i < trials; i++) { + int[] x = new int[rnd.nextInt(maxLength << 1)]; + for (int j = 1; j < x.length; j += 2) { + x[j] = rnd.nextInt(Integer.MAX_VALUE); + } + + @SuppressWarnings("unused") + int size = 0; + for (int j = 1; j < x.length; j += 2) { + size += count(x[j]); + } + } + System.out.println(System.currentTimeMillis() - t); + + System.out.print("Test II time BitCount.count(): "); + rnd = new Random(seed); + t = System.currentTimeMillis(); + for (int i = 0; i < trials; i++) { + int[] x = new int[rnd.nextInt(maxLength << 1)]; + for (int j = 1; j < x.length; j += 2) { + x[j] = rnd.nextInt(Integer.MAX_VALUE); + } + count_2(x); + } + System.out.println(System.currentTimeMillis() - t); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java b/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java new file mode 100755 index 000000000000..ec38ce38b99b --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java @@ -0,0 +1,317 @@ +package io.druid.extendedset.utilities; + +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.wrappers.IntegerSet; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedSet; + +/** + * This class implements a {@link Map} from a key of type K to a + * collection contains instances of I. 
+ * + * @param key type + * @param item type + * @param {@link Collection} subclass used to collect items + * + * @author Alessandro Colantonio + * @version $Id: CollectionMap.java 152 2011-03-30 11:18:18Z cocciasik $ + */ +public class CollectionMap> extends LinkedHashMap +{ + private static final long serialVersionUID = -2613391212228461025L; + + /** + * empty collection + */ + private final C emptySet; + + /** + * Initializes the map by providing an instance of the empty collection + * + * @param emptySet the empty collection + */ + public CollectionMap(C emptySet) + { + this.emptySet = emptySet; + } + + /** + * Generates a new {@link CollectionMap} instance. It is an alternative to + * the constructor {@link #CollectionMap(Collection)} that reduces the code + * to write. + * + * @param key type + * @param item type + * @param {@link Collection} subclass used to collect items + * @param empty subset type + * @param emptySet the empty collection + * + * @return the new instance of {@link CollectionMap} + */ + public static , EX extends CX> + CollectionMap newCollectionMap(EX emptySet) + { + return new CollectionMap(emptySet); + } + + /** + * Test procedure + *

+ * Expected output: + *

+   * {}
+   * {A=[1]}
+   * {A=[1, 2]}
+   * {A=[1, 2], B=[3]}
+   * {A=[1, 2], B=[3, 4, 5, 6]}
+   * true
+   * true
+   * false
+   * {A=[1], B=[3, 4, 5, 6]}
+   * {A=[1], B=[3, 4, 5, 6]}
+   * {A=[1], B=[6]}
+   * 
+ * + * @param args + */ + public static void main(String[] args) + { + CollectionMap map = newCollectionMap(new IntegerSet(new ConciseSet())); + System.out.println(map); + + map.putItem("A", 1); + System.out.println(map); + + map.putItem("A", 2); + System.out.println(map); + + map.putItem("B", 3); + System.out.println(map); + + map.putAllItems("B", Arrays.asList(4, 5, 6)); + System.out.println(map); + + System.out.println(map.containsItem(1)); + System.out.println(map.containsItem(6)); + System.out.println(map.containsItem(7)); + + map.removeItem("A", 2); + System.out.println(map); + + map.removeItem("A", 3); + System.out.println(map); + + map.removeAllItems("B", Arrays.asList(1, 2, 3, 4, 5)); + System.out.println(map); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public CollectionMap clone() + { + // result + CollectionMap cloned = new CollectionMap(emptySet); + + // clone all the entries + cloned.putAll(this); + + // clone all the values + if (emptySet instanceof Cloneable) { + for (Entry e : cloned.entrySet()) { + try { + e.setValue((C) e.getValue().getClass().getMethod("clone").invoke(e.getValue())); + } + catch (Exception ex) { + throw new RuntimeException(ex); + } + } + } else { + for (Entry e : cloned.entrySet()) { + C copy = cloneEmptySet(); + copy.addAll(e.getValue()); + e.setValue(copy); + } + } + return cloned; + } + + /** + * Generates an empty {@link CollectionMap} instance with the same + * collection type for values + * + * @return the empty {@link CollectionMap} instance + */ + public CollectionMap empty() + { + return new CollectionMap(emptySet); + } + + /** + * Populates the current instance with the data from another map. In + * particular, it creates the list of keys associated to each value. 
+ * + * @param map the input map + */ + public void mapValueToKeys(Map map) + { + for (Entry e : map.entrySet()) { + putItem(e.getValue(), e.getKey()); + } + } + + /** + * Generates a clone of the empty set: the prototype's {@code clone()} method is + * invoked reflectively when it is {@link Cloneable}, otherwise a new instance + * of its class is created. Any failure is rethrown as a {@link RuntimeException}. + * + * @return a clone of the empty set + */ + @SuppressWarnings("unchecked") + private C cloneEmptySet() + { + try { + if (emptySet instanceof Cloneable) { + return (C) emptySet.getClass().getMethod("clone").invoke(emptySet); + } + return (C) emptySet.getClass().newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Checks if there are some collections that contain the given item + * + * @param item item to check + * + * @return true if the item exists within some collections + */ + public boolean containsItem(I item) + { + for (Entry e : entrySet()) { + if (e.getValue().contains(item)) { + return true; + } + } + return false; + } + + /** + * Adds an item to the collection corresponding to the given key, creating + * that collection from the empty-set prototype when the key is not mapped yet + * + * @param key the key for the identification of the collection + * @param item item to add + * + * @return the updated collection of items for the given key + */ + public C putItem(K key, I item) + { + C items = get(key); + if (items == null) { + put(key, items = cloneEmptySet()); + } + items.add(item); + return items; + } + + /** + * Adds a collection of items to the collection corresponding to the given key, + * creating that collection from the empty-set prototype when the key is not + * mapped yet (same behaviour as {@link #putItem(Object, Object)}) + * + * @param key the key for the identification of the collection + * @param c items to add + * + * @return the updated collection of items for the given key + */ + public C putAllItems(K key, Collection c) + { + C items = get(key); + /* the test must be on the looked-up collection, not on the argument: a missing key would otherwise fail on items.addAll(c) */ + if (items == null) { + put(key, items = cloneEmptySet()); + } + items.addAll(c); + return items; + } + + /** + * Removes the item from the collection corresponding to the given key. The + * key itself is removed from the map once its collection becomes empty. + * + * @param key the key for the identification of the collection + * @param item item to remove + * + * @return the updated collection of items for the given key, or + * null when the key has no collection + */ + public C removeItem(K key, I
item) + { + C items = get(key); + if (items == null) { + return null; + } + items.remove(item); + if (items.isEmpty()) { + remove(key); + } + return items; + } + + /** + * Removes a collection of items from the collection corresponding to the given key. + * The key itself is removed from the map once its collection becomes empty. + * + * @param key the key for the identification of the collection + * @param c items to remove + * + * @return the updated collection of items for the given key, or + * null when the key has no collection + */ + public C removeAllItems(K key, Collection c) + { + C items = get(key); + if (items == null) { + return null; + } + items.removeAll(c); + if (items.isEmpty()) { + remove(key); + } + return items; + } + + /** + * Makes all collections read-only: every value is replaced by an unmodifiable + * view selected from the type of the empty-set prototype. The most specific + * type is tested first; in particular the SortedSet test comes before the Set + * test, because every SortedSet is also a Set and would otherwise never reach + * its own branch. + */ + @SuppressWarnings("unchecked") + public void makeAllCollectionsUnmodifiable() + { + if (emptySet instanceof ExtendedSet) { + for (Entry e : entrySet()) { + e.setValue((C) ((ExtendedSet) e.getValue()).unmodifiable()); + } + } else if (emptySet instanceof List) { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableList((List) e.getValue()))); + } + } else if (emptySet instanceof SortedSet) { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableSortedSet((SortedSet) e.getValue()))); + } + } else if (emptySet instanceof Set) { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableSet((Set) e.getValue()))); + } + } else { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableCollection(e.getValue()))); + } + } + + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java new file mode 100755 index 000000000000..1aaa06bdbe1c --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java @@ -0,0 +1,103 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.utilities; + +/** + * Hash functions for integers and integer arrays. + * + * @author Alessandro Colantonio + * @version $Id: IntHashCode.java 127 2010-12-21 20:22:12Z cocciasik $ + */ +public class IntHashCode +{ + /** + * Computes a hashcode for an integer + *

+ * Inspired by Thomas Wang's function, described at http://www.concentric.net/~ttwang/tech/inthash.htm + * + * The key is scrambled by a fixed sequence of shifts, additions, xors and one + * multiplication; no state is kept, so equal keys always give equal results. + * + * @param key the given integer + * + * @return the hashcode + */ + public static int hashCode(int key) + { + key = ~key + (key << 15); + key ^= key >>> 12; + key += key << 2; + key ^= key >>> 4; + key *= 2057; /* 2057 = 1 + (1 << 3) + (1 << 11) */ + key ^= key >>> 16; + return key; + } + + /** + * Computes the hashcode of an array of integers. Equivalent to + * {@code hashCode(keys, keys.length, 0)}: every element is included and the + * seed is 0. + * + * @param keys the given integer array + * + * @return the hashcode + */ + public static int hashCode(int[] keys) + { + return hashCode(keys, keys.length, 0); + } + + /** + * Computes the hashcode of an array of integers + *

+ * It is based on MurmurHash3 Algorithm, described at http://sites.google.com/site/murmurhash + * + * Only the first len elements of keys are read; len itself is mixed into the + * result before the final scrambling steps. + * + * @param keys the given integer array + * @param len number of elements to include, that is + * len <= keys.length + * @param seed initial seed + * + * @return the hashcode + */ + public static int hashCode(int[] keys, int len, int seed) + { + int h = 0x971e137b ^ seed; + int c1 = 0x95543787; + int c2 = 0x2ad7eb25; + + for (int i = 0; i < len; i++) { + int k = keys[i]; + k *= c1; + k = (k << 11) | (k >>> 21); // rotl k, 11 + k *= c2; + h ^= k; + + h = (h << 2) - h + 0x52dce729; + c1 = (c1 << 2) + c1 + 0x7b7d159c; + c2 = (c2 << 2) + c2 + 0x6bce6396; + } + + h ^= len; + h ^= h >>> 16; + h *= 0x85ebca6b; + h ^= h >>> 13; + h *= 0xc2b2ae35; + h ^= h >>> 16; + return h; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java new file mode 100755 index 000000000000..51474c9fccbf --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java @@ -0,0 +1,115 @@ +package io.druid.extendedset.utilities; + +import java.nio.IntBuffer; +import java.util.ArrayList; + +/** + * A growable list of {@code int} values kept in fixed-size {@code int[]} chunks + * of {@code allocateSize} elements. A chunk is allocated the first time one of + * its positions is written; positions that were never written read as 0. + */ +public class IntList +{ + private final ArrayList baseLists = new ArrayList(); + + private final int allocateSize; + + private int maxIndex; /* highest index written so far, -1 while the list is empty */ + /** + * Creates a list whose chunks hold 1000 elements. + */ + public IntList() + { + this(1000); + } + /** + * Creates an empty list. + * + * @param allocateSize number of elements held by each chunk + */ + public IntList(final int allocateSize) + { + this.allocateSize = allocateSize; + + maxIndex = -1; + } + /** + * @return the highest index written so far plus one + */ + public int length() + { + return maxIndex + 1; + } + /** + * @return true when nothing has been written yet + */ + public boolean isEmpty() + { + return (length() == 0); + } + /** + * Appends a value, i.e. writes it at index {@link #length()}. + * + * @param value the value to append + */ + public void add(int value) + { + set(length(), value); + } + /** + * Writes a value at the given index. The chunk table is padded with null + * placeholders up to the target chunk, the target chunk is allocated when + * missing, and {@link #length()} grows when index lies beyond the last + * index written so far. + * + * @param index position to write + * @param value value to store + */ + public void set(int index, int value) + { + int subListIndex = index / allocateSize; + + if (subListIndex >= baseLists.size()) { + for (int i = baseLists.size(); i <= subListIndex; ++i) { + baseLists.add(null); + } + } + + int[] baseList = baseLists.get(subListIndex); + + if (baseList == null) { + baseList =
new int[allocateSize]; + baseLists.set(subListIndex, baseList); + } + + baseList[index % allocateSize] = value; + + if (index > maxIndex) { + maxIndex = index; + } + } + /** + * Reads the value at the given index; a position inside a chunk that was + * never allocated reads as 0. + * + * @param index position to read + * + * @return the stored value + * + * @throws ArrayIndexOutOfBoundsException if index is beyond the last index written + */ + public int get(int index) + { + if (index > maxIndex) { + throw new ArrayIndexOutOfBoundsException(index); + } + + int subListIndex = index / allocateSize; + int[] baseList = baseLists.get(subListIndex); + + if (baseList == null) { + return 0; + } + + return baseList[index % allocateSize]; + } + /** + * @return the number of chunk slots, allocated or not + */ + public int baseListCount() + { + return baseLists.size(); + } + /** + * Returns a read-only buffer over one chunk, or null when that chunk was + * never allocated. The limit of the last chunk's buffer is reduced. + * + * @param index chunk number, from 0 to {@link #baseListCount()} - 1 + * + * @return read-only view of the chunk, or null + */ + public IntBuffer getBaseList(int index) + { + final int[] array = baseLists.get(index); + if (array == null) { + return null; + } + + final IntBuffer retVal = IntBuffer.wrap(array); + + if (index + 1 == baseListCount()) { + /* NOTE(review): maxIndex is the last valid index, so this limit leaves the last written element out of the buffer; looks off by one - confirm against callers before changing */ + retVal.limit(maxIndex - (index * allocateSize)); + } + + return retVal.asReadOnlyBuffer(); + } + /** + * Copies the first {@link #length()} values into a new array. Chunks that + * were never allocated contribute zeros, matching what {@link #get(int)} + * returns for them. + * + * @return a new array holding the content of the list + */ + public int[] toArray() + { + int[] retVal = new int[length()]; + int currIndex = 0; + for (int[] arr : baseLists) { + /* every allocated chunk has exactly allocateSize elements, so the chunk size is used to step over null placeholders too */ + int min = Math.min(length() - currIndex, allocateSize); + if (arr != null) { + System.arraycopy(arr, 0, retVal, currIndex, min); + } + currIndex += min; + } + return retVal; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java new file mode 100755 index 000000000000..5d40f8299b52 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java @@ -0,0 +1,689 @@ +package io.druid.extendedset.utilities; + +import io.druid.extendedset.intset.IntSet; + +import java.util.Collection; +import java.util.Formatter; +import java.util.List; + +/** + * A wrapper class for classes that implement the {@link IntSet} interface to count method calls + * + * @author Alessandro Colantonio + * @version $Id: IntSetStatistics.java 153 2011-05-30 16:39:57Z cocciasik $ + */ +public class IntSetStatistics implements IntSet +{ + /** + * @uml.property name="unionCount" + */ + private static long
unionCount = 0; + + + /* + * Monitored characteristics: one counter per monitored operation. The + * counters are static, hence shared by every wrapper instance, and are plain + * long fields incremented with ++. + */ + /** + * @uml.property name="intersectionCount" + */ + private static long intersectionCount = 0; + /** + * @uml.property name="differenceCount" + */ + private static long differenceCount = 0; + /** + * @uml.property name="symmetricDifferenceCount" + */ + private static long symmetricDifferenceCount = 0; + /** + * @uml.property name="complementCount" + */ + private static long complementCount = 0; + /** + * @uml.property name="unionSizeCount" + */ + private static long unionSizeCount = 0; + /** + * @uml.property name="intersectionSizeCount" + */ + private static long intersectionSizeCount = 0; + /** + * @uml.property name="differenceSizeCount" + */ + private static long differenceSizeCount = 0; + /** + * @uml.property name="symmetricDifferenceSizeCount" + */ + private static long symmetricDifferenceSizeCount = 0; + /** + * @uml.property name="complementSizeCount" + */ + private static long complementSizeCount = 0; + /** + * @uml.property name="equalsCount" + */ + private static long equalsCount = 0; + /** + * @uml.property name="hashCodeCount" + */ + private static long hashCodeCount = 0; + /** + * @uml.property name="containsAllCount" + */ + private static long containsAllCount = 0; + /** + * @uml.property name="containsAnyCount" + */ + private static long containsAnyCount = 0; + /** + * @uml.property name="containsAtLeastCount" + */ + private static long containsAtLeastCount = 0; + /** + * instance to monitor + * + * @uml.property name="container" + * @uml.associationEnd + */ + private final IntSet container; + + + /* + * Statistics getters + */ + + /** + * Wraps an {@link IntSet} instance with an {@link IntSetStatistics} + * instance. When the given set is itself an {@link IntSetStatistics}, the + * set it wraps is monitored instead (see {@link #extractContainer(IntSet)}), + * so wrappers are never stacked. + * + * @param container {@link IntSet} to wrap + */ + public IntSetStatistics(IntSet container) + { + this.container = extractContainer(container); + } + + /** + * @return number of union operations (i.e., {@link #addAll(IntSet)} , {@link #union(IntSet)} ) + * + *
@uml.property name="unionCount" + */ + public static long getUnionCount() {return unionCount;} + + /** + * @return number of intersection operations (i.e., {@link #retainAll(IntSet)} , {@link #intersection(IntSet)} ) + * + * @uml.property name="intersectionCount" + */ + public static long getIntersectionCount() {return intersectionCount;} + + /** + * @return number of difference operations (i.e., {@link #removeAll(IntSet)} , {@link #difference(IntSet)} ) + * + * @uml.property name="differenceCount" + */ + public static long getDifferenceCount() {return differenceCount;} + + /** + * @return number of symmetric difference operations (i.e., {@link #symmetricDifference(IntSet)} ) + * + * @uml.property name="symmetricDifferenceCount" + */ + public static long getSymmetricDifferenceCount() {return symmetricDifferenceCount;} + + /** + * @return number of complement operations (i.e., {@link #complement()} , {@link #complemented()} ) + * + * @uml.property name="complementCount" + */ + public static long getComplementCount() {return complementCount;} + + /** + * @return number of union cardinality computations (i.e., {@link #unionSize(IntSet)} calls) + * + * @uml.property name="unionSizeCount" + */ + public static long getUnionSizeCount() {return unionSizeCount;} + + /** + * @return number of intersection cardinality computations (i.e., {@link #intersectionSize(IntSet)} calls) + * + * @uml.property name="intersectionSizeCount" + */ + public static long getIntersectionSizeCount() {return intersectionSizeCount;} + + /** + * @return number of difference cardinality computations (i.e., {@link #differenceSize(IntSet)} calls) + * + * @uml.property name="differenceSizeCount" + */ + public static long getDifferenceSizeCount() {return differenceSizeCount;} + + /** + * @return number of symmetric difference cardinality computations (i.e., {@link #symmetricDifferenceSize(IntSet)} calls) + * + * @uml.property name="symmetricDifferenceSizeCount" + */ + public static long
getSymmetricDifferenceSizeCount() {return symmetricDifferenceSizeCount;} + + /** + * @return number of complement cardinality computations (i.e., {@link #complementSize()} calls) + * + * @uml.property name="complementSizeCount" + */ + public static long getComplementSizeCount() {return complementSizeCount;} + + /** + * @return number of equality check operations (i.e., {@link #equals(Object)} ) + * + * @uml.property name="equalsCount" + */ + public static long getEqualsCount() {return equalsCount;} + + /** + * @return number of hash code computations (i.e., {@link #hashCode()} ) + * + * @uml.property name="hashCodeCount" + */ + public static long getHashCodeCount() {return hashCodeCount;} + + /** + * @return number of {@link #containsAll(IntSet)} calls + * + * @uml.property name="containsAllCount" + */ + public static long getContainsAllCount() {return containsAllCount;} + + /** + * @return number of {@link #containsAny(IntSet)} calls + * + * @uml.property name="containsAnyCount" + */ + public static long getContainsAnyCount() {return containsAnyCount;} + + /** + * @return number of {@link #containsAtLeast(IntSet, int)} calls + * + * @uml.property name="containsAtLeastCount" + */ + public static long getContainsAtLeastCount() {return containsAtLeastCount;} + + + /* + * Other statistical methods + */ + + /** + * @return the total number of cardinality computations, i.e. the sum of the + * five counters of the {@code ...Size} methods (intersection, union, + * difference, symmetric difference and complement) + */ + public static long getSizeCheckCount() + { + return getIntersectionSizeCount() + + + getUnionSizeCount() + + getDifferenceSizeCount() + + getSymmetricDifferenceSizeCount() + + getComplementSizeCount(); + } + + /** + * Resets all counters to zero. The counters are static, so this affects the + * figures reported for every wrapper instance. + */ + public static void resetCounters() + { + unionCount = intersectionCount = differenceCount = symmetricDifferenceCount = complementCount = + unionSizeCount = intersectionSizeCount = differenceSizeCount = symmetricDifferenceSizeCount = complementSizeCount = + equalsCount = hashCodeCount = containsAllCount = containsAnyCount = containsAtLeastCount = 0; +
} + + /** + * Formats the current value of every counter, one "name: value" line per + * counter, in the order in which the counters are declared. + * + * @return the summary information string + */ + public static String summary() + { + final StringBuilder s = new StringBuilder(); + final Formatter f = new Formatter(s); + + f.format("unionCount: %d\n", Long.valueOf(unionCount)); + f.format("intersectionCount: %d\n", Long.valueOf(intersectionCount)); + f.format("differenceCount: %d\n", Long.valueOf(differenceCount)); + f.format("symmetricDifferenceCount: %d\n", Long.valueOf(symmetricDifferenceCount)); + f.format("complementCount: %d\n", Long.valueOf(complementCount)); + f.format("unionSizeCount: %d\n", Long.valueOf(unionSizeCount)); + f.format("intersectionSizeCount: %d\n", Long.valueOf(intersectionSizeCount)); + f.format("differenceSizeCount: %d\n", Long.valueOf(differenceSizeCount)); + f.format("symmetricDifferenceSizeCount: %d\n", Long.valueOf(symmetricDifferenceSizeCount)); + f.format("complementSizeCount: %d\n", Long.valueOf(complementSizeCount)); + f.format("equalsCount: %d\n", Long.valueOf(equalsCount)); + f.format("hashCodeCount: %d\n", Long.valueOf(hashCodeCount)); + f.format("containsAllCount: %d\n", Long.valueOf(containsAllCount)); + f.format("containsAnyCount: %d\n", Long.valueOf(containsAnyCount)); + f.format("containsAtLeastCount: %d\n", Long.valueOf(containsAtLeastCount)); + + return s.toString(); + } + + /** + * Removes the {@link IntSetStatistics} wrapper. Nested wrappers are unwrapped + * recursively; a set that is not an {@link IntSetStatistics} is returned as is. + * + * @param c the set to unwrap + * + * @return the contained {@link IntSet} instance + */ + public static IntSet extractContainer(IntSet c) + { + if (c instanceof IntSetStatistics) { + return extractContainer(((IntSetStatistics) c).container); + } + return c; + } + + /* + * MONITORED METHODS: each one increments its counter, unwraps the argument + * and delegates to the wrapped set + */ + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + unionCount++; + return container.addAll(extractContainer(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet union(IntSet other) + { + unionCount++; + return new IntSetStatistics(container.union(extractContainer(other))); + } + + /** + * {@inheritDoc} + */ +
@Override + public boolean retainAll(IntSet c) + { + intersectionCount++; + return container.retainAll(extractContainer(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet intersection(IntSet other) + { + intersectionCount++; + return new IntSetStatistics(container.intersection(extractContainer(other))); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + differenceCount++; + return container.removeAll(extractContainer(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet difference(IntSet other) + { + differenceCount++; + return new IntSetStatistics(container.difference(extractContainer(other))); + } + + /** + * {@inheritDoc} + * + * The result is wrapped in a new {@link IntSetStatistics}, like the results + * of the other set-producing operations of this class, so that operations + * performed on it keep being counted. + */ + @Override + public IntSet symmetricDifference(IntSet other) + { + symmetricDifferenceCount++; + return new IntSetStatistics(container.symmetricDifference(extractContainer(other))); + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + complementCount++; + container.complement(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet complemented() + { + complementCount++; + return new IntSetStatistics(container.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public int unionSize(IntSet other) + { + unionSizeCount++; + return container.unionSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet other) + { + intersectionSizeCount++; + return container.intersectionSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(IntSet other) + { + differenceSizeCount++; + return container.differenceSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(IntSet other) + { + symmetricDifferenceSizeCount++; + return container.symmetricDifferenceSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + complementSizeCount++; + return
container.complementSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + containsAllCount++; + return container.containsAll(extractContainer(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet other) + { + containsAnyCount++; + return container.containsAny(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet other, int minElements) + { + containsAtLeastCount++; + return container.containsAtLeast(extractContainer(other), minElements); + } + + /** + * {@inheritDoc} + * + * The value is the hash code of the wrapped set; every call is counted. + */ + @Override + public int hashCode() + { + hashCodeCount++; + return container.hashCode(); + } + + /** + * {@inheritDoc} + * + * The comparison is delegated to the wrapped set. When obj is itself an + * {@link IntSetStatistics} it is unwrapped first; null is never equal. + * Every call is counted, whatever its outcome. + */ + @Override + public boolean equals(Object obj) + { + equalsCount++; + return obj != null + && ((obj instanceof IntSetStatistics) + ? container.equals(extractContainer((IntSetStatistics) obj)) + : container.equals(obj)); + } + + /* + * SIMPLE REDIRECTION: plain delegation to the wrapped set, no counter is + * touched + */ + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() {return container.bitmapCompressionRatio();} + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() {return container.collectionCompressionRatio();} + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) {container.clear(from, to);} + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) {container.fill(from, to);} + + /** + * {@inheritDoc} + */ + @Override + public void clear() {container.clear();} + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int i) {return container.add(i);} + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(int i) {return container.remove(i);} + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) {container.flip(e);} + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) {return container.get(i);} + + /** + * {@inheritDoc} + */ + @Override
+ public int indexOf(int e) {return container.indexOf(e);} + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int i) {return container.contains(i);} + + /** + * {@inheritDoc} + */ + @Override + public int first() {return container.first();} + + /** + * {@inheritDoc} + */ + @Override + public int last() {return container.last();} + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() {return container.isEmpty();} + + /** + * {@inheritDoc} + */ + @Override + public int size() {return container.size();} + + /** + * {@inheritDoc} + * + * The iterator is the one of the wrapped set. + */ + @Override + public IntIterator iterator() {return container.iterator();} + + /** + * {@inheritDoc} + * + * The iterator is the one of the wrapped set. + */ + @Override + public IntIterator descendingIterator() {return container.descendingIterator();} + + /** + * {@inheritDoc} + */ + @Override + public int[] toArray() {return container.toArray();} + + /** + * {@inheritDoc} + */ + @Override + public int[] toArray(int[] a) {return container.toArray(a);} + + /** + * {@inheritDoc} + * + * NOTE(review): the argument is forwarded as received, without + * {@link #extractContainer(IntSet)}, unlike the monitored methods - confirm + * that the wrapped implementation accepts a wrapper as argument. + */ + @Override + public int compareTo(IntSet o) {return container.compareTo(o);} + + /** + * {@inheritDoc} + */ + @Override + public String toString() {return container.toString();} + + /** + * {@inheritDoc} + * + * The returned sets come straight from the wrapped set. + */ + @Override + public List powerSet() {return container.powerSet();} + + /** + * {@inheritDoc} + * + * The returned sets come straight from the wrapped set. + */ + @Override + public List powerSet(int min, int max) {return container.powerSet(min, max);} + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize() {return container.powerSetSize();} + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize(int min, int max) {return container.powerSetSize(min, max);} + + /** + * {@inheritDoc} + * + * The argument is forwarded as received (it is not unwrapped) and no counter + * is touched by this method itself. + */ + @Override + public double jaccardSimilarity(IntSet other) {return container.jaccardSimilarity(other);} + + /** + * {@inheritDoc} + * + * The argument is forwarded as received (it is not unwrapped) and no counter + * is touched by this method itself. + */ + @Override + public double jaccardDistance(IntSet other) {return container.jaccardDistance(other);} + + /** + * {@inheritDoc} + */ + @Override + public double
weightedJaccardSimilarity(IntSet other) {return container.weightedJaccardSimilarity(other);} + + /** + * {@inheritDoc} + * + * The argument is forwarded as received (it is not unwrapped) and no counter + * is touched by this method itself. + */ + @Override + public double weightedJaccardDistance(IntSet other) {return container.weightedJaccardDistance(other);} + + /* + * OTHERS: methods producing a new set; the result is wrapped again so that + * it keeps being monitored + */ + + /** + * {@inheritDoc} + */ + @Override + public IntSet empty() {return new IntSetStatistics(container.empty());} + + /** + * {@inheritDoc} + */ + @Override + public IntSet clone() {return new IntSetStatistics(container.clone());} + + /** + * {@inheritDoc} + */ + @Override + public IntSet convert(int... a) {return new IntSetStatistics(container.convert(a));} + + /** + * {@inheritDoc} + */ + @Override + public IntSet convert(Collection c) {return new IntSetStatistics(container.convert(c));} + + /** + * {@inheritDoc} + * + * The debug information of the wrapped set is returned, preceded by a + * heading line. + */ + @Override + public String debugInfo() {return "Analyzed IntSet:\n" + container.debugInfo();} +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java new file mode 100755 index 000000000000..2ba2a6161a89 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java @@ -0,0 +1,869 @@ +package io.druid.extendedset.utilities.random; + + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +/** + *

MersenneTwister and MersenneTwisterFast

+ *

Version 13, based on version MT199937(99/10/29) + * of the Mersenne Twister algorithm found at + * + * The Mersenne Twister Home Page, with the initialization + * improved using the new 2002/1/26 initialization algorithm + * By Sean Luke, October 2004. + *

+ *

MersenneTwister is a drop-in subclass replacement + * for java.util.Random. It is properly synchronized and + * can be used in a multithreaded environment. On modern VMs such + * as HotSpot, it is approximately 1/3 slower than java.util.Random. + *

+ *

MersenneTwisterFast is not a subclass of java.util.Random. It has + * the same public methods as Random does, however, and it is + * algorithmically identical to MersenneTwister. MersenneTwisterFast + * has hard-code inlined all of its methods directly, and made all of them + * final (well, the ones of consequence anyway). Further, these + * methods are not synchronized, so the same MersenneTwisterFast + * instance cannot be shared by multiple threads. But all this helps + * MersenneTwisterFast achieve well over twice the speed of MersenneTwister. + * java.util.Random is about 1/3 slower than MersenneTwisterFast. + *

+ *

About the Mersenne Twister

+ *

This is a Java version of the C-program for MT19937: Integer version. + * The MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura, + * who ask: "When you use this, send an email to: matumoto@math.keio.ac.jp + * with an appropriate reference to your work". Indicate that this + * is a translation of their algorithm into Java. + *

+ *

Reference. + * Makoto Matsumoto and Takuji Nishimura, + * "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform + * Pseudo-Random Number Generator", + * ACM Transactions on Modeling and Computer Simulation, + * Vol. 8, No. 1, January 1998, pp 3--30. + *

+ *

About this Version

+ *

+ *

Changes Since V12: clone() method added. + *

+ *

Changes Since V11: stateEquals(...) method added. MersenneTwisterFast + * is equal to other MersenneTwisterFasts with identical state; likewise + * MersenneTwister is equal to other MersenneTwister with identical state. + * This isn't equals(...) because that requires a contract of immutability + * to compare by value. + *

+ *

Changes Since V10: A documentation error suggested that + * setSeed(int[]) required an int[] array 624 long. In fact, the array + * can be any non-zero length. The new version also checks for this fact. + *

+ *

Changes Since V9: readState(stream) and writeState(stream) + * provided. + *

+ *

Changes Since V8: setSeed(int) was only using the first 28 bits + * of the seed; it should have been 32 bits. For small-number seeds the + * behavior is identical. + *

+ *

Changes Since V7: A documentation error in MersenneTwisterFast + * (but not MersenneTwister) stated that nextDouble selects uniformly from + * the closed interval [0,1]. It does not. nextDouble's contract is + * identical across MersenneTwisterFast, MersenneTwister, and java.util.Random, + * namely, selection in the half-open interval [0,1). That is, 1.0 should + * not be returned. A similar contract exists in nextFloat. + *

+ *

Changes Since V6: License has changed from LGPL to BSD. + * New timing information to compare against + * java.util.Random. Recent versions of HotSpot have helped Random increase + * in speed to the point where it is faster than MersenneTwister but slower + * than MersenneTwisterFast (which should be the case, as it's a less complex + * algorithm but is synchronized). + *

+ *

Changes Since V5: New empty constructor made to work the same + * as java.util.Random -- namely, it seeds based on the current time in + * milliseconds. + *

+ *

Changes Since V4: New initialization algorithms. See + * http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html + *

+ *

The MersenneTwister code is based on standard MT19937 C/C++ + * code by Takuji Nishimura, + * with suggestions from Topher Cooper and Marc Rieffel, July 1997. + * The code was originally translated into Java by Michael Lecuyer, + * January 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer. + *

+ *

Java notes

+ *

+ *

This implementation implements the bug fixes made + * in Java 1.2's version of Random, which means it can be used with + * earlier versions of Java. See + * + * the JDK 1.2 java.util.Random documentation for further documentation + * on the random-number generation contracts made. Additionally, there's + * an undocumented bug in the JDK java.util.Random.nextBytes() method, + * which this code fixes. + *

+ *

Just like java.util.Random, this + * generator accepts a long seed but doesn't use all of it. java.util.Random + * uses 48 bits. The Mersenne Twister instead uses 32 bits (int size). + * So it's best if your seed does not exceed the int range. + *

+ *

MersenneTwister can be used reliably + * on JDK version 1.1.5 or above. Earlier Java versions have serious bugs in + * java.util.Random; only MersenneTwisterFast (and not MersenneTwister nor + * java.util.Random) should be used with them. + *

+ *

License

+ *

+ * Copyright (c) 2003 by Sean Luke.
+ * Portions copyright (c) 1993 by Michael Lecuyer.
+ * All rights reserved.
+ *

+ *

Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + *

    + *
  • Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + *
  • Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + *
  • Neither the name of the copyright owners, their employers, nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + *
+ *

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @version 13 + */ + +@SuppressWarnings("serial") +public class MersenneTwister extends java.util.Random implements Cloneable +{ + // Period parameters + private static final int N = 624; + private static final int M = 397; + private static final int MATRIX_A = 0x9908b0df; // private static final * constant vector a + private static final int UPPER_MASK = 0x80000000; // most significant w-r bits + private static final int LOWER_MASK = 0x7fffffff; // least significant r bits + + // Tempering parameters + private static final int TEMPERING_MASK_B = 0x9d2c5680; + private static final int TEMPERING_MASK_C = 0xefc60000; + + private int mt[]; // the array for the state vector + private int mti; // mti==N+1 means mt[N] is not initialized + private int mag01[]; + + // a good initial seed (of int size, though stored in a long) + //private static final long GOOD_SEED = 4357; + + /* implemented here because there's a bug in Random's implementation + of the Gaussian code (divide by zero, and log(0), ugh!), yet its + gaussian variables are private so we can't access them here. 
:-( */ + + private double __nextNextGaussian; + private boolean __haveNextNextGaussian; + + /* We're overriding all internal data, to my knowledge, so this should be okay */ + + /** + * Constructor using the default seed. + */ + public MersenneTwister() + { + this(System.currentTimeMillis()); + } + + /** + * Constructor using a given seed. Though you pass this seed in + * as a long, it's best to make sure it's actually an integer. + * + * @param seed + */ + public MersenneTwister(final long seed) + { + super(seed); /* just in case */ + setSeed(seed); + } + + /** + * Constructor using an array of integers as seed. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + public MersenneTwister(final int[] array) + { + super(System.currentTimeMillis()); /* pick something at random just in case */ + setSeed(array); + } + + /** + * Tests the code. 
+ * + * @param args + */ + public static void main(String args[]) + { + int j; + + MersenneTwister r; + + // CORRECTNESS TEST + // COMPARE WITH http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out + + r = new MersenneTwister(new int[]{0x123, 0x234, 0x345, 0x456}); + System.out.println("Output of MersenneTwister with new (2002/1/26) seeding mechanism"); + for (j = 0; j < 1000; j++) { + // first, convert the int from signed to "unsigned" + long l = r.nextInt(); + if (l < 0) { + l += 4294967296L; // max int value + } + String s = String.valueOf(l); + while (s.length() < 10) { + s = " " + s; // buffer + } + System.out.print(s + " "); + if (j % 5 == 4) { + System.out.println(); + } + } + + // SPEED TEST + + final long SEED = 4357; + + int xx; + long ms; + System.out.println("\nTime to test grabbing 100000000 ints"); + + r = new MersenneTwister(SEED); + ms = System.currentTimeMillis(); + xx = 0; + for (j = 0; j < 100000000; j++) { + xx += r.nextInt(); + } + System.out.println("Mersenne Twister: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); + + System.out.println("To compare this with java.util.Random, run this same test on MersenneTwisterFast."); + System.out.println("The comparison with Random is removed from MersenneTwister because it is a proper"); + System.out.println("subclass of Random and this unfairly makes some of Random's methods un-inlinable,"); + System.out.println("so it would make Random look worse than it is."); + + // TEST TO COMPARE TYPE CONVERSION BETWEEN + // MersenneTwisterFast.java AND MersenneTwister.java + + + System.out.println("\nGrab the first 1000 booleans"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)"); + r = new MersenneTwister(SEED); + for (j = 0; j < 
1000; j++) { + System.out.print(r.nextBoolean(j / 999.0) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean(j / 999.0f) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + byte[] bytes = new byte[1000]; + System.out.println("\nGrab the first 1000 bytes using nextBytes"); + r = new MersenneTwister(SEED); + r.nextBytes(bytes); + for (j = 0; j < 1000; j++) { + System.out.print(bytes[j] + " "); + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + byte b; + System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print((b = r.nextByte()) + " "); + if (b != bytes[j]) { + System.out.print("BAD "); + } + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 shorts"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextShort() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints of different sizes"); + r = new MersenneTwister(SEED); + int max = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt(max) + " "); + max *= 2; + if (max <= 0) { + max = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + 
} + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 longs"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 longs of different sizes"); + r = new MersenneTwister(SEED); + long max2 = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong(max2) + " "); + max2 *= 2; + if (max2 <= 0) { + max2 = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 floats"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextFloat() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 doubles"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextDouble() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 gaussian doubles"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextGaussian() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + } + + /** + * {@inheritDoc} + */ + @Override + public Object clone() throws CloneNotSupportedException + { + MersenneTwister f = (MersenneTwister) (super.clone()); + f.mt = mt.clone(); + f.mag01 = mag01.clone(); + return f; + } + + /** + * @param o + * + * @return ? 
+ */ + public boolean stateEquals(Object o) + { + if (o == this) { + return true; + } + if (o == null || !(o instanceof MersenneTwister)) { + return false; + } + MersenneTwister other = (MersenneTwister) o; + if (mti != other.mti) { + return false; + } + for (int x = 0; x < mag01.length; x++) { + if (mag01[x] != other.mag01[x]) { + return false; + } + } + for (int x = 0; x < mt.length; x++) { + if (mt[x] != other.mt[x]) { + return false; + } + } + return true; + } + + /** + * Reads the entire state of the MersenneTwister RNG from the stream + * + * @param stream + * + * @throws IOException + */ + public void readState(DataInputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + mt[x] = stream.readInt(); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + mag01[x] = stream.readInt(); + } + + mti = stream.readInt(); + __nextNextGaussian = stream.readDouble(); + __haveNextNextGaussian = stream.readBoolean(); + } + + /** + * Writes the entire state of the MersenneTwister RNG to the stream + * + * @param stream + * + * @throws IOException + */ + public void writeState(DataOutputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mt[x]); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mag01[x]); + } + + stream.writeInt(mti); + stream.writeDouble(__nextNextGaussian); + stream.writeBoolean(__haveNextNextGaussian); + } + + /** + * Initialize the pseudo random number generator. Don't + * pass in a long that's bigger than an int (Mersenne Twister + * only uses the first 32 bits for its seed). + */ + @Override + synchronized public void setSeed(final long seed) + { + // it's always good style to call super + super.setSeed(seed); + + // Due to a bug in java.util.Random clear up to 1.2, we're + // doing our own Gaussian variable. 
+ __haveNextNextGaussian = false; + + mt = new int[N]; + + mag01 = new int[2]; + mag01[0] = 0x0; + mag01[1] = MATRIX_A; + + mt[0] = (int) (seed & 0xffffffff); + for (mti = 1; mti < N; mti++) { + mt[mti] = + (1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >>> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffff; + /* for >32 bit machines */ + } + } + + /** + * Sets the seed of the MersenneTwister using an array of integers. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + synchronized public void setSeed(final int[] array) + { + if (array.length == 0) { + throw new IllegalArgumentException("Array length must be greater than zero"); + } + int i, j, k; + setSeed(19650218); + i = 1; + j = 0; + k = (N > array.length ? N : array.length); + for (; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + j++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + if (j >= array.length) { + j = 0; + } + } + for (k = N - 1; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + } + mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */ + } + + /* If you've got a truly old version of Java, you can omit these + two next methods. */ + + /** + * Returns an integer with bits bits filled with a random number. 
+ */ + @Override + synchronized protected int next(final int bits) + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return y >>> (32 - bits); // hope that's right! + } + + private synchronized void writeObject(final ObjectOutputStream out) + throws IOException + { + // just so we're synchronized. + out.defaultWriteObject(); + } + + private synchronized void readObject(final ObjectInputStream in) + throws IOException, ClassNotFoundException + { + // just so we're synchronized. + in.defaultReadObject(); + } + + /** + * This method is missing from jdk 1.0.x and below. JDK 1.1 + * includes this for us, but what the heck. + */ + @Override + public boolean nextBoolean() {return next(1) != 0;} + + /** + * This generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. Not as precise a random real + * event as nextBoolean(double), but twice as fast. To explicitly + * use this, remember you may need to cast to float first. + * + * @param probability + * + * @return ? 
+ */ + public boolean nextBoolean(final float probability) + { + if (probability < 0.0f || probability > 1.0f) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0f) { + return false; // fix half-open issues + } else if (probability == 1.0f) { + return true; // fix half-open issues + } + return nextFloat() < probability; + } + + /** + * This generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. + * + * @param probability + * + * @return ? + */ + public boolean nextBoolean(final double probability) + { + if (probability < 0.0 || probability > 1.0) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0) { + return false; // fix half-open issues + } else if (probability == 1.0) { + return true; // fix half-open issues + } + return nextDouble() < probability; + } + + /** + * This method is missing from JDK 1.1 and below. JDK 1.2 + * includes this for us, but what the heck. + */ + @Override + public int nextInt(final int n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + if ((n & -n) == n) { + return (int) ((n * (long) next(31)) >> 31); + } + + int bits, val; + do { + bits = next(31); + val = bits % n; + } + while (bits - val + (n - 1) < 0); + return val; + } + + /** + * This method is for completness' sake. + * Returns a long drawn uniformly from 0 to n-1. Suffice it to say, + * n must be > 0, or an IllegalArgumentException is raised. + * + * @param n + * + * @return ? + */ + public long nextLong(final long n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + long bits, val; + do { + bits = (nextLong() >>> 1); + val = bits % n; + } + while (bits - val + (n - 1) < 0); + return val; + } + + /** + * A bug fix for versions of JDK 1.1 and below. 
JDK 1.2 fixes + * this for us, but what the heck. + * + * @return ? + */ + @Override + public double nextDouble() + { + return (((long) next(26) << 27) + next(27)) + / (double) (1L << 53); + } + + /** + * A bug fix for versions of JDK 1.1 and below. JDK 1.2 fixes + * this for us, but what the heck. + */ + + @Override + public float nextFloat() + { + return next(24) / ((float) (1 << 24)); + } + + /** + * A bug fix for all versions of the JDK. The JDK appears to + * use all four bytes in an integer as independent byte values! + * Totally wrong. I've submitted a bug report. + */ + + @Override + public void nextBytes(final byte[] bytes) + { + for (int x = 0; x < bytes.length; x++) { + bytes[x] = (byte) next(8); + } + } + + /** + * For completeness' sake, though it's not in java.util.Random. + * + * @return ? + */ + public char nextChar() + { + // chars are 16-bit UniCode values + return (char) (next(16)); + } + + /** + * For completeness' sake, though it's not in java.util.Random. + * + * @return ? + */ + public short nextShort() + { + return (short) (next(16)); + } + + /** + * For completeness' sake, though it's not in java.util.Random. + * + * @return ? + */ + public byte nextByte() + { + return (byte) (next(8)); + } +// } + + /** + * A bug fix for all JDK code including 1.2. nextGaussian can theoretically + * ask for the log of 0 and divide it by 0! See Java bug + * + * http://developer.java.sun.com/developer/bugParade/bugs/4254501.html + * + * @return ? 
+ */ + @Override + synchronized public double nextGaussian() + { + if (__haveNextNextGaussian) { + __haveNextNextGaussian = false; + return __nextNextGaussian; + } +// else +// { + double v1, v2, s; + do { + v1 = 2 * nextDouble() - 1; // between -1.0 and 1.0 + v2 = 2 * nextDouble() - 1; // between -1.0 and 1.0 + s = v1 * v1 + v2 * v2; + } while (s >= 1 || s == 0); + double multiplier = /*Strict*/Math.sqrt(-2 * /*Strict*/Math.log(s) / s); + __nextNextGaussian = v2 * multiplier; + __haveNextNextGaussian = true; + return v1 * multiplier; + } + +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java new file mode 100755 index 000000000000..0789c2008f75 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java @@ -0,0 +1,1470 @@ +package io.druid.extendedset.utilities.random; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.Serializable; +import java.util.Random; + +/** + *

MersenneTwister and MersenneTwisterFast

+ *

Version 13, based on version MT19937(99/10/29) + * of the Mersenne Twister algorithm found at + * + * The Mersenne Twister Home Page, with the initialization + * improved using the new 2002/1/26 initialization algorithm + * By Sean Luke, October 2004. + *

+ *

MersenneTwister is a drop-in subclass replacement + * for java.util.Random. It is properly synchronized and + * can be used in a multithreaded environment. On modern VMs such + * as HotSpot, it is approximately 1/3 slower than java.util.Random. + *

+ *

MersenneTwisterFast is not a subclass of java.util.Random. It has + * the same public methods as Random does, however, and it is + * algorithmically identical to MersenneTwister. MersenneTwisterFast + * has hard-code inlined all of its methods directly, and made all of them + * final (well, the ones of consequence anyway). Further, these + * methods are not synchronized, so the same MersenneTwisterFast + * instance cannot be shared by multiple threads. But all this helps + * MersenneTwisterFast achieve well over twice the speed of MersenneTwister. + * java.util.Random is about 1/3 slower than MersenneTwisterFast. + *

+ *

About the Mersenne Twister

+ *

This is a Java version of the C-program for MT19937: Integer version. + * The MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura, + * who ask: "When you use this, send an email to: matumoto@math.keio.ac.jp + * with an appropriate reference to your work". Indicate that this + * is a translation of their algorithm into Java. + *

+ *

Reference. + * Makoto Matsumoto and Takuji Nishimura, + * "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform + * Pseudo-Random Number Generator", + * ACM Transactions on Modeling and Computer Simulation, + * Vol. 8, No. 1, January 1998, pp 3--30. + *

+ *

About this Version

+ *

+ *

Changes Since V12: clone() method added. + *

+ *

Changes Since V11: stateEquals(...) method added. MersenneTwisterFast + * is equal to other MersenneTwisterFasts with identical state; likewise + * MersenneTwister is equal to other MersenneTwisters with identical state. + * This isn't equals(...) because that requires a contract of immutability + * to compare by value. + *

+ *

Changes Since V10: A documentation error suggested that + * setSeed(int[]) required an int[] array 624 long. In fact, the array + * can be any non-zero length. The new version also checks for this fact. + *

+ *

Changes Since V9: readState(stream) and writeState(stream) + * provided. + *

+ *

Changes Since V8: setSeed(int) was only using the first 28 bits + * of the seed; it should have been 32 bits. For small-number seeds the + * behavior is identical. + *

+ *

Changes Since V7: A documentation error in MersenneTwisterFast + * (but not MersenneTwister) stated that nextDouble selects uniformly from + * the closed interval [0,1]. It does not. nextDouble's contract is + * identical across MersenneTwisterFast, MersenneTwister, and java.util.Random, + * namely, selection in the half-open interval [0,1). That is, 1.0 should + * not be returned. A similar contract exists in nextFloat. + *

+ *

Changes Since V6: License has changed from LGPL to BSD. + * New timing information to compare against + * java.util.Random. Recent versions of HotSpot have helped Random increase + * in speed to the point where it is faster than MersenneTwister but slower + * than MersenneTwisterFast (which should be the case, as it's a less complex + * algorithm but is synchronized). + *

+ *

Changes Since V5: New empty constructor made to work the same + * as java.util.Random -- namely, it seeds based on the current time in + * milliseconds. + *

+ *

Changes Since V4: New initialization algorithms. + * See + * http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html. + *

+ *

The MersenneTwister code is based on standard MT19937 C/C++ + * code by Takuji Nishimura, + * with suggestions from Topher Cooper and Marc Rieffel, July 1997. + * The code was originally translated into Java by Michael Lecuyer, + * January 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer. + *

+ *

Java notes

+ *

+ *

This implementation implements the bug fixes made + * in Java 1.2's version of Random, which means it can be used with + * earlier versions of Java. See + * + * the JDK 1.2 java.util.Random documentation for further documentation + * on the random-number generation contracts made. Additionally, there's + * an undocumented bug in the JDK java.util.Random.nextBytes() method, + * which this code fixes. + *

+ *

Just like java.util.Random, this + * generator accepts a long seed but doesn't use all of it. java.util.Random + * uses 48 bits. The Mersenne Twister instead uses 32 bits (int size). + * So it's best if your seed does not exceed the int range. + *

+ *

MersenneTwister can be used reliably + * on JDK version 1.1.5 or above. Earlier Java versions have serious bugs in + * java.util.Random; only MersenneTwisterFast (and not MersenneTwister nor + * java.util.Random) should be used with them. + *

+ *

License

+ *

+ * Copyright (c) 2003 by Sean Luke.
+ * Portions copyright (c) 1993 by Michael Lecuyer.
+ * All rights reserved.
+ *

+ *

Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + *

    + *
  • Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + *
  • Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + *
  • Neither the name of the copyright owners, their employers, nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + *
+ *

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @version 13 + */ + +// Note: this class is hard-inlined in all of its methods. This makes some of +// the methods well-nigh unreadable in their complexity. In fact, the Mersenne +// Twister is fairly easy code to understand: if you're trying to get a handle +// on the code, I strongly suggest looking at MersenneTwister.java first. 
+// -- Sean + +@SuppressWarnings("serial") +public class MersenneTwisterFast implements Serializable, Cloneable +{ + // Period parameters + private static final int N = 624; + private static final int M = 397; + private static final int MATRIX_A = 0x9908b0df; // private static final * constant vector a + private static final int UPPER_MASK = 0x80000000; // most significant w-r bits + private static final int LOWER_MASK = 0x7fffffff; // least significant r bits + + + // Tempering parameters + private static final int TEMPERING_MASK_B = 0x9d2c5680; + private static final int TEMPERING_MASK_C = 0xefc60000; + + private int mt[]; // the array for the state vector + private int mti; // mti==N+1 means mt[N] is not initialized + private int mag01[]; + + // a good initial seed (of int size, though stored in a long) + //private static final long GOOD_SEED = 4357; + + private double __nextNextGaussian; + private boolean __haveNextNextGaussian; + + /* We're overriding all internal data, to my knowledge, so this should be okay */ + + /** + * Constructor using the default seed. + */ + public MersenneTwisterFast() + { + this(System.currentTimeMillis()); + } + + /** + * Constructor using a given seed. Though you pass this seed in + * as a long, it's best to make sure it's actually an integer. + * + * @param seed + */ + public MersenneTwisterFast(final long seed) + { + setSeed(seed); + } + + /** + * Constructor using an array of integers as seed. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + public MersenneTwisterFast(final int[] array) + { + setSeed(array); + } + + /** + * Tests the code. 
+ * + * @param args + */ + public static void main(String args[]) + { + int j; + + MersenneTwisterFast r; + + // CORRECTNESS TEST + // COMPARE WITH http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out + + r = new MersenneTwisterFast(new int[]{0x123, 0x234, 0x345, 0x456}); + System.out.println("Output of MersenneTwisterFast with new (2002/1/26) seeding mechanism"); + for (j = 0; j < 1000; j++) { + // first, convert the int from signed to "unsigned" + long l = r.nextInt(); + if (l < 0) { + l += 4294967296L; // max int value + } + String s = String.valueOf(l); + while (s.length() < 10) { + s = " " + s; // buffer + } + System.out.print(s + " "); + if (j % 5 == 4) { + System.out.println(); + } + } + + // SPEED TEST + + final long SEED = 4357; + + int xx; + long ms; + System.out.println("\nTime to test grabbing 100000000 ints"); + + Random rr = new Random(SEED); + xx = 0; + ms = System.currentTimeMillis(); + for (j = 0; j < 100000000; j++) { + xx += rr.nextInt(); + } + System.out.println("java.util.Random: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); + + r = new MersenneTwisterFast(SEED); + ms = System.currentTimeMillis(); + xx = 0; + for (j = 0; j < 100000000; j++) { + xx += r.nextInt(); + } + System.out.println("Mersenne Twister Fast: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); + + // TEST TO COMPARE TYPE CONVERSION BETWEEN + // MersenneTwisterFast.java AND MersenneTwister.java + + System.out.println("\nGrab the first 1000 booleans"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean((j / 999.0)) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if 
(!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean((j / 999.0f)) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + byte[] bytes = new byte[1000]; + System.out.println("\nGrab the first 1000 bytes using nextBytes"); + r = new MersenneTwisterFast(SEED); + r.nextBytes(bytes); + for (j = 0; j < 1000; j++) { + System.out.print(bytes[j] + " "); + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + byte b; + System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print((b = r.nextByte()) + " "); + if (b != bytes[j]) { + System.out.print("BAD "); + } + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 shorts"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextShort() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints of different sizes"); + r = new MersenneTwisterFast(SEED); + int max = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt(max) + " "); + max *= 2; + if (max <= 0) { + max = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 
1000 longs"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 longs of different sizes"); + r = new MersenneTwisterFast(SEED); + long max2 = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong(max2) + " "); + max2 *= 2; + if (max2 <= 0) { + max2 = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 floats"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextFloat() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 doubles"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextDouble() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 gaussian doubles"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextGaussian() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + } + + /** + * {@inheritDoc} + */ + @Override + public Object clone() throws CloneNotSupportedException + { + MersenneTwisterFast f = (MersenneTwisterFast) (super.clone()); + f.mt = mt.clone(); + f.mag01 = mag01.clone(); + return f; + } + + /** + * @param o + * + * @return ? 
+ */ + public boolean stateEquals(Object o) + { + if (o == this) { + return true; + } + if (o == null || !(o instanceof MersenneTwisterFast)) { + return false; + } + MersenneTwisterFast other = (MersenneTwisterFast) o; + if (mti != other.mti) { + return false; + } + for (int x = 0; x < mag01.length; x++) { + if (mag01[x] != other.mag01[x]) { + return false; + } + } + for (int x = 0; x < mt.length; x++) { + if (mt[x] != other.mt[x]) { + return false; + } + } + return true; + } + + /** + * Reads the entire state of the MersenneTwister RNG from the stream + * + * @param stream + * + * @throws IOException + */ + public void readState(DataInputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + mt[x] = stream.readInt(); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + mag01[x] = stream.readInt(); + } + + mti = stream.readInt(); + __nextNextGaussian = stream.readDouble(); + __haveNextNextGaussian = stream.readBoolean(); + } + + /** + * Writes the entire state of the MersenneTwister RNG to the stream + * + * @param stream + * + * @throws IOException + */ + public void writeState(DataOutputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mt[x]); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mag01[x]); + } + + stream.writeInt(mti); + stream.writeDouble(__nextNextGaussian); + stream.writeBoolean(__haveNextNextGaussian); + } + + /** + * Initialize the pseudo random number generator. Don't + * pass in a long that's bigger than an int (Mersenne Twister + * only uses the first 32 bits for its seed). + * + * @param seed + */ + synchronized public void setSeed(final long seed) + { + // Due to a bug in java.util.Random clear up to 1.2, we're + // doing our own Gaussian variable. 
+ __haveNextNextGaussian = false; + + mt = new int[N]; + + mag01 = new int[2]; + mag01[0] = 0x0; + mag01[1] = MATRIX_A; + + mt[0] = (int) (seed & 0xffffffff); + for (mti = 1; mti < N; mti++) { + mt[mti] = + (1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >>> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffff; + /* for >32 bit machines */ + } + } + + /** + * Sets the seed of the MersenneTwister using an array of integers. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + synchronized public void setSeed(final int[] array) + { + if (array.length == 0) { + throw new IllegalArgumentException("Array length must be greater than zero"); + } + int i, j, k; + setSeed(19650218); + i = 1; + j = 0; + k = (N > array.length ? N : array.length); + for (; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + j++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + if (j >= array.length) { + j = 0; + } + } + for (k = N - 1; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + } + mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */ + } + + /** + * @return ? 
+ */ + public final int nextInt() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return y; + } + + /** + * @return ? + */ + public final short nextShort() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (short) (y >>> 16); + } + + /** + * @return ? 
+ */ + public final char nextChar() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (char) (y >>> 16); + } + + /** + * @return ? + */ + public final boolean nextBoolean() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return ((y >>> 31) != 0); + } + + /** + * This 
generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. Not as precise a random real + * event as nextBoolean(double), but twice as fast. To explicitly + * use this, remember you may need to cast to float first. + * + * @param probability + * + * @return ? + */ + public final boolean nextBoolean(final float probability) + { + int y; + + if (probability < 0.0f || probability > 1.0f) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0f) { + return false; // fix half-open issues + } else if (probability == 1.0f) { + return true; // fix half-open issues + } + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (y >>> 8) / ((float) (1 << 24)) < probability; + } + + /** + * This generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. + * + * @param probability + * + * @return ? 
+ */ + public final boolean nextBoolean(final double probability) + { + int y; + int z; + + if (probability < 0.0 || probability > 1.0) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0) { + return false; // fix half-open issues + } else if (probability == 1.0) { + return true; // fix half-open issues + } + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // 
TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + /* derived from nextDouble documentation in jdk 1.2 docs, see top */ + return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53) < probability; + } + + /** + * @return ? + */ + public final byte nextByte() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (byte) (y >>> 24); + } + + /** + * @param bytes + */ + public final void nextBytes(byte[] bytes) + { + int y; + + for (int x = 0; x < bytes.length; x++) { + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) 
^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + bytes[x] = (byte) (y >>> 24); + } + } + + /** + * @return ? + */ + public final long nextLong() + { + int y; + int z; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] 
= mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + return (((long) y) << 32) + z; + } + + /** + * Returns a long drawn uniformly from 0 to n-1. Suffice it to say, + * n must be > 0, or an IllegalArgumentException is raised. + * + * @param n + * + * @return ? + */ + public final long nextLong(final long n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + long bits, val; + do { + int y; + int z; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] 
& UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + bits = (((((long) y) << 32) + z) >>> 1); + val = bits % n; + } while (bits - val + (n - 1) < 0); + return val; + } + + /** + * Returns a random double in the half-open range from [0.0,1.0). Thus 0.0 is a valid + * result but 1.0 is not. + * + * @return ? + */ + public final double nextDouble() + { + int y; + int z; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & 
LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + /* derived from nextDouble documentation in jdk 1.2 docs, see top */ + return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53); + } + + /** + * @return ? + */ + public final double nextGaussian() + { + if (__haveNextNextGaussian) { + __haveNextNextGaussian = false; + return __nextNextGaussian; + } +// else +// { + double v1, v2, s; + do { + int y; + int z; + int a; + int b; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are 
slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (a >>> 1) ^ mag01[a & 0x1]; + } + for (; kk < N - 1; kk++) { + a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (a >>> 1) ^ mag01[a & 0x1]; + } + a = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (a >>> 1) ^ mag01[a & 0x1]; + + mti = 0; + } + + a = mt[mti++]; + a ^= a >>> 11; // TEMPERING_SHIFT_U(a) + a ^= (a << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(a) + a ^= (a << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(a) + a ^= (a >>> 18); // TEMPERING_SHIFT_L(a) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + 
mt[kk] = mt[kk + M] ^ (b >>> 1) ^ mag01[b & 0x1]; + } + for (; kk < N - 1; kk++) { + b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (b >>> 1) ^ mag01[b & 0x1]; + } + b = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (b >>> 1) ^ mag01[b & 0x1]; + + mti = 0; + } + + b = mt[mti++]; + b ^= b >>> 11; // TEMPERING_SHIFT_U(b) + b ^= (b << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(b) + b ^= (b << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(b) + b ^= (b >>> 18); // TEMPERING_SHIFT_L(b) + + /* derived from nextDouble documentation in jdk 1.2 docs, see top */ + v1 = 2 * + (((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53)) + - 1; + v2 = 2 * (((((long) (a >>> 6)) << 27) + (b >>> 5)) / (double) (1L << 53)) + - 1; + s = v1 * v1 + v2 * v2; + } while (s >= 1 || s == 0); + double multiplier = /*Strict*/Math.sqrt(-2 * /*Strict*/Math.log(s) / s); + __nextNextGaussian = v2 * multiplier; + __haveNextNextGaussian = true; + return v1 * multiplier; +// } + } + + /** + * Returns a random float in the half-open range from [0.0f,1.0f). Thus 0.0f is a valid + * result but 1.0f is not. + * + * @return ? 
+ */ + public final float nextFloat() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (y >>> 8) / ((float) (1 << 24)); + } + + /** + * Returns an integer drawn uniformly from 0 to n-1. Suffice it to say, + * n must be > 0, or an IllegalArgumentException is raised. + * + * @param n + * + * @return ? 
+ */ + public final int nextInt(final int n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + if ((n & -n) == n) // i.e., n is a power of 2 + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (int) ((n * (long) (y >>> 1)) >> 31); + } + + int bits, val; + do { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // 
TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + bits = (y >>> 1); + val = bits % n; + } while (bits - val + (n - 1) < 0); + return val; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java new file mode 100755 index 000000000000..cb4bf71b41de --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java @@ -0,0 +1,885 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.druid.extendedset.wrappers; + + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.SortedSet; + +/** + * {@link ExtendedSet}-based class internally managed by an instance of any + * class implementing {@link Collection} + * + * @param the type of elements maintained by this set + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class GenericExtendedSet> extends AbstractExtendedSet +{ + /** + * class implementing {@link Collection} that is used to collect elements; passed again to every set created by {@code empty()} + */ + private final Class setClass; + /** + * elements of the set; not final because clone and the bulk operations replace the whole collection + */ + private /*final*/ Collection elements; + + /** + * Empty-set constructor: instantiates the backing collection with {@code setClass.newInstance()} and rethrows any failure wrapped in a {@link RuntimeException} + * + * @param setClass {@link Collection}-derived class that is instantiated to hold the elements + */ + @SuppressWarnings("unchecked") + public GenericExtendedSet(Class setClass) + { + this.setClass = setClass; + try { + elements = setClass.newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * {@inheritDoc} Not supported by this class: always throws {@link UnsupportedOperationException}. + */ + @Override + public double bitmapCompressionRatio() + { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} Here the ratio is 0 for an empty set and 1 otherwise. + */ + @Override + public double collectionCompressionRatio() + { + return isEmpty() ?
0D : 1D; + } + + /** + * {@inheritDoc} The new set is built from the same {@code setClass}. + */ + @Override + public GenericExtendedSet empty() + { + return new GenericExtendedSet(setClass); + } + + /** + * {@inheritDoc} A {@link SortedSet} or {@link List} backing collection is iterated directly; any other collection is copied and sorted first. {@code skipAllBefore} simply leaves the iterator exhausted when no remaining element reaches the argument. + */ + @Override + public ExtendedIterator iterator() + { + // prepare the sorted set + final Collection sorted; + if (elements instanceof SortedSet || elements instanceof List) { + //NOTE: SortedSet.comparator() is null + sorted = elements; + } else { + sorted = new ArrayList(elements); + Collections.sort((List) sorted); + } + + // iterate over the sorted set + return new ExtendedIterator() + { + final Iterator itr = sorted.iterator(); + T current; + + { + current = itr.hasNext() ? itr.next() : null; + } + + @Override + public void skipAllBefore(T element) + { + while (current != null && element.compareTo(current) > 0) { /* current == null marks exhaustion: stop instead of comparing against null */ + next(); + } + } + + @Override + public boolean hasNext() + { + return current != null; + } + + @Override + public T next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + T prev = current; + current = itr.hasNext() ? itr.next() : null; + return prev; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator descendingIterator() + { + // prepare the sorted set + final Collection sorted; +//TODO +// if (elements instanceof SortedSet || elements instanceof List) { +// //NOTE: SortedSet.comparator() is null +// sorted = elements; +// } else { + sorted = new ArrayList(elements); + Collections.sort((List) sorted, Collections.reverseOrder()); +// } + + // iterate over the sorted set + return new ExtendedIterator() + { + final Iterator itr = sorted.iterator(); + T current; + + { + current = itr.hasNext() ?
itr.next() : null; + } + + @Override + public void skipAllBefore(T element) + { + while (current != null && element.compareTo(current) < 0) { /* reverse-sorted sequence: the elements that come first are the larger ones; stop once exhausted */ + next(); + } + } + + @Override + public boolean hasNext() + { + return current != null; + } + + @Override + public T next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + T prev = current; + current = itr.hasNext() ? itr.next() : null; + return prev; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} The backing collection is duplicated through its public {@code clone()} method when it is {@link Cloneable}, otherwise by {@code addAll} into a fresh instance. + */ + @SuppressWarnings("unchecked") + @Override + public GenericExtendedSet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! + GenericExtendedSet c = empty(); + if (elements instanceof Cloneable) { + try { + c.elements = (Collection) elements.getClass().getMethod("clone").invoke(elements); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } else { + c.elements.addAll(elements); + } + return c; + } + + /** + * {@inheritDoc} Reports the simple name of {@code setClass} followed by the backing collection's {@code toString()}. + */ + @Override + public String debugInfo() + { + return setClass.getSimpleName() + ": " + elements.toString(); + } + + + + /* + * Collection methods + */ + + /** + * {@inheritDoc} A {@link List} backing collection is kept sorted and duplicate-free by inserting at the {@link Collections#binarySearch} insertion point. + */ + @Override + public boolean add(T e) + { + if (elements instanceof List) { + final List l = (List) elements; + int pos = Collections.binarySearch(l, e); + if (pos >= 0) { + return false; + } + l.add(-(pos + 1), e); + return true; + } + return elements.add(e); + } + + /** + * {@inheritDoc} In a {@link List} backing collection the element is located by binary search; a {@link ClassCastException} raised while searching yields {@code false}. + */ + @SuppressWarnings("unchecked") + @Override + public boolean remove(Object o) + { + if (elements instanceof List) { + try { + final List l = (List) elements; + int pos = Collections.binarySearch(l, (T) o); + if (pos < 0) { + return false; + } + l.remove(pos); + return true; + } + catch (ClassCastException e) { + return false; + } + } + return elements.remove(o); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean contains(Object o) + { + if (elements instanceof List)
{ + try { + return Collections.binarySearch((List) elements, (T) o) >= 0; + } + catch (ClassCastException e) { + return false; + } + } + return elements.contains(o); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean containsAll(Collection c) + { + if (isEmpty() || c == null || c.isEmpty()) { + return false; + } + if (this == c) { + return true; + } + + if (elements instanceof List + && c instanceof GenericExtendedSet + && ((GenericExtendedSet) c).elements instanceof List) { + Iterator thisItr = elements.iterator(); + Iterator otherItr = ((GenericExtendedSet) c).elements.iterator(); + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r; + while ((r = otherValue.compareTo(thisValue)) > 0) { + if (!thisItr.hasNext()) { + return false; + } + thisValue = thisItr.next(); + } + if (r < 0) { + return false; + } + } + return !otherItr.hasNext(); + } + + return elements.containsAll(c); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection c) + { + if (elements instanceof List) { + //TODO: copiare codice di union + Collection res = union(c).elements; + boolean r = !res.equals(elements); + elements = res; + return r; + } + return elements.addAll(c); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean retainAll(Collection c) + { + if (elements instanceof List) { + try { + //TODO: copiare codice di intersection + Collection res = intersection((Collection) c).elements; + boolean r = !res.equals(elements); + elements = res; + return r; + } + catch (ClassCastException e) { + return false; + } + } + return elements.retainAll(c); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean removeAll(Collection c) + { + if (elements instanceof List) { + try { + //TODO: copiare codice di difference + Collection res = difference((Collection) c).elements; + 
boolean r = !res.equals(elements); + elements = res; + return r; + } + catch (ClassCastException e) { + return false; + } + } + return elements.removeAll(c); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object o) + { + return o instanceof GenericExtendedSet && ((GenericExtendedSet) o).elements.equals(elements); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() {return elements.size();} + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() {return elements.isEmpty();} + + /** + * {@inheritDoc} + */ + @Override + public void clear() {elements.clear();} + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() {return elements.hashCode();} + + + /* + * SortedSet methods + */ + + /** + * {@inheritDoc} + */ + @Override + public Comparator comparator() + { + return null; + } + + /** + * {@inheritDoc} + */ + @Override + public T first() + { + if (elements instanceof SortedSet) { + return ((SortedSet) elements).first(); + } + if (elements instanceof List) { + return ((List) elements).get(0); + } + return super.first(); + } + + /** + * {@inheritDoc} + */ + @Override + public T last() + { + if (elements instanceof SortedSet) { + return ((SortedSet) elements).last(); + } + if (elements instanceof List) { + return ((List) elements).get(elements.size() - 1); + } + return super.last(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet headSet(T toElement) + { + if (elements instanceof SortedSet) { + GenericExtendedSet c = empty(); + c.elements = ((SortedSet) elements).headSet(toElement); + return c; + } + return super.headSet(toElement); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet tailSet(T fromElement) + { + if (elements instanceof SortedSet) { + GenericExtendedSet c = empty(); + c.elements = ((SortedSet) elements).tailSet(fromElement); + return c; + } + return super.headSet(fromElement); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet subSet(T 
fromElement, T toElement) + { + if (elements instanceof SortedSet) { + GenericExtendedSet c = empty(); + c.elements = ((SortedSet) elements).subSet(fromElement, toElement); + return c; + } + return super.headSet(toElement); + } + + + /* + * ExtendedSet methods + */ + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(Collection other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return 0; + } + if (this == other) { + return size(); + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + int res = 0; + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + if (!otherItr.hasNext()) { + return res; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + if (!thisItr.hasNext()) { + return res; + } + thisValue = thisItr.next(); + } + } + + res++; + } + return res; + } + + return super.intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet intersection(Collection other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (this == other) { + return clone(); + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = 
otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + if (!otherItr.hasNext()) { + return res; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + if (!thisItr.hasNext()) { + return res; + } + thisValue = thisItr.next(); + } + } + + res.elements.add(thisValue); + } + return res; + } + + GenericExtendedSet clone = clone(); + clone.elements.retainAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet union(Collection other) + { + if (this == other || other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty()) { + GenericExtendedSet res = empty(); + res.elements.addAll(other); + return res; + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); +mainLoop: + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + res.elements.add(otherValue); + if (!otherItr.hasNext()) { + res.elements.add(thisValue); + break mainLoop; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + res.elements.add(thisValue); + if (!thisItr.hasNext()) { + res.elements.add(otherValue); + break mainLoop; + } + thisValue = thisItr.next(); + } + } + + res.elements.add(thisValue); + } + while (thisItr.hasNext()) { + res.elements.add(thisItr.next()); + } + while (otherItr.hasNext()) { + res.elements.add(otherItr.next()); + } + return res; + } + + GenericExtendedSet clone = 
clone(); + for (T e : other) { + clone.add(e); + } + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet difference(Collection other) + { + if (isEmpty() || this == other) { + return empty(); + } + if (other == null || other.isEmpty()) { + return clone(); + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); +mainLoop: + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + if (!otherItr.hasNext()) { + res.elements.add(thisValue); + break mainLoop; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + res.elements.add(thisValue); + if (!thisItr.hasNext()) { + break mainLoop; + } + thisValue = thisItr.next(); + } + } + } + while (thisItr.hasNext()) { + res.elements.add(thisItr.next()); + } + return res; + } + + GenericExtendedSet clone = clone(); + clone.elements.removeAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet symmetricDifference(Collection other) + { + if (this == other || other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty()) { + GenericExtendedSet res = empty(); + res.elements.addAll(other); + return res; + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); +mainLoop: + 
while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + res.elements.add(otherValue); + if (!otherItr.hasNext()) { + res.elements.add(thisValue); + break mainLoop; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + res.elements.add(thisValue); + if (!thisItr.hasNext()) { + res.elements.add(otherValue); + break mainLoop; + } + thisValue = thisItr.next(); + } + } + } + while (thisItr.hasNext()) { + res.elements.add(thisItr.next()); + } + while (otherItr.hasNext()) { + res.elements.add(otherItr.next()); + } + return res; + } + + GenericExtendedSet clone = union(other); + clone.removeAll(intersection(other)); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet unmodifiable() + { + GenericExtendedSet c = empty(); + c.elements = Collections.unmodifiableCollection(elements); + return c; + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(T from, T to) + { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet convert(Collection c) + { + GenericExtendedSet res = (GenericExtendedSet) super.convert(c); + if (res.elements instanceof List) { + Collections.sort((List) res.elements); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet convert(Object... 
e) + { + GenericExtendedSet res = (GenericExtendedSet) super.convert(e); + if (res.elements instanceof List) { + Collections.sort((List) res.elements); + } + return res; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java new file mode 100755 index 000000000000..11532dbb191d --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java @@ -0,0 +1,741 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers; + + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.IntSet; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * An {@link ExtendedSet} implementation that maps each element of the universe (i.e., the collection of all possible elements) to an integer referred to as its "index". 
+ * + * @param < T > the type of elements maintained by this set + * + * @author Alessandro Colantonio + * @version $Id: IndexedSet.java 154 2011-05-30 22:19:24Z cocciasik $ + * @see ExtendedSet + * @see AbstractExtendedSet + */ +public class IndexedSet extends AbstractExtendedSet implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = -2386771695765773453L; + + // indices + /** + * @uml.property name="indices" + * @uml.associationEnd + */ + private final IntSet indices; + + // mapping to translate items to indices and vice-versa + private final Map itemToIndex; + private final T[] indexToItem; + + /** + * Creates an empty {@link IndexedSet} based on a given collection that + * represents the set of all possible items that can be added to the + * {@link IndexedSet} instance. + *

+ * VERY IMPORTANT! to correctly work and effectively reduce the + * memory allocation, new instances of {@link IndexedSet} must be + * created through the {@link #clone()} or {@link #empty()} methods and + * not by calling many times this constructor with the same + * collection for universe! + * + * @param indices {@link IntSet} instance used for internal representation + * @param universe collection of all possible items. Order will be + * preserved. + */ + @SuppressWarnings("unchecked") + public IndexedSet(IntSet indices, final Collection universe) + { + // NOTE: this procedure removes duplicates while keeping the order + indexToItem = universe instanceof Set ? (T[]) universe.toArray() : (T[]) (new LinkedHashSet(universe)).toArray(); + itemToIndex = new HashMap(Math.max((int) (indexToItem.length / .75f) + 1, 16)); + for (int i = 0; i < indexToItem.length; i++) { + itemToIndex.put(indexToItem[i], Integer.valueOf(i)); + } + this.indices = indices; + } + + /** + * Creates a {@link IndexedSet} instance from a given universe + * mapping + * + * @param itemToIndex universe item-to-index mapping + * @param indexToItem universe index-to-item mapping + * @param indices initial item set + */ + private IndexedSet(Map itemToIndex, T[] indexToItem, IntSet indices) + { + this.itemToIndex = itemToIndex; + this.indexToItem = indexToItem; + this.indices = indices; + } + + /** + * A shortcut for new IndexedSet<T>(itemToIndex, indexToItem, indices) + */ + private IndexedSet createFromIndices(IntSet indx) + { + return new IndexedSet(itemToIndex, indexToItem, indx); + } + + /** + * Checks if the given collection is a instance of {@link IndexedSet} with + * the same index mappings + * + * @param c collection to check + * + * @return true if the given collection is a instance of + * {@link IndexedSet} with the same index mappings + */ + private boolean hasSameIndices(Collection c) + { + // since indices are always re-created through constructor and + // referenced through clone(), it 
is sufficient to check just only one + // mapping table + return (c instanceof IndexedSet) && (indexToItem == ((IndexedSet) c).indexToItem); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet clone() + { + return createFromIndices(indices.clone()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || !(obj instanceof Collection)) { + return false; + } + IndexedSet other = convert((Collection) obj); + return this.indexToItem == other.indexToItem + && this.itemToIndex == other.itemToIndex + && this.indices.equals(other.indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return indices.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(ExtendedSet o) + { + return indices.compareTo(convert(o).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public Comparator comparator() + { + return new Comparator() + { + @Override + public int compare(T o1, T o2) + { + // compare elements according to the universe ordering + return itemToIndex.get(o1).compareTo(itemToIndex.get(o2)); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public T first() + { + return indexToItem[indices.first()]; + } + + /** + * {@inheritDoc} + */ + @Override + public T last() + { + return indexToItem[indices.last()]; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(T e) + { + Integer index = itemToIndex.get(e); + if (index == null) { + throw new IllegalArgumentException("element not in the current universe"); + } + return indices.add(index.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection c) + { + return c != null && !c.isEmpty() && indices.addAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + indices.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(T e) + { + 
indices.flip(itemToIndex.get(e).intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(Object o) + { + if (o == null) { + return false; + } + Integer index = itemToIndex.get(o); + return index != null && indices.contains(index.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(Collection c) + { + return c == null || indices.containsAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection other) + { + return other == null || indices.containsAny(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + return other != null && !other.isEmpty() && indices.containsAtLeast(convert(other).indices, minElements); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return indices.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator iterator() + { + return new ExtendedIterator() + { + final IntSet.IntIterator itr = indices.iterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return indexToItem[itr.next()];} + + @Override + public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());} + + @Override + public void remove() {itr.remove();} + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator descendingIterator() + { + return new ExtendedIterator() + { + final IntSet.IntIterator itr = indices.descendingIterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return indexToItem[itr.next()];} + + @Override + public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());} + + @Override + public void remove() {itr.remove();} + }; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(Object o) + { + if (o == 
null) { + return false; + } + Integer index = itemToIndex.get(o); + return index != null && indices.remove(index.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(Collection c) + { + return c != null && !c.isEmpty() && indices.removeAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(Collection c) + { + if (isEmpty()) { + return false; + } + if (c == null || c.isEmpty()) { + indices.clear(); + return true; + } + return indices.retainAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return indices.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet intersection(Collection other) + { + if (other == null) { + return empty(); + } + return createFromIndices(indices.intersection(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet union(Collection other) + { + if (other == null) { + return clone(); + } + return createFromIndices(indices.union(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet difference(Collection other) + { + if (other == null) { + return clone(); + } + return createFromIndices(indices.difference(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet symmetricDifference(Collection other) + { + if (other == null) { + return clone(); + } + return createFromIndices(indices.symmetricDifference(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet complemented() + { + return createFromIndices(indices.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + indices.complement(); + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(Collection other) + { + if (other == null) { + return 0; + } + return indices.intersectionSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + 
@Override + public int unionSize(Collection other) + { + if (other == null) { + return size(); + } + return indices.unionSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(Collection other) + { + if (other == null) { + return size(); + } + return indices.symmetricDifferenceSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(Collection other) + { + if (other == null) { + return size(); + } + return indices.differenceSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return indices.complementSize(); + } + + /** + * Returns the collection of all possible elements + * + * @return the collection of all possible elements + */ + public IndexedSet universe() + { + IntSet allItems = indices.empty(); + allItems.fill(0, indexToItem.length - 1); + return createFromIndices(allItems); + } + + /** + * Returns the index of the given item + * + * @param item + * + * @return the index of the given item + */ + public Integer absoluteIndexOf(T item) + { + return itemToIndex.get(item); + } + + /** + * Returns the item corresponding to the given index + * + * @param i index + * + * @return the item + */ + public T absoluteGet(int i) + { + return indexToItem[i]; + } + + /** + * Returns the set of indices. Modifications to this set are reflected to + * this {@link IndexedSet} instance. Trying to perform operation on + * out-of-bound indices will throw an {@link IllegalArgumentException} + * exception. 
+ * + * @return the index set + * + * @see #absoluteGet(int) + * @see #absoluteIndexOf(Object) + */ + public IntSet indices() + { + return indices; + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet empty() + { + return createFromIndices(indices.empty()); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + return indices.bitmapCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return indices.collectionCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public IndexedSet convert(Collection c) + { + if (c == null) { + return empty(); + } + + // useless to convert... + if (hasSameIndices(c)) { + return (IndexedSet) c; + } + + // NOTE: cannot call super.convert(c) because of loop + IndexedSet res = empty(); + for (T t : (Collection) c) { + res.add(t); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet convert(Object... 
e) + { + return (IndexedSet) super.convert(e); + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet() + { + return powerSet(1, Integer.MAX_VALUE); + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet(int min, int max) + { + List ps = indices.powerSet(min, max); + List> res = new ArrayList>(ps.size()); + for (IntSet s : ps) { + res.add(createFromIndices(s)); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return String.format("items = %s\nitemToIndex = %s\nindexToItem = %s\n", + indices.debugInfo(), itemToIndex.toString(), Arrays.toString(indexToItem) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public double jaccardSimilarity(ExtendedSet other) + { + return indices.jaccardSimilarity(convert(other).indices); + } + + //TODO +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet unmodifiable() { +// return createFromIndices(indices.unmodifiable()); +// } +// +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet subSet(T fromElement, T toElement) { +// return createFromIndices(indices.subSet(itemToIndex.get(fromElement), itemToIndex.get(toElement))); +// } +// +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet headSet(T toElement) { +// return createFromIndices(indices.headSet(itemToIndex.get(toElement))); +// } +// +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet tailSet(T fromElement) { +// return createFromIndices(indices.tailSet(itemToIndex.get(fromElement))); +// } + + /** + * {@inheritDoc} + */ + @Override + public T get(int i) + { + return indexToItem[indices.get(i)]; + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(T e) + { + return indices.indexOf(itemToIndex.get(e).intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(T from, T to) + { + indices.clear(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue()); + } + + /** + * {@inheritDoc} 
+ */ + @Override + public void fill(T from, T to) + { + indices.fill(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue()); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java new file mode 100755 index 000000000000..0c92053a2381 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java @@ -0,0 +1,580 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers; + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.IntSet; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * This class provides a "wrapper" for any {@link IntSet} instance in order to be used as an {@link ExtendedSet} instance. 
+ * + * @author Alessandro Colantonio + * @version $Id: IntegerSet.java 153 2011-05-30 16:39:57Z cocciasik $ + */ +public class IntegerSet extends AbstractExtendedSet +{ + /** + * the collection of int numbers + * + * @uml.property name="items" + * @uml.associationEnd + */ + private final IntSet items; + + /** + * Wraps an instance of {@link IntSet} + * + * @param items the {@link IntSet} to wrap + */ + public IntegerSet(IntSet items) + { + this.items = items; + } + + /** + * @return the internal integer representation + */ + public IntSet intSet() + { + return items; + } + + /** + * Converts a generic collection of {@link Integer} instances to a + * {@link IntSet} instance. If the given collection is an + * {@link IntegerSet} instance, it returns the contained + * {@link #items} object. + * + * @param c the generic collection of {@link Integer} instances + * + * @return the resulting {@link IntSet} instance + */ + private IntSet toIntSet(Collection c) + { + // nothing to convert + if (c == null) { + return null; + } + if (c instanceof IntegerSet) { + return ((IntegerSet) c).items; + } + + // extract integers from the given collection + IntSet res = items.empty(); + List sorted = new ArrayList(c.size()); + for (Object i : c) { + try { + sorted.add((Integer) i); + } + catch (ClassCastException e) { + // do nothing + } + } + Collections.sort(sorted); + for (Integer i : sorted) { + res.add(i.intValue()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection c) + { + return items.addAll(toIntSet(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + return items.bitmapCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(Integer from, Integer to) + { + items.clear(from.intValue(), to.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! 
+ return new IntegerSet(items.clone()); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return items.collectionCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(ExtendedSet o) + { + return items.compareTo(toIntSet(o)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet complemented() + { + return new IntegerSet(items.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return items.complementSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection other) + { + return items.containsAny(toIntSet(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + return items.containsAtLeast(toIntSet(other), minElements); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet convert(Collection c) + { + return new IntegerSet(toIntSet(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet convert(Object... 
e)
+  {
+    return convert(Arrays.asList(e));
+  }
+
+  /** {@inheritDoc} Prefixes the wrapped set's debug dump with this class's simple name. */
+  @Override
+  public String debugInfo()
+  {
+    final String header = getClass().getSimpleName();
+    return header + "\n" + items.debugInfo();
+  }
+
+  /** {@inheritDoc} Boxes the values of the wrapped set's descending iterator. */
+  @Override
+  public ExtendedIterator descendingIterator()
+  {
+    return new ExtendedIterator()
+    {
+      final IntSet.IntIterator itr = items.descendingIterator();
+
+      @Override
+      public boolean hasNext()
+      {
+        return itr.hasNext();
+      }
+
+      @Override
+      public Integer next()
+      {
+        return itr.next();
+      }
+
+      @Override
+      public void remove()
+      {
+        itr.remove();
+      }
+
+      @Override
+      public void skipAllBefore(Integer element)
+      {
+        itr.skipAllBefore(element.intValue());
+      }
+    };
+  }
+
+  /** {@inheritDoc} The result wraps the difference computed by the wrapped set. */
+  @Override
+  public IntegerSet difference(Collection other)
+  {
+    return new IntegerSet(items.difference(toIntSet(other)));
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public int differenceSize(Collection other)
+  {
+    return items.differenceSize(toIntSet(other));
+  }
+
+  /** {@inheritDoc} The result wraps an empty set obtained from the wrapped set. */
+  @Override
+  public IntegerSet empty()
+  {
+    return new IntegerSet(items.empty());
+  }
+
+  /**
+   * {@inheritDoc} Two instances are equal when their wrapped sets are equal;
+   * any object that is not an {@code IntegerSet} is unequal.
+   */
+  @Override
+  public boolean equals(Object o)
+  {
+    if (o == this) {
+      return true;
+    }
+    return o instanceof IntegerSet && items.equals(((IntegerSet) o).items);
+  }
+
+  /** {@inheritDoc} Unboxes both bounds and delegates to the wrapped set. */
+  @Override
+  public void fill(Integer from, Integer to)
+  {
+    items.fill(from.intValue(), to.intValue());
+  }
+
+  /** {@inheritDoc} Boxes the wrapped set's first element. */
+  @Override
+  public Integer first()
+  {
+    return items.first();
+  }
+
+  /** {@inheritDoc} Unboxes the element and delegates to the wrapped set. */
+  @Override
+  public void flip(Integer e)
+  {
+    items.flip(e.intValue());
+  }
+
+  /** {@inheritDoc} Boxes the element found at position {@code i} of the wrapped set. */
+  @Override
+  public Integer get(int i)
+  {
+    return items.get(i);
+  }
+
+  /** {@inheritDoc} Unboxes the element and delegates to the wrapped set. */
+  @Override
+  public int indexOf(Integer e)
+  {
+    return items.indexOf(e.intValue());
+  }
+
+  /** {@inheritDoc} The result wraps the intersection computed by the wrapped set. */
+  @Override
+  public IntegerSet intersection(Collection other)
+  {
+    return new IntegerSet(items.intersection(toIntSet(other)));
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public int intersectionSize(Collection other)
+  {
+    return items.intersectionSize(toIntSet(other));
+  }
+
+  /** {@inheritDoc} Boxes the values of the wrapped set's iterator. */
+  @Override
+  public ExtendedIterator iterator()
+  {
+    return new ExtendedIterator()
+    {
+      final IntSet.IntIterator itr = items.iterator();
+
+      @Override
+      public boolean hasNext()
+      {
+        return itr.hasNext();
+      }
+
+      @Override
+      public Integer next()
+      {
+        return itr.next();
+      }
+
+      @Override
+      public void remove()
+      {
+        itr.remove();
+      }
+
+      @Override
+      public void skipAllBefore(Integer element)
+      {
+        itr.skipAllBefore(element.intValue());
+      }
+    };
+  }
+
+  /** {@inheritDoc} Boxes the wrapped set's last element. */
+  @Override
+  public Integer last()
+  {
+    return items.last();
+  }
+
+  /** {@inheritDoc} Same as {@code powerSet(1, Integer.MAX_VALUE)}. */
+  @Override
+  public List powerSet()
+  {
+    return powerSet(1, Integer.MAX_VALUE);
+  }
+
+  /** {@inheritDoc} Wraps every subset produced by the wrapped set, preserving their order. */
+  @Override
+  public List powerSet(int min, int max)
+  {
+    final List<? extends IntSet> subsets = items.powerSet(min, max);
+    final List<IntegerSet> wrapped = new ArrayList<IntegerSet>(subsets.size());
+    for (IntSet subset : subsets) {
+      wrapped.add(new IntegerSet(subset));
+    }
+    return wrapped;
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public boolean removeAll(Collection c)
+  {
+    return items.removeAll(toIntSet(c));
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public boolean retainAll(Collection c)
+  {
+    return items.retainAll(toIntSet(c));
+  }
+
+  /** {@inheritDoc} The result wraps the symmetric difference computed by the wrapped set. */
+  @Override
+  public IntegerSet symmetricDifference(Collection other)
+  {
+    return new IntegerSet(items.symmetricDifference(toIntSet(other)));
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public int symmetricDifferenceSize(Collection other)
+  {
+    return items.symmetricDifferenceSize(toIntSet(other));
+  }
+
+  /** {@inheritDoc} The result wraps the union computed by the wrapped set. */
+  @Override
+  public IntegerSet union(Collection other)
+  {
+    return new IntegerSet(items.union(toIntSet(other)));
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public int unionSize(Collection other)
+  {
+    return items.unionSize(toIntSet(other));
+  }
+
+  /** {@inheritDoc} Uses the hash code of the wrapped set. */
+  @Override
+  public int hashCode()
+  {
+    return items.hashCode();
+  }
+
+  /** {@inheritDoc} Complements the wrapped set in place. */
+  @Override
+  public void complement()
+  {
+    items.complement();
+  }
+
+  /** {@inheritDoc} This implementation always returns {@code null}. */
+  @Override
+  public Comparator comparator()
+  {
+    return null;
+  }
+
+  /** {@inheritDoc} Unboxes the element and delegates to the wrapped set. */
+  @Override
+  public boolean add(Integer e)
+  {
+    return items.add(e.intValue());
+  }
+
+  /** {@inheritDoc} Clears the wrapped set. */
+  @Override
+  public void clear()
+  {
+    items.clear();
+  }
+
+  /** {@inheritDoc} Anything that is not an {@link Integer} is reported as absent. */
+  @Override
+  public boolean contains(Object o)
+  {
+    return o instanceof Integer && items.contains(((Integer) o).intValue());
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public boolean containsAll(Collection c)
+  {
+    return items.containsAll(toIntSet(c));
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public boolean isEmpty()
+  {
+    return items.isEmpty();
+  }
+
+  /** {@inheritDoc} Anything that is not an {@link Integer} is ignored and {@code false} is returned. */
+  @Override
+  public boolean remove(Object o)
+  {
+    return o instanceof Integer && items.remove(((Integer) o).intValue());
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public int size()
+  {
+    return items.size();
+  }
+
+  /** {@inheritDoc} Uses the string form of the wrapped set. */
+  @Override
+  public String toString()
+  {
+    // Deliberately not super.toString(): that would iterate over freshly
+    // boxed Integer instances and create needless garbage.
+    return items.toString();
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public double jaccardSimilarity(ExtendedSet other)
+  {
+    return items.jaccardSimilarity(toIntSet(other));
+  }
+
+  /** {@inheritDoc} Delegates to the wrapped set. */
+  @Override
+  public double weightedJaccardSimilarity(ExtendedSet other)
+  {
+    return items.weightedJaccardSimilarity(toIntSet(other));
+  }
+}
diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java
b/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java new file mode 100755 index 000000000000..ad60d782fe98 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java @@ -0,0 +1,1692 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers; + +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.ConciseSetUtils; +import io.druid.extendedset.intset.IntSet; +import io.druid.extendedset.intset.IntSet.IntIterator; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.NavigableMap; +import java.util.NoSuchElementException; +import java.util.SortedSet; +import java.util.TreeMap; + +/** + * Very similar to {@link ExtendedSet} but for the primitive long type. 
+ *
+ * @author Alessandro Colantonio
+ * @version $Id: LongSet.java 154 2011-05-30 22:19:24Z cocciasik $
+ */
+public class LongSet implements Cloneable, Comparable, java.io.Serializable, Iterable
+{
+  /**
+   * generated ID
+   */
+  private static final long serialVersionUID = -6165350530254304256L;
+
+  /**
+   * Maximum cardinality of each subset. Elements are grouped into blocks of
+   * this many consecutive values; a block is addressed by its base offset,
+   * {@code (e / SUBSET_SIZE) * SUBSET_SIZE}.
+   */
+  private static final int SUBSET_SIZE = ConciseSetUtils.MAX_ALLOWED_INTEGER + 1;
+
+  /**
+   * Elements of the first block (from 0 to {@link #SUBSET_SIZE} - 1)
+   *
+   * @uml.property name="firstIndices"
+   * @uml.associationEnd
+   */
+  private final IntSet firstIndices;
+
+  /**
+   * Elements of the remaining blocks (from {@link #SUBSET_SIZE}), keyed by
+   * block base offset; each block stores {@code element - base}.
+   */
+  private final NavigableMap<Long, IntSet> otherIndices;
+
+  /**
+   * Creates an empty set
+   *
+   * @param block {@link IntSet} instance internally used to represent
+   *              {@link Long} values. It can be non-empty: only
+   *              {@code block.empty()} is used, its content is not copied.
+   */
+  public LongSet(IntSet block)
+  {
+    firstIndices = block.empty();
+    otherIndices = new TreeMap<Long, IntSet>();
+  }
+
+  /**
+   * Shallow-copy constructor: both arguments are stored as given, not copied.
+   */
+  private LongSet(IntSet firstIndices, NavigableMap<Long, IntSet> otherIndices)
+  {
+    this.firstIndices = firstIndices;
+    this.otherIndices = otherIndices;
+  }
+
+  /**
+   * @return an empty {@link IntSet} instance of the same type of that
+   * internally used to represent integers
+   */
+  public IntSet emptyBlock()
+  {
+    return firstIndices.empty();
+  }
+
+  /**
+   * Retains only the elements in this set that are contained in the specified
+   * collection. In other words, removes from this set all of its elements
+   * that are not contained in the specified collection.
+   *
+   * @param other set containing the elements to be retained in this set;
+   *              {@code null} is treated like an empty set
+   *
+   * @return {@code true} if this set changed as a result of the call
+   *
+   * @see #remove(long)
+   */
+  @SuppressWarnings("null")
+  public boolean retainAll(LongSet other)
+  {
+    if (isEmpty() || this == other) {
+      return false;
+    }
+    if (other == null || other.isEmpty()) {
+      clear();
+      return true;
+    }
+
+    boolean res = firstIndices.retainAll(other.firstIndices);
+    if (otherIndices.isEmpty()) {
+      return res;
+    }
+    if (other.otherIndices.isEmpty()) {
+      otherIndices.clear();
+      return true;
+    }
+
+    // Merge-walk over both key-sorted block maps. "c" holds the last key
+    // comparison: c <= 0 advances this set's cursor, c >= 0 the other's.
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          return res;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          // the other set has no more blocks: drop the current block and
+          // every block after it
+          itr1.remove();
+          while (itr1.hasNext()) {
+            itr1.next();
+            itr1.remove();
+          }
+          return true;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c < 0) {
+        // block missing from the other set
+        itr1.remove();
+        res = true;
+      } else if (c == 0) {
+        res |= e1.getValue().retainAll(e2.getValue());
+        if (e1.getValue().isEmpty()) {
+          itr1.remove();
+        }
+      }
+    }
+  }
+
+  /**
+   * Generates the intersection set
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; {@code null} is treated like an empty set
+   *
+   * @return the result of the operation
+   *
+   * @see #retainAll(LongSet)
+   */
+  @SuppressWarnings("null")
+  public LongSet intersection(LongSet other)
+  {
+    if (isEmpty() || other == null || other.isEmpty()) {
+      return empty();
+    }
+    if (this == other) {
+      return clone();
+    }
+
+    LongSet res = new LongSet(firstIndices.intersection(other.firstIndices), new TreeMap<Long, IntSet>());
+    if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) {
+      return res;
+    }
+
+    // merge-walk; only blocks present on both sides can contribute
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          return res;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          return res;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c == 0) {
+        IntSet s = e1.getValue().intersection(e2.getValue());
+        if (!s.isEmpty()) {
+          res.otherIndices.put(e1.getKey(), s);
+        }
+      }
+    }
+  }
+
+  /**
+   * Adds all of the elements in the specified collection to this set if
+   * they're not already present.
+   *
+   * @param other set containing the elements to be added to this set;
+   *              {@code null} is treated like an empty set
+   *
+   * @return {@code true} if this set changed as a result of the call
+   *
+   * @see #add(long)
+   */
+  @SuppressWarnings("null")
+  public boolean addAll(LongSet other)
+  {
+    if (other == null || other.isEmpty() || this == other) {
+      return false;
+    }
+
+    boolean res = firstIndices.addAll(other.firstIndices);
+    if (other.otherIndices.isEmpty()) {
+      return res;
+    }
+    if (otherIndices.isEmpty()) {
+      for (Entry<Long, IntSet> e : other.otherIndices.entrySet()) {
+        otherIndices.put(e.getKey(), e.getValue().clone());
+      }
+      return true;
+    }
+
+    // this set's blocks are walked through a snapshot because new blocks
+    // are inserted into otherIndices during the walk
+    Iterator<Entry<Long, IntSet>> itr1 = new ArrayList<Entry<Long, IntSet>>(otherIndices.entrySet()).iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          return res;
+        }
+      }
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          // this set has no more blocks: copy the pending block and the rest
+          otherIndices.put(e2.getKey(), e2.getValue().clone());
+          while (itr2.hasNext()) {
+            e2 = itr2.next();
+            otherIndices.put(e2.getKey(), e2.getValue().clone());
+          }
+          return true;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c > 0) {
+        otherIndices.put(e2.getKey(), e2.getValue().clone());
+        res = true;
+      } else if (c == 0) {
+        res |= e1.getValue().addAll(e2.getValue());
+      }
+    }
+  }
+
+  /**
+   * Generates the union set
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; {@code null} is treated like an empty set
+   *
+   * @return the result of the operation
+   *
+   * @see #addAll(LongSet)
+   */
+  @SuppressWarnings("null")
+  public LongSet union(LongSet other)
+  {
+    if (other == null || other.isEmpty() || this == other) {
+      return clone();
+    }
+    if (isEmpty()) {
+      return other.clone();
+    }
+
+    LongSet res = new LongSet(firstIndices.union(other.firstIndices), new TreeMap<Long, IntSet>());
+    if (other.otherIndices.isEmpty()) {
+      for (Entry<Long, IntSet> e : otherIndices.entrySet()) {
+        res.otherIndices.put(e.getKey(), e.getValue().clone());
+      }
+      return res;
+    }
+    if (otherIndices.isEmpty()) {
+      for (Entry<Long, IntSet> e : other.otherIndices.entrySet()) {
+        res.otherIndices.put(e.getKey(), e.getValue().clone());
+      }
+      return res;
+    }
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          // c != 0 means the current block of "other" was not merged yet
+          if (c != 0) {
+            res.otherIndices.put(e2.getKey(), e2.getValue().clone());
+          }
+          while (itr2.hasNext()) {
+            e2 = itr2.next();
+            res.otherIndices.put(e2.getKey(), e2.getValue().clone());
+          }
+          return res;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+          while (itr1.hasNext()) {
+            e1 = itr1.next();
+            res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+          }
+          return res;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c < 0) {
+        res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+      } else if (c > 0) {
+        res.otherIndices.put(e2.getKey(), e2.getValue().clone());
+      } else {
+        res.otherIndices.put(e1.getKey(), e1.getValue().union(e2.getValue()));
+      }
+    }
+  }
+
+  /**
+   * Removes from this set all of its elements that are contained in the
+   * specified collection.
+   *
+   * @param other set containing the elements to be removed from this set;
+   *              {@code null} is treated like an empty set
+   *
+   * @return {@code true} if this set changed as a result of the call
+   *
+   * @see #remove(long)
+   * @see #contains(long)
+   */
+  @SuppressWarnings("null")
+  public boolean removeAll(LongSet other)
+  {
+    if (isEmpty() || other == null || other.isEmpty()) {
+      return false;
+    }
+    if (this == other) {
+      clear();
+      return true;
+    }
+
+    boolean res = firstIndices.removeAll(other.firstIndices);
+    if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) {
+      return res;
+    }
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          return res;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          return res;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c == 0) {
+        res |= e1.getValue().removeAll(e2.getValue());
+        if (e1.getValue().isEmpty()) {
+          itr1.remove();
+        }
+      }
+    }
+  }
+
+  /**
+   * Generates the difference set
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; {@code null} is treated like an empty set
+   *
+   * @return the result of the operation
+   *
+   * @see #removeAll(LongSet)
+   */
+  @SuppressWarnings("null")
+  public LongSet difference(LongSet other)
+  {
+    if (other == null || other.isEmpty()) {
+      return clone();
+    }
+    if (isEmpty() || this == other) {
+      return empty();
+    }
+
+    LongSet res = new LongSet(firstIndices.difference(other.firstIndices), new TreeMap<Long, IntSet>());
+    if (otherIndices.isEmpty()) {
+      return res;
+    }
+    if (other.otherIndices.isEmpty()) {
+      for (Entry<Long, IntSet> e : otherIndices.entrySet()) {
+        res.otherIndices.put(e.getKey(), e.getValue().clone());
+      }
+      return res;
+    }
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          return res;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+          while (itr1.hasNext()) {
+            e1 = itr1.next();
+            res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+          }
+          return res;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c < 0) {
+        res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+      } else if (c == 0) {
+        IntSet s = e1.getValue().difference(e2.getValue());
+        if (!s.isEmpty()) {
+          res.otherIndices.put(e1.getKey(), s);
+        }
+      }
+    }
+  }
+
+  /**
+   * Generates the symmetric difference set
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; {@code null} is treated like an empty set
+   *
+   * @return the result of the operation; the empty set when {@code other}
+   * is this very instance
+   *
+   * @see #flip(long)
+   */
+  @SuppressWarnings("null")
+  public LongSet symmetricDifference(LongSet other)
+  {
+    if (other == null || other.isEmpty()) {
+      return clone();
+    }
+    if (this == other) {
+      // every element is on both sides, so nothing survives
+      return empty();
+    }
+    if (isEmpty()) {
+      return other.clone();
+    }
+
+    LongSet res = new LongSet(firstIndices.symmetricDifference(other.firstIndices), new TreeMap<Long, IntSet>());
+    if (other.otherIndices.isEmpty()) {
+      for (Entry<Long, IntSet> e : otherIndices.entrySet()) {
+        res.otherIndices.put(e.getKey(), e.getValue().clone());
+      }
+      return res;
+    }
+    if (otherIndices.isEmpty()) {
+      for (Entry<Long, IntSet> e : other.otherIndices.entrySet()) {
+        res.otherIndices.put(e.getKey(), e.getValue().clone());
+      }
+      return res;
+    }
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          // c != 0 means the current block of "other" was not merged yet
+          if (c != 0) {
+            res.otherIndices.put(e2.getKey(), e2.getValue().clone());
+          }
+          while (itr2.hasNext()) {
+            e2 = itr2.next();
+            res.otherIndices.put(e2.getKey(), e2.getValue().clone());
+          }
+          return res;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+          while (itr1.hasNext()) {
+            e1 = itr1.next();
+            res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+          }
+          return res;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c < 0) {
+        res.otherIndices.put(e1.getKey(), e1.getValue().clone());
+      } else if (c > 0) {
+        res.otherIndices.put(e2.getKey(), e2.getValue().clone());
+      } else {
+        // identical blocks cancel out; never store an empty block, since
+        // isEmpty() and last() rely on every stored block being non-empty
+        IntSet s = e1.getValue().symmetricDifference(e2.getValue());
+        if (!s.isEmpty()) {
+          res.otherIndices.put(e1.getKey(), s);
+        }
+      }
+    }
+  }
+
+  /**
+   * Generates the complement set. The returned set is represented by all the
+   * elements strictly less than {@link #last()} that do not exist in the
+   * current set.
+   *
+   * @return the complement set
+   *
+   * @see LongSet#complement()
+   */
+  public LongSet complemented()
+  {
+    LongSet cloned = clone();
+    cloned.complement();
+    return cloned;
+  }
+
+  /**
+   * Complements the current set. The modified set is represented by all the
+   * elements strictly less than {@link #last()} that do not exist in the
+   * current set.
+   *
+   * @see LongSet#complemented()
+   */
+  public void complement()
+  {
+    if (otherIndices.isEmpty()) {
+      firstIndices.complement();
+      return;
+    }
+
+    // complement the last block
+    Iterator<Entry<Long, IntSet>> itr = otherIndices.descendingMap().entrySet().iterator();
+    Entry<Long, IntSet> e = itr.next();
+    e.getValue().complement();
+    if (e.getValue().isEmpty()) {
+      itr.remove();
+    }
+
+    // complement other blocks, walking the block offsets downwards
+    NavigableMap<Long, IntSet> toAdd = new TreeMap<Long, IntSet>(); // avoid concurrent modification
+    for (long i = e.getKey().longValue() - SUBSET_SIZE; i > 0L; i -= SUBSET_SIZE) {
+      while (e != null && e.getKey().longValue() > i) {
+        e = itr.hasNext() ? itr.next() : null;
+      }
+
+      if (e != null && e.getKey().longValue() == i) {
+        // Existing block: add the top element first so that complement()
+        // covers the whole block. If it was absent before, it belongs to the
+        // result and is put back afterwards.
+        if (e.getValue().add(SUBSET_SIZE - 1)) {
+          e.getValue().complement();
+          e.getValue().add(SUBSET_SIZE - 1);
+        } else {
+          e.getValue().complement();
+        }
+        if (e.getValue().isEmpty()) {
+          itr.remove();
+        }
+      } else {
+        // missing block: its complement is a completely filled block
+        IntSet s = firstIndices.empty();
+        s.fill(0, SUBSET_SIZE - 1);
+        toAdd.put(Long.valueOf(i), s);
+      }
+    }
+    otherIndices.putAll(toAdd);
+
+    // the first block is handled like any other non-last block
+    if (firstIndices.add(SUBSET_SIZE - 1)) {
+      firstIndices.complement();
+      firstIndices.add(SUBSET_SIZE - 1);
+    } else {
+      firstIndices.complement();
+    }
+  }
+
+  /**
+   * Computes the intersection set size.
+   * <p>
+   * This is faster than calling {@link #intersection(LongSet)} and
+   * then {@link #size()}
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; {@code null} is treated like an empty set
+   *
+   * @return the size
+   */
+  @SuppressWarnings("null")
+  public long intersectionSize(LongSet other)
+  {
+    if (isEmpty() || other == null || other.isEmpty()) {
+      return 0L;
+    }
+    if (this == other) {
+      return size();
+    }
+
+    long res = firstIndices.intersectionSize(other.firstIndices);
+    if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) {
+      return res;
+    }
+
+    // merge-walk over both key-sorted block maps; only blocks present on
+    // both sides add to the count
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          return res;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          return res;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c == 0) {
+        res += e1.getValue().intersectionSize(e2.getValue());
+      }
+    }
+  }
+
+  /**
+   * Computes the union set size.
+   * <p>
+   * This is faster than calling {@link #union(LongSet)} and then
+   * {@link #size()}
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; for {@code null} the size of this set is returned
+   *
+   * @return the size
+   */
+  public long unionSize(LongSet other)
+  {
+    if (other == null) {
+      return size();
+    }
+    // |A| + |B| - |A and B|
+    return size() + other.size() - intersectionSize(other);
+  }
+
+  /**
+   * Computes the symmetric difference set size.
+   * <p>
+   * This is faster than calling {@link #symmetricDifference(LongSet)}
+   * and then {@link #size()}
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; for {@code null} the size of this set is returned
+   *
+   * @return the size
+   */
+  public long symmetricDifferenceSize(LongSet other)
+  {
+    if (other == null) {
+      return size();
+    }
+    // |A| + |B| - 2 * |A and B|
+    return size() + other.size() - 2 * intersectionSize(other);
+  }
+
+  /**
+   * Computes the difference set size.
+   * <p>
+   * This is faster than calling {@link #difference(LongSet)} and then
+   * {@link #size()}
+   *
+   * @param other {@link LongSet} instance that represents the right
+   *              operand; for {@code null} the size of this set is returned
+   *
+   * @return the size
+   */
+  public long differenceSize(LongSet other)
+  {
+    if (other == null) {
+      return size();
+    }
+    // |A| - |A and B|
+    return size() - intersectionSize(other);
+  }
+
+  /**
+   * Computes the complement set size.
+   * <p>
+   * This is faster than calling {@link #complemented()} and then
+   * {@link #size()}
+   *
+   * @return the size; {@code 0} for an empty set
+   */
+  public long complementSize()
+  {
+    if (isEmpty()) {
+      return 0L;
+    }
+    return last() - size() + 1L;
+  }
+
+  /**
+   * Generates an empty set
+   *
+   * @return the empty set
+   */
+  public LongSet empty()
+  {
+    return new LongSet(firstIndices.empty(), new TreeMap<Long, IntSet>());
+  }
+
+  /**
+   * See the <code>clone()</code> of {@link Object}
+   *
+   * @return cloned object; every block is cloned, so the copy shares no
+   * {@link IntSet} instance with this set
+   */
+  @Override
+  public LongSet clone()
+  {
+    // NOTE: do not use super.clone() since it is 10 times slower!
+    NavigableMap<Long, IntSet> otherIndicesClone = new TreeMap<Long, IntSet>();
+    for (Entry<Long, IntSet> e : otherIndices.entrySet()) {
+      otherIndicesClone.put(e.getKey(), e.getValue().clone());
+    }
+    return new LongSet(firstIndices.clone(), otherIndicesClone);
+  }
+
+  /**
+   * Computes the compression factor of the equivalent bitmap representation
+   * (1 means not compressed, namely a memory footprint similar to
+   * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.)
+   * <p>
+   * Not implemented yet: this method always throws.
+   *
+   * @return the compression factor
+   *
+   * @throws RuntimeException always
+   */
+  public double bitmapCompressionRatio()
+  {
+    //TODO
+    throw new RuntimeException("TODO");
+  }
+
+  /**
+   * Computes the compression factor of the equivalent integer collection (1
+   * means not compressed, namely a memory footprint similar to
+   * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.)
+   * <p>
+   * Not implemented yet: this method always throws.
+   *
+   * @return the compression factor
+   *
+   * @throws RuntimeException always
+   */
+  public double collectionCompressionRatio()
+  {
+    //TODO
+    throw new RuntimeException("TODO");
+  }
+
+  /**
+   * @return a {@link ExtendedLongIterator} instance to iterate over the set
+   */
+  public ExtendedLongIterator longIterator()
+  {
+    return new ExtendedLongIterator();
+  }
+
+  /**
+   * @return a {@link ExtendedLongIterator} instance to iterate over the set in
+   * descending order
+   */
+  public ExtendedLongIterator descendingLongIterator()
+  {
+    return new ReverseLongIterator();
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Boxes the values produced by {@link #longIterator()}.
+   */
+  @Override
+  public Iterator<Long> iterator()
+  {
+    return new Iterator<Long>()
+    {
+      final ExtendedLongIterator itr = longIterator();
+
+      @Override
+      public boolean hasNext()
+      {
+        return itr.hasNext();
+      }
+
+      @Override
+      public Long next()
+      {
+        return Long.valueOf(itr.next());
+      }
+
+      @Override
+      public void remove()
+      {
+        itr.remove();
+      }
+    };
+  }
+
+  /**
+   * Prints debug info about the given {@link LongSet} implementation
+   *
+   * @return a string that describes the internal representation of the
+   * instance: the elements, the first block, then the number of other
+   * blocks followed by one "offset, block" line per block
+   */
+  public String debugInfo()
+  {
+    StringBuilder s = new StringBuilder();
+
+    s.append("elements: ").append(toString());
+    s.append("\nfirstIndices: ").append(firstIndices).append('\n');
+    s.append("otherIndices: ").append(otherIndices.size()).append('\n');
+    for (Entry<Long, IntSet> e : otherIndices.entrySet()) {
+      s.append('\t').append(e.getKey()).append(", ").append(e.getValue()).append('\n');
+    }
+
+    return s.toString();
+  }
+
+  /**
+   * Adds to the set all the elements between <code>first</code> and
+   * <code>last</code>, both included.
+   *
+   * @param from first element
+   * @param to   last element
+   *
+   * @throws IndexOutOfBoundsException if {@code from} is greater than {@code to}
+   */
+  public void fill(long from, long to)
+  {
+    if (from > to) {
+      throw new IndexOutOfBoundsException("from: " + from + " > to: " + to);
+    }
+    if (from == to) {
+      add(from);
+      return;
+    }
+
+    final long headBase = (from / SUBSET_SIZE) * SUBSET_SIZE;
+    final long tailBase = (to / SUBSET_SIZE) * SUBSET_SIZE;
+    final boolean singleBlock = headBase == tailBase;
+
+    // The first touched block is filled up to "to" when the range ends in
+    // it, otherwise up to the top of the block.
+    final int headEnd = singleBlock ? (int) (to - headBase) : SUBSET_SIZE - 1;
+    if (headBase == 0L) {
+      firstIndices.fill((int) from, headEnd);
+    } else {
+      IntSet head = otherIndices.get(headBase);
+      if (head == null) {
+        head = firstIndices.empty();
+        otherIndices.put(headBase, head);
+      }
+      head.fill((int) (from - headBase), headEnd);
+    }
+    if (singleBlock) {
+      return;
+    }
+
+    // blocks strictly between the first and the last one are replaced by
+    // completely filled blocks
+    for (long base = headBase + SUBSET_SIZE; base < tailBase; base += SUBSET_SIZE) {
+      IntSet full = firstIndices.empty();
+      full.fill(0, SUBSET_SIZE - 1);
+      otherIndices.put(Long.valueOf(base), full);
+    }
+
+    // the last block is filled from its bottom up to "to"
+    IntSet tail = otherIndices.get(tailBase);
+    if (tail == null) {
+      tail = firstIndices.empty();
+      otherIndices.put(tailBase, tail);
+    }
+    tail.fill(0, (int) (to - tailBase));
+  }
+
+  /**
+   * Removes from the set all the elements between <code>first</code> and
+   * <code>last</code>, both included.
+   *
+   * @param from first element
+   * @param to   last element
+   *
+   * @throws IndexOutOfBoundsException if {@code from} is greater than {@code to}
+   */
+  public void clear(long from, long to)
+  {
+    if (from > to) {
+      throw new IndexOutOfBoundsException("from: " + from + " > to: " + to);
+    }
+    if (from == to) {
+      remove(from);
+      return;
+    }
+
+    final long headBase = (from / SUBSET_SIZE) * SUBSET_SIZE;
+    final long tailBase = (to / SUBSET_SIZE) * SUBSET_SIZE;
+    final boolean singleBlock = headBase == tailBase;
+
+    // The first touched block is cleared up to "to" when the range ends in
+    // it, otherwise up to the top of the block.
+    final int headEnd = singleBlock ? (int) (to - headBase) : SUBSET_SIZE - 1;
+    if (headBase == 0L) {
+      firstIndices.clear((int) from, headEnd);
+    } else {
+      IntSet head = otherIndices.get(headBase);
+      if (head != null) {
+        head.clear((int) (from - headBase), headEnd);
+        if (head.isEmpty()) {
+          otherIndices.remove(headBase);
+        }
+      }
+    }
+    if (singleBlock) {
+      return;
+    }
+
+    // blocks strictly between the first and the last one vanish entirely
+    for (long base = headBase + SUBSET_SIZE; base < tailBase; base += SUBSET_SIZE) {
+      otherIndices.remove(Long.valueOf(base));
+    }
+
+    // the last block is cleared from its bottom up to "to"
+    IntSet tail = otherIndices.get(tailBase);
+    if (tail != null) {
+      tail.clear(0, (int) (to - tailBase));
+      if (tail.isEmpty()) {
+        otherIndices.remove(tailBase);
+      }
+    }
+  }
+
+  /**
+   * Adds the element if it not existing, or removes it if existing
+   *
+   * @param e element to flip
+   *
+   * @see #symmetricDifference(LongSet)
+   */
+  public void flip(long e)
+  {
+    if (e < SUBSET_SIZE) {
+      firstIndices.flip((int) e);
+      return;
+    }
+
+    final long block = (e / SUBSET_SIZE) * SUBSET_SIZE;
+    IntSet s = otherIndices.get(block);
+    if (s == null) {
+      s = firstIndices.empty();
+      otherIndices.put(block, s);
+    }
+    s.flip((int) (e - block));
+    // never keep an empty block around
+    if (s.isEmpty()) {
+      otherIndices.remove(block);
+    }
+  }
+
+  /**
+   * Gets the <i>i</i><sup>th</sup> element of the set
+   *
+   * @param index position of the element in the sorted set
+   *
+   * @return the <i>i</i><sup>th</sup> element of the set
+   *
+   * @throws IndexOutOfBoundsException if <code>index</code> is greater or equal to
+   *                                   {@link #size()}; the message carries the
+   *                                   requested index
+   */
+  public long get(long index)
+  {
+    // position still to be skipped, relative to the block under inspection
+    long remaining = index;
+    if (remaining < firstIndices.size()) {
+      return firstIndices.get((int) remaining);
+    }
+
+    remaining -= firstIndices.size();
+    for (Entry<Long, IntSet> e : otherIndices.entrySet()) {
+      final IntSet block = e.getValue();
+      if (remaining < block.size()) {
+        return e.getKey().longValue() + block.get((int) remaining);
+      }
+      remaining -= block.size();
+    }
+    throw new IndexOutOfBoundsException(Long.toString(index));
+  }
+
+  /**
+   * Provides position of element within the set.
+   * <p>
+   * It returns -1 if the element does not exist within the set.
+   *
+   * @param i element of the set
+   *
+   * @return the element position, or -1 if the element is absent
+   */
+  public long indexOf(long i)
+  {
+    if (i < SUBSET_SIZE) {
+      return firstIndices.indexOf((int) i);
+    }
+    // number of elements stored in the blocks that precede the current one
+    long prev = firstIndices.size();
+    for (Entry<Long, IntSet> e : otherIndices.entrySet()) {
+      final long base = e.getKey().longValue();
+      if (i < base) {
+        // already past the block that would hold i: that block is missing
+        break;
+      }
+      if (i < base + SUBSET_SIZE) {
+        final int pos = e.getValue().indexOf((int) (i - base));
+        // keep "not found" as -1 instead of folding it into the offset
+        return pos < 0 ? -1L : prev + pos;
+      }
+      prev += e.getValue().size();
+    }
+    return -1L;
+  }
+
+  /**
+   * Converts a given array into an instance of the current class.
+   *
+   * @param a array to use to generate the new instance; {@code null} yields
+   *          an empty set. The array itself is not modified.
+   *
+   * @return the converted collection
+   */
+  public LongSet convert(long... a)
+  {
+    LongSet res = empty();
+    if (a != null) {
+      // sort a copy so that elements are added in ascending order
+      long[] sorted = Arrays.copyOf(a, a.length);
+      Arrays.sort(sorted);
+      for (long i : sorted) {
+        res.add(i);
+      }
+    }
+    return res;
+  }
+
+  /**
+   * Converts a given collection into an instance of the current class.
+   *
+   * @param a collection to use to generate the new instance; {@code null}
+   *          yields an empty set. The collection itself is not modified.
+   *
+   * @return the converted collection
+   */
+  public LongSet convert(Collection<Long> a)
+  {
+    LongSet res = empty();
+    Collection<Long> sorted;
+    if (a != null) {
+      if (a instanceof SortedSet<?> && ((SortedSet<?>) a).comparator() == null) {
+        // a sorted set without comparator is iterated as-is
+        sorted = a;
+      } else {
+        sorted = new ArrayList<Long>(a);
+        Collections.sort((List<Long>) sorted);
+      }
+      for (long i : sorted) {
+        res.add(i);
+      }
+    }
+    return res;
+  }
+
+  /**
+   * Returns the first (lowest) element currently in this set.
+   *
+   * @return the first (lowest) element currently in this set
+   *
+   * @throws NoSuchElementException if this set is empty
+   */
+  public long first()
+  {
+    if (!firstIndices.isEmpty()) {
+      return firstIndices.first();
+    }
+    if (otherIndices.isEmpty()) {
+      throw new NoSuchElementException();
+    }
+    Entry<Long, IntSet> e = otherIndices.firstEntry();
+    return e.getKey().longValue() + e.getValue().first();
+  }
+
+  /**
+   * Returns the last (highest) element currently in this set.
+   *
+   * @return the last (highest) element currently in this set
+   *
+   * @throws NoSuchElementException if this set is empty
+   */
+  public long last()
+  {
+    if (otherIndices.isEmpty() && firstIndices.isEmpty()) {
+      throw new NoSuchElementException();
+    }
+    if (!otherIndices.isEmpty()) {
+      // the highest element lives in the block with the highest offset
+      Entry<Long, IntSet> e = otherIndices.lastEntry();
+      return e.getKey().longValue() + e.getValue().last();
+    }
+    return firstIndices.last();
+  }
+
+  /**
+   * @return the number of elements in this set (its cardinality)
+   */
+  public long size()
+  {
+    long res = firstIndices.size();
+    for (IntSet block : otherIndices.values()) {
+      res += block.size();
+    }
+    return res;
+  }
+
+  /**
+   * @return <code>true</code> if this set contains no elements
+   */
+  public boolean isEmpty()
+  {
+    return firstIndices.isEmpty() && otherIndices.isEmpty();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode()
+  {
+    return 31 * firstIndices.hashCode() + otherIndices.hashCode();
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Two sets are equal when both the first block and the map of the other
+   * blocks are equal.
+   */
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof LongSet)) {
+      return false;
+    }
+    final LongSet other = (LongSet) obj;
+    return firstIndices.equals(other.firstIndices)
+           && otherIndices.equals(other.otherIndices);
+  }
+
+  /**
+   * Returns <code>true</code> if this set contains the specified element.
+   *
+   * @param i element whose presence in this set is to be tested
+   *
+   * @return <code>true</code> if this set contains the specified element
+   */
+  public boolean contains(long i)
+  {
+    if (i < SUBSET_SIZE) {
+      return firstIndices.contains((int) i);
+    }
+    long first = (i / SUBSET_SIZE) * SUBSET_SIZE;
+    IntSet s = otherIndices.get(first);
+    if (s == null) {
+      return false;
+    }
+    return s.contains((int) (i - first));
+  }
+
+  /**
+   * Adds the specified element to this set if it is not already present. It
+   * ensures that sets never contain duplicate elements.
+   *
+   * @param i element to be added to this set
+   *
+   * @return <code>true</code> if this set did not already contain the specified
+   * element
+   *
+   * @throws IllegalArgumentException if some property of the specified element prevents it from
+   *                                  being added to this set
+   */
+  public boolean add(long i)
+  {
+    if (i < SUBSET_SIZE) {
+      return firstIndices.add((int) i);
+    }
+    final long base = (i / SUBSET_SIZE) * SUBSET_SIZE;
+    IntSet block = otherIndices.get(base);
+    if (block == null) {
+      // first element of this block: create the block on demand
+      block = firstIndices.empty();
+      otherIndices.put(base, block);
+    }
+    return block.add((int) (i - base));
+  }
+
+  /**
+   * Removes the specified element from this set if it is present.
+   *
+   * @param i object to be removed from this set, if present
+   *
+   * @return <code>true</code> if this set contained the specified element
+   *
+   * @throws UnsupportedOperationException if the <code>remove</code> operation is not supported by this set
+   */
+  public boolean remove(long i)
+  {
+    if (i < SUBSET_SIZE) {
+      return firstIndices.remove((int) i);
+    }
+    final long base = (i / SUBSET_SIZE) * SUBSET_SIZE;
+    final IntSet block = otherIndices.get(base);
+    if (block == null) {
+      return false;
+    }
+    final boolean removed = block.remove((int) (i - base));
+    // a block that just lost its last element is dropped from the map
+    if (removed && block.isEmpty()) {
+      otherIndices.remove(base);
+    }
+    return removed;
+  }
+
+  /**
+   * Returns <code>true</code> if this set contains all of the elements of the
+   * specified collection.
+   * A {@code null} or empty argument, and this set itself, are always
+   * considered contained.
+   *
+   * @param other collection to be checked for containment in this set
+   *
+   * @return {@code true} if this set contains all of the elements of the
+   * specified collection
+   *
+   * @see #contains(long)
+   */
+  @SuppressWarnings("null")
+  public boolean containsAll(LongSet other)
+  {
+    if (other == null || other.isEmpty() || other == this) {
+      return true;
+    }
+    if (isEmpty()) {
+      return false;
+    }
+
+    if (!firstIndices.containsAll(other.firstIndices)) {
+      return false;
+    }
+    if (other.otherIndices.isEmpty()) {
+      return true;
+    }
+    if (otherIndices.isEmpty()) {
+      return false;
+    }
+
+    // Merge-walk both key-ordered maps. c is the outcome of the last key
+    // comparison: c <= 0 advances this set, c >= 0 advances the other one.
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          // this set is exhausted: fine only if the last keys matched and
+          // the other set has nothing left either
+          return c == 0 && !itr2.hasNext();
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          return true;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c > 0) {
+        // the other set owns a subset whose key this set skipped over
+        return false;
+      } else if (c == 0) {
+        if (!e1.getValue().containsAll(e2.getValue())) {
+          return false;
+        }
+      }
+    }
+  }
+
+  /**
+   * Returns {@code true} if the specified {@link LongSet}
+   * instance contains any elements that are also contained within this
+   * {@link LongSet} instance
+   *
+   * @param other {@link LongSet} to intersect with
+   *
+   * @return a boolean indicating whether this {@link LongSet}
+   * intersects the specified {@link LongSet}.
+   */
+  @SuppressWarnings("null")
+  public boolean containsAny(LongSet other)
+  {
+    // NOTE: a null or empty argument (and this set itself) yields true
+    if (other == null || other.isEmpty() || other == this) {
+      return true;
+    }
+    if (isEmpty()) {
+      return false;
+    }
+
+    // the explicit emptiness test keeps an empty other.firstIndices from
+    // counting as an intersection
+    if (firstIndices.containsAny(other.firstIndices) && !other.firstIndices.isEmpty()) {
+      return true;
+    }
+    if (other.otherIndices.isEmpty() || otherIndices.isEmpty()) {
+      return false;
+    }
+
+    // Merge-walk both key-ordered maps. c is the outcome of the last key
+    // comparison: c <= 0 advances this set, c >= 0 advances the other one.
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          return false;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          return false;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c == 0 && e1.getValue().containsAny(e2.getValue())) {
+        return true;
+      }
+    }
+  }
+
+  /**
+   * Returns {@code true} if the specified {@link LongSet}
+   * instance contains at least {@code minElements} elements that are
+   * also contained within this {@link LongSet} instance
+   *
+   * @param other       {@link LongSet} instance to intersect with
+   * @param minElements minimum number of elements to be contained within this
+   *                    {@link LongSet} instance
+   *
+   * @return a boolean indicating whether this {@link LongSet}
+   * intersects the specified {@link LongSet}.
+   *
+   * @throws IllegalArgumentException if {@code minElements < 1}
+   */
+  @SuppressWarnings("null")
+  public boolean containsAtLeast(LongSet other, long minElements)
+  {
+    if (minElements < 1) {
+      throw new IllegalArgumentException();
+    }
+    if (this == other) {
+      return size() >= minElements;
+    }
+    if (other == null || other.isEmpty() || isEmpty() || size() < minElements) {
+      return false;
+    }
+
+    // running count of common elements; stop as soon as the threshold is met
+    long res = firstIndices.intersectionSize(other.firstIndices);
+    if (res >= minElements) {
+      return true;
+    }
+    if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) {
+      return false;
+    }
+
+    // Merge-walk both key-ordered maps. c is the outcome of the last key
+    // comparison: c <= 0 advances this set, c >= 0 advances the other one.
+    Iterator<Entry<Long, IntSet>> itr1 = otherIndices.entrySet().iterator();
+    Iterator<Entry<Long, IntSet>> itr2 = other.otherIndices.entrySet().iterator();
+    Entry<Long, IntSet> e1 = null;
+    Entry<Long, IntSet> e2 = null;
+    int c = 0;
+    while (true) {
+      if (c <= 0) {
+        if (itr1.hasNext()) {
+          e1 = itr1.next();
+        } else {
+          return false;
+        }
+      }
+      if (c >= 0) {
+        if (itr2.hasNext()) {
+          e2 = itr2.next();
+        } else {
+          return false;
+        }
+      }
+
+      c = e1.getKey().compareTo(e2.getKey());
+      if (c == 0) {
+        res += e1.getValue().intersectionSize(e2.getValue());
+        if (res >= minElements) {
+          return true;
+        }
+      }
+    }
+  }
+
+  /**
+   * Removes all of the elements from this set. The set will be empty after
+   * this call returns.
+   */
+  public void clear()
+  {
+    firstIndices.clear();
+    otherIndices.clear();
+  }
+
+  /**
+   * @return an array containing all the elements in this set, in the same
+   * order, or {@code null} (not an empty array) when this set is empty
+   */
+  public long[] toArray()
+  {
+    if (isEmpty()) {
+      return null;
+    }
+    return toArray(new long[(int) size()]);
+  }
+
+  /**
+   * Returns an array containing all of the elements in this set.
+   *

+   * If this set fits in the specified array with room to spare (i.e., the
+   * array has more elements than this set), the elements in the array
+   * immediately following the end of the set are left unchanged.
+   *
+   * @param a the array into which the elements of this set are to be
+   *          stored.
+   *
+   * @return the array containing all the elements in this set
+   *
+   * @throws NullPointerException     if the specified array is null
+   * @throws IllegalArgumentException if this set does not fit in the specified array
+   */
+  public long[] toArray(long[] a)
+  {
+    if (a.length < size()) {
+      throw new IllegalArgumentException();
+    }
+    if (isEmpty()) {
+      return a;
+    }
+    ExtendedLongIterator itr = longIterator();
+    int i = 0;
+    while (itr.hasNext()) {
+      a[i++] = itr.next();
+    }
+    return a;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public String toString()
+  {
+    ExtendedLongIterator itr = longIterator();
+    if (!itr.hasNext()) {
+      return "[]";
+    }
+
+    StringBuilder sb = new StringBuilder();
+    sb.append('[');
+    for (; ; ) {
+      long e = itr.next();
+      sb.append(e);
+      if (!itr.hasNext()) {
+        return sb.append(']').toString();
+      }
+      sb.append(", ");
+    }
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public int compareTo(LongSet o)
+  {
+    //TODO
+    throw new UnsupportedOperationException("TODO");
+  }
+
+  /**
+   * A {@link Iterator} -like interface that allows to "skip" some elements of the set
+   */
+  public class ExtendedLongIterator
+  {
+    /**
+     * iterator over the subset currently being visited
+     *
+     * @uml.property name="itr"
+     * @uml.associationEnd
+     */
+    protected IntIterator itr;
+    /**
+     * iterator over the not yet visited entries of {@code otherIndices}
+     */
+    protected Iterator<Entry<Long, IntSet>> otherItrs;
+    /**
+     * value added to the offsets returned by {@link #itr}
+     */
+    protected long first = 0;
+    /**
+     * subset of {@code otherIndices} being visited, {@code null} while
+     * visiting {@code firstIndices}
+     *
+     * @uml.property name="current"
+     * @uml.associationEnd
+     */
+    protected IntSet current = null;
+
+    private ExtendedLongIterator()
+    {
+      itr = firstIndices.iterator();
+      otherItrs = otherIndices.entrySet().iterator();
+      first = 0;
+    }
+
+    /**
+     * Moves to the next subset of {@code otherIndices}.
+     *
+     * @throws NoSuchElementException if there is no further subset
+     */
+    protected void nextItr()
+    {
+      Entry<Long, IntSet> e = otherItrs.next();
+      current = e.getValue();
+      itr = e.getValue().iterator();
+      first = e.getKey().longValue();
+    }
+
+    /**
+     * @return {@code true} if the iterator has more elements.
+     */
+    public boolean hasNext()
+    {
+      return otherItrs.hasNext() || itr.hasNext();
+    }
+
+    /**
+     * @return the next element in the iteration.
+     *
+     * @throws NoSuchElementException iteration has no more elements.
+     */
+    public long next()
+    {
+      if (!itr.hasNext()) {
+        nextItr();
+      }
+      return first + itr.next();
+    }
+
+    /**
+     * Removes from the underlying collection the last element returned by
+     * the iterator (optional operation). This method can be called only
+     * once per call to {@code next}. The behavior of an iterator is
+     * unspecified if the underlying collection is modified while the
+     * iteration is in progress in any way other than by calling this
+     * method.
+     *
+     * @throws UnsupportedOperationException if the {@code remove} operation is not supported by
+     *                                       this Iterator.
+     * @throws IllegalStateException         if the {@code next} method has not yet been called,
+     *                                       or the {@code remove} method has already been called
+     *                                       after the last call to the {@code next} method.
+     */
+    public void remove()
+    {
+      itr.remove();
+      // a subset emptied by the removal is dropped from otherIndices as well
+      if (current != null && current.isEmpty()) {
+        otherItrs.remove();
+      }
+    }
+
+    /**
+     * Skips all the elements before the specified element, so that
+     * {@link #next()} gives the given element or, if it does not exist, the
+     * element immediately after according to the sorting provided by this
+     * set.
+     * <p>
+     * If {@code element} is less than the next element, it does
+     * nothing
+     *
+     * @param element first element to not skip
+     */
+    public void skipAllBefore(long element)
+    {
+      while (element >= first + SUBSET_SIZE) {
+        if (otherItrs.hasNext()) {
+          nextItr();
+        } else {
+          // No further subset: everything left lies before element. Jump to
+          // the last possible offset and consume it if it is present, so
+          // that the iterator ends up exhausted.
+          itr.skipAllBefore(SUBSET_SIZE - 1);
+          while (itr.hasNext()) {
+            itr.next();
+          }
+          return;
+        }
+      }
+      if (element < first) {
+        return;
+      }
+      itr.skipAllBefore((int) (element - first));
+    }
+  }
+
+  /**
+   * Iteration over the union of all indices, reverse order: the subsets of
+   * {@code otherIndices} from the greatest key down, then {@code firstIndices}.
+   */
+  private class ReverseLongIterator extends ExtendedLongIterator
+  {
+    /**
+     * {@code true} once the iteration has moved on to {@code firstIndices}
+     */
+    private boolean onFirstIndices;
+
+    private ReverseLongIterator()
+    {
+      super();
+      otherItrs = otherIndices.descendingMap().entrySet().iterator();
+      nextItr();
+    }
+
+    /**
+     * Moves to the next lower subset, ending with {@code firstIndices}.
+     *
+     * @throws NoSuchElementException if {@code firstIndices} was already reached
+     */
+    @Override
+    protected void nextItr()
+    {
+      if (otherItrs.hasNext()) {
+        Entry<Long, IntSet> e = otherItrs.next();
+        current = e.getValue();
+        itr = e.getValue().descendingIterator();
+        first = e.getKey().longValue();
+      } else if (!onFirstIndices) {
+        itr = firstIndices.descendingIterator();
+        current = null;
+        first = 0;
+        onFirstIndices = true;
+      } else {
+        // firstIndices is the last stop: never restart it
+        throw new NoSuchElementException();
+      }
+    }
+
+    /**
+     * @return {@code true} if the iterator has more elements, including
+     * those of {@code firstIndices} when it has not been reached yet
+     */
+    @Override
+    public boolean hasNext()
+    {
+      return itr.hasNext()
+             || otherItrs.hasNext()
+             || (!onFirstIndices && !firstIndices.isEmpty());
+    }
+
+    /**
+     * Skips all the elements greater than the specified one, so that
+     * {@link #next()} gives the given element or, if it does not exist, the
+     * greatest element below it.
+     *
+     * @param element first element to not skip
+     */
+    @Override
+    public void skipAllBefore(long element)
+    {
+      // drop every subset that lies entirely above element
+      while (element < first && !onFirstIndices) {
+        nextItr();
+      }
+      if (element < first) {
+        // only reachable with a negative element while on firstIndices
+        // (first == 0): nothing qualifies, so exhaust the iterator
+        itr.skipAllBefore(0);
+        while (itr.hasNext()) {
+          itr.next();
+        }
+        return;
+      }
+      if (element >= first + SUBSET_SIZE) {
+        // the current subset is already entirely below element
+        return;
+      }
+      itr.skipAllBefore((int) (element - first));
+    }
+  }
+}
diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java
new file mode 100755
index 000000000000..3c1529e204be
--- /dev/null
+++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java
@@ -0,0 +1,2052 @@
+/*
+ * (c) 2010 Alessandro Colantonio
+ *
+ *
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.druid.extendedset.wrappers.matrix;
+
+import io.druid.extendedset.intset.IntSet;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Formatter;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Very similar to {@link IntSet} but for pairs of ints, that is a binary matrix
+ *
+ * @author Alessandro Colantonio
+ * @version $Id$
+ * @see IntSet
+ */
+public class BinaryMatrix implements Cloneable, Comparable<BinaryMatrix>
+{
+  /**
+   * set of all rows, indexed by row number; a {@code null} entry stands for
+   * a row without cells
+   */
+  private final List<IntSet> rows = new ArrayList<IntSet>();
+
+  /**
+   * {@link IntSet} instance to create empty rows
+   *
+   * @uml.property name="template"
+   * @uml.associationEnd
+   */
+  private final IntSet template;
+
+  /**
+   * used to cache the returned value: {@code [row, column]}
+   */
+  private final int[] resultCache = new int[2];
+
+  /**
+   * Creates an empty matrix. The matrix is internally represented by putting
+   * rows (transactions) in sequence. The provided constructor allows to
+   * specify which {@link IntSet} instance must be used to internally
+   * represent rows.
+   *
+   * @param template {@link IntSet} instance to create empty rows
+   */
+  public BinaryMatrix(IntSet template)
+  {
+    this.template = template;
+  }
+
+  /**
+   * @return {@link IntSet} instance internally used to represent rows
+   */
+  public IntSet emptyRow()
+  {
+    return template.empty();
+  }
+
+  /**
+   * Remove {@code null} cells at the end of {@link #rows}
+   */
+  private void fixRows()
+  {
+    int last = rows.size() - 1;
+    while (last >= 0 && rows.get(last) == null) {
+      rows.remove(last--);
+    }
+  }
+
+  /**
+   * @return a copy of the given row, or {@code null} for a {@code null} row
+   */
+  private static IntSet cloneRowOrNull(IntSet row)
+  {
+    return row == null ? null : row.clone();
+  }
+
+  /**
+   * @return the given row, or {@code null} when it has no elements, since
+   * rows without cells are stored as {@code null}
+   */
+  private static IntSet emptyRowToNull(IntSet row)
+  {
+    return row.isEmpty() ? null : row;
+  }
+
+  /**
+   * Generates the intersection matrix
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand
+   *
+   * @return the result of the operation
+   *
+   * @see #retainAll(BinaryMatrix)
+   */
+  public BinaryMatrix intersection(BinaryMatrix other)
+  {
+    BinaryMatrix res = empty();
+    final int rowCount = Math.min(rows.size(), other.rows.size());
+    for (int i = 0; i < rowCount; i++) {
+      IntSet s1 = rows.get(i);
+      IntSet s2 = other.rows.get(i);
+      if (s1 == null || s2 == null) {
+        res.rows.add(null);
+      } else {
+        res.rows.add(emptyRowToNull(s1.intersection(s2)));
+      }
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    res.fixRows();
+    return res;
+  }
+
+  /**
+   * Generates the union matrix
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand
+   *
+   * @return the result of the operation
+   *
+   * @see #addAll(BinaryMatrix)
+   */
+  public BinaryMatrix union(BinaryMatrix other)
+  {
+    BinaryMatrix res = empty();
+    final int rowCount = Math.min(rows.size(), other.rows.size());
+    int i = 0;
+    for (; i < rowCount; i++) {
+      IntSet s1 = rows.get(i);
+      IntSet s2 = other.rows.get(i);
+      if (s1 == null) {
+        res.rows.add(cloneRowOrNull(s2));
+      } else if (s2 == null) {
+        res.rows.add(s1.clone());
+      } else {
+        res.rows.add(s1.union(s2));
+      }
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    // rows beyond the shorter matrix: at most one of the two loops runs
+    for (; i < rows.size(); i++) {
+      res.rows.add(cloneRowOrNull(rows.get(i)));
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    for (; i < other.rows.size(); i++) {
+      res.rows.add(cloneRowOrNull(other.rows.get(i)));
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    return res;
+  }
+
+  /**
+   * Generates the difference matrix
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand
+   *
+   * @return the result of the operation
+   *
+   * @see #removeAll(BinaryMatrix)
+   */
+  public BinaryMatrix difference(BinaryMatrix other)
+  {
+    BinaryMatrix res = empty();
+    final int rowCount = Math.min(rows.size(), other.rows.size());
+    int i = 0;
+    for (; i < rowCount; i++) {
+      IntSet s1 = rows.get(i);
+      IntSet s2 = other.rows.get(i);
+      if (s1 == null) {
+        res.rows.add(null);
+      } else if (s2 == null) {
+        res.rows.add(s1.clone());
+      } else {
+        res.rows.add(emptyRowToNull(s1.difference(s2)));
+      }
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    // rows that exist only in this matrix are kept untouched
+    for (; i < rows.size(); i++) {
+      res.rows.add(cloneRowOrNull(rows.get(i)));
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    res.fixRows();
+    return res;
+  }
+
+  /**
+   * Generates the symmetric difference matrix
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand
+   *
+   * @return the result of the operation
+   *
+   * @see #flip(int, int)
+   */
+  public BinaryMatrix symmetricDifference(BinaryMatrix other)
+  {
+    BinaryMatrix res = empty();
+    final int rowCount = Math.min(rows.size(), other.rows.size());
+    int i = 0;
+    for (; i < rowCount; i++) {
+      IntSet s1 = rows.get(i);
+      IntSet s2 = other.rows.get(i);
+      if (s1 == null) {
+        res.rows.add(cloneRowOrNull(s2));
+      } else if (s2 == null) {
+        res.rows.add(s1.clone());
+      } else {
+        // two equal rows cancel out: store null, never an empty set
+        res.rows.add(emptyRowToNull(s1.symmetricDifference(s2)));
+      }
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    // rows beyond the shorter matrix: at most one of the two loops runs
+    for (; i < rows.size(); i++) {
+      res.rows.add(cloneRowOrNull(rows.get(i)));
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    for (; i < other.rows.size(); i++) {
+      res.rows.add(cloneRowOrNull(other.rows.get(i)));
+      assert res.rows.get(i) == null || !res.rows.get(i).isEmpty();
+    }
+    res.fixRows();
+    return res;
+  }
+
+  /**
+   * Generates the complement matrix, namely flipping all the cells. This
+   * matrix is left untouched.
+   *
+   * @return the complement matrix
+   *
+   * @see BinaryMatrix#complement()
+   */
+  public BinaryMatrix complemented()
+  {
+    BinaryMatrix res = empty();
+
+    final int maxCol = maxCol();
+
+    for (int i = 0; i < rows.size(); i++) {
+      final IntSet s = rows.get(i);
+      IntSet c;
+
+      if (s == null) {
+        c = template.empty();
+        c.fill(0, maxCol);
+      } else {
+        // work on a copy so that the rows of this matrix are not modified;
+        // same "add maxCol + 1, then complement" steps as complement()
+        c = s.clone();
+        c.add(maxCol + 1);
+        c.complement();
+        if (c.isEmpty()) {
+          c = null;
+        }
+      }
+
+      res.rows.add(c);
+    }
+
+    res.fixRows();
+    return res;
+  }
+
+  /**
+   * Complements the current matrix.
+   *
+   * @see BinaryMatrix#complemented()
+   */
+  public void complement()
+  {
+    final int maxCol = maxCol();
+
+    for (int i = 0; i < rows.size(); i++) {
+      IntSet s = rows.get(i);
+
+      if (s == null) {
+        // a row without cells becomes a full row: columns 0..maxCol, the
+        // same range used by complemented() and complementSize()
+        s = template.empty();
+        s.fill(0, maxCol);
+        rows.set(i, s);
+      } else {
+        s.add(maxCol + 1);
+        s.complement();
+        if (s.isEmpty()) {
+          rows.set(i, null);
+        }
+      }
+    }
+
+    fixRows();
+  }
+
+  /**
+   * Returns {@code true} if the specified {@link BinaryMatrix} instance
+   * contains any cell that is also contained within this {@link BinaryMatrix}
+   * instance
+   *
+   * @param other {@link BinaryMatrix} to intersect with
+   *
+   * @return a boolean indicating whether this {@link BinaryMatrix} intersects
+   * the specified {@link BinaryMatrix}.
+   */
+  public boolean containsAny(BinaryMatrix other)
+  {
+    // only rows present in both matrices can share cells
+    final int rowCount = Math.min(rows.size(), other.rows.size());
+    for (int i = 0; i < rowCount; i++) {
+      IntSet s1 = rows.get(i);
+      IntSet s2 = other.rows.get(i);
+      if (s1 != null && s2 != null) {
+        if (s1.containsAny(s2)) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns {@code true} if the specified {@link BinaryMatrix} instance
+   * contains at least {@code minElements} cells that are also contained
+   * within this {@link BinaryMatrix} instance
+   *
+   * @param other    {@link BinaryMatrix} instance to intersect with
+   * @param minCells minimum number of cells to be contained within this
+   *                 {@link BinaryMatrix} instance
+   *
+   * @return a boolean indicating whether this {@link BinaryMatrix} intersects
+   * the specified {@link BinaryMatrix}.
+   *
+   * @throws IllegalArgumentException if {@code minCells < 1}
+   */
+  public boolean containsAtLeast(BinaryMatrix other, int minCells)
+  {
+    // special cases
+    if (minCells < 1) {
+      throw new IllegalArgumentException();
+    }
+    final int ownSize = size();
+    if (ownSize < minCells) {
+      return false;
+    }
+    if (other == null || other.isEmpty() || isEmpty()) {
+      return false;
+    }
+    if (this == other) {
+      return ownSize >= minCells;
+    }
+
+    // exact count on every shared row but the last one
+    final int lastShared = Math.min(rows.size(), other.rows.size()) - 1;
+    int common = 0;
+    for (int r = 0; r < lastShared; r++) {
+      final IntSet mine = rows.get(r);
+      final IntSet theirs = other.rows.get(r);
+      if (mine == null || theirs == null) {
+        continue;
+      }
+      common += mine.intersectionSize(theirs);
+      if (common >= minCells) {
+        return true;
+      }
+    }
+
+    // the last shared row only has to supply the cells still missing
+    final IntSet mineLast = rows.get(lastShared);
+    final IntSet theirsLast = other.rows.get(lastShared);
+    if (mineLast != null && theirsLast != null) {
+      return mineLast.containsAtLeast(theirsLast, minCells - common);
+    }
+    return false;
+  }
+
+  /**
+   * Computes the intersection matrix size.
+   *

+   * This is faster than calling {@link #intersection(BinaryMatrix)} and then
+   * {@link #size()}
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand
+   *
+   * @return the size
+   */
+  public int intersectionSize(BinaryMatrix other)
+  {
+    // rows missing from either matrix contribute nothing
+    final int sharedRows = Math.min(rows.size(), other.rows.size());
+    int total = 0;
+    for (int r = 0; r < sharedRows; r++) {
+      final IntSet mine = rows.get(r);
+      final IntSet theirs = other.rows.get(r);
+      if (mine == null || theirs == null) {
+        continue;
+      }
+      total += mine.intersectionSize(theirs);
+    }
+    return total;
+  }
+
+  /**
+   * Computes the union matrix size.
+   *

+   * This is faster than calling {@link #union(BinaryMatrix)} and then
+   * {@link #size()}
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand; {@code null} is treated as an empty matrix
+   *
+   * @return the size
+   */
+  public int unionSize(BinaryMatrix other)
+  {
+    final int ownSize = size();
+    if (other == null) {
+      return ownSize;
+    }
+    // inclusion-exclusion: shared cells would otherwise be counted twice
+    return ownSize + other.size() - intersectionSize(other);
+  }
+
+  /**
+   * Computes the symmetric difference matrix size.
+   *

+   * This is faster than calling {@link #symmetricDifference(BinaryMatrix)}
+   * and then {@link #size()}
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand; {@code null} is treated as an empty matrix
+   *
+   * @return the size
+   */
+  public int symmetricDifferenceSize(BinaryMatrix other)
+  {
+    final int ownSize = size();
+    if (other == null) {
+      return ownSize;
+    }
+    // shared cells are dropped from both operands, hence the factor 2
+    return ownSize + other.size() - 2 * intersectionSize(other);
+  }
+
+  /**
+   * Computes the difference matrix size.
+   *

+   * This is faster than calling {@link #difference(BinaryMatrix)} and then
+   * {@link #size()}
+   *
+   * @param other {@link BinaryMatrix} instance that represents the right
+   *              operand; {@code null} is treated as an empty matrix
+   *
+   * @return the size
+   */
+  public int differenceSize(BinaryMatrix other)
+  {
+    final int ownSize = size();
+    if (other == null) {
+      return ownSize;
+    }
+    return ownSize - intersectionSize(other);
+  }
+
+  /**
+   * Computes the complement set size.
+   *

+   * This is faster than calling {@link #complemented()} and then
+   * {@link #size()}
+   *
+   * @return the size
+   */
+  public int complementSize()
+  {
+    final int maxCol = maxCol();
+    int total = 0;
+    for (IntSet row : rows) {
+      // every row spans maxCol + 1 columns; its own cells are the ones
+      // missing from the complement
+      total += maxCol + 1;
+      if (row != null) {
+        total -= row.size();
+      }
+    }
+    return total;
+  }
+
+  /**
+   * Generates an empty matrix built on the same row template
+   *
+   * @return the empty matrix
+   */
+  public BinaryMatrix empty()
+  {
+    return new BinaryMatrix(template);
+  }
+
+  /**
+   * See the {@code clone()} of {@link Object}. Every row is cloned, so the
+   * copy shares no row with this matrix.
+   *
+   * @return cloned object
+   */
+  @Override
+  public BinaryMatrix clone()
+  {
+    final BinaryMatrix copy = empty();
+    for (int r = 0; r < rows.size(); r++) {
+      final IntSet row = rows.get(r);
+      if (row == null) {
+        copy.rows.add(null);
+      } else {
+        copy.rows.add(row.clone());
+      }
+    }
+    return copy;
+  }
+
+  /**
+   * Computes the compression factor of the equivalent bitmap representation
+   * (1 means not compressed, namely a memory footprint similar to
+   * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.)
+   * <p>
+   * Not implemented yet: it always throws {@link UnsupportedOperationException}.
+   *
+   * @return the compression factor
+   */
+  public double bitmapCompressionRatio()
+  {
+    throw new UnsupportedOperationException("TODO"); //TODO
+  }
+
+  /**
+   * Computes the compression factor of the equivalent integer collection (1
+   * means not compressed, namely a memory footprint similar to
+   * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.)
+   * <p>
+   * Not implemented yet: it always throws {@link UnsupportedOperationException}.
+   *
+   * @return the compression factor
+   */
+  public double collectionCompressionRatio()
+  {
+    throw new UnsupportedOperationException("TODO"); //TODO
+  }
+
+  /**
+   * @return the {@link CellIterator} handed out for a matrix without cells
+   */
+  private CellIterator emptyCellIterator()
+  {
+    return new CellIterator()
+    {
+      @Override
+      public boolean hasNext() {return false;}
+
+      @Override
+      public int[] next() {throw new NoSuchElementException();}
+
+      @Override
+      public void remove() {throw new IllegalStateException();}
+
+      @Override
+      public void skipAllBefore(int row, int col) {return;}
+    };
+  }
+
+  /**
+   * Iterates over the cells row by row, in ascending order. The array
+   * returned by {@code next()} is reused across calls.
+   *
+   * @return a {@link CellIterator} instance to iterate over the matrix
+   */
+  public CellIterator iterator()
+  {
+    if (isEmpty()) {
+      return emptyCellIterator();
+    }
+
+    return new CellIterator()
+    {
+      private final int[] itrResultCache = new int[2];
+      int curRow = 0;
+      IntSet.IntIterator curRowItr;
+
+      {
+        // start from the first row that has cells
+        while (rows.get(curRow) == null) {
+          curRow++;
+        }
+        curRowItr = rows.get(curRow).iterator();
+        itrResultCache[0] = curRow;
+      }
+
+      @Override
+      public int[] next()
+      {
+        if (!hasNext()) {
+          throw new NoSuchElementException();
+        }
+        if (!curRowItr.hasNext()) {
+          IntSet s;
+          while ((s = rows.get(++curRow)) == null) {/**/}
+          curRowItr = s.iterator();
+          itrResultCache[0] = curRow;
+        }
+        itrResultCache[1] = curRowItr.next();
+        return itrResultCache;
+      }
+
+      @Override
+      public boolean hasNext()
+      {
+        return curRow < rows.size() - 1 || curRowItr.hasNext();
+      }
+
+      @Override
+      public void skipAllBefore(int row, int col)
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+
+      @Override
+      public void remove()
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+    };
+  }
+
+  /**
+   * Iterates over the cells row by row, from the last row to the first one.
+   * The array returned by {@code next()} is reused across calls.
+   *
+   * @return a {@link CellIterator} instance to iterate over the matrix in
+   * descending order
+   */
+  public CellIterator descendingIterator()
+  {
+    if (isEmpty()) {
+      return emptyCellIterator();
+    }
+
+    return new CellIterator()
+    {
+      final int minRow;
+      private final int[] itrResultCache = new int[2];
+      int curRow = rows.size() - 1;
+      IntSet.IntIterator curRowItr;
+
+      {
+        // the first row with cells is where the iteration ends
+        int m = 0;
+        while (rows.get(m) == null) {
+          m++;
+        }
+        minRow = m;
+        curRowItr = rows.get(curRow).descendingIterator();
+        itrResultCache[0] = curRow;
+      }
+
+      @Override
+      public int[] next()
+      {
+        if (!hasNext()) {
+          throw new NoSuchElementException();
+        }
+        if (!curRowItr.hasNext()) {
+          IntSet s;
+          while ((s = rows.get(--curRow)) == null) {/**/}
+          curRowItr = s.descendingIterator();
+          itrResultCache[0] = curRow;
+        }
+        itrResultCache[1] = curRowItr.next();
+        return itrResultCache;
+      }
+
+      @Override
+      public boolean hasNext()
+      {
+        return curRow > minRow || curRowItr.hasNext();
+      }
+
+      @Override
+      public void skipAllBefore(int row, int col)
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+
+      @Override
+      public void remove()
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+    };
+  }
+
+  /**
+   * Prints debug info about the given {@link BinaryMatrix} implementation:
+   * one line per row, {@code "-"} standing for a row without cells.
+   *
+   * @return a string that describes the internal representation of the
+   * instance
+   */
+  public String debugInfo()
+  {
+    if (isEmpty()) {
+      return "empty";
+    }
+
+    StringBuilder s = new StringBuilder();
+
+    // row numbers are right-aligned on the width of the largest row count
+    String format = String.format("%%%dd) ", (int) Math.log10(rows.size()) + 1);
+    for (int i = 0; i < rows.size(); i++) {
+      s.append(String.format(format, i));
+      s.append(rows.get(i) == null ? "-" : rows.get(i).toString());
+      s.append('\n');
+    }
+
+    return s.toString();
+  }
+
+  /**
+   * Adds to the matrix all the cells of the specified sub-matrix, both
+   * corners included.
+   *
+   * @param fromRow first row of the sub-matrix
+   * @param fromCol first column of the sub-matrix
+   * @param toRow   last row of the sub-matrix
+   * @param toCol   last column of the sub-matrix
+   *
+   * @throws IndexOutOfBoundsException if a "from" index is greater than the matching "to" index
+   */
+  public void fill(int fromRow, int fromCol, int toRow, int toCol)
+  {
+    checkRange(fromRow, fromCol, toRow, toCol);
+
+    // make room for every row up to toRow
+    for (int missing = toRow - rows.size(); missing >= 0; missing--) {
+      rows.add(null);
+    }
+
+    for (int r = fromRow; r <= toRow; r++) {
+      IntSet row = rows.get(r);
+      if (row == null) {
+        row = template.empty();
+        rows.set(r, row);
+      }
+      row.fill(fromCol, toCol);
+    }
+  }
+
+  /**
+   * Rejects sub-matrices whose first row or column comes after the last one.
+   */
+  private static void checkRange(int fromRow, int fromCol, int toRow, int toCol)
+  {
+    if (fromRow > toRow) {
+      throw new IndexOutOfBoundsException("fromRow: " + fromRow + " > toRow: " + toRow);
+    }
+    if (fromCol > toCol) {
+      throw new IndexOutOfBoundsException("fromCol: " + fromCol + " > toCol: " + toCol);
+    }
+  }
+
+  /**
+   * Removes from the set all the cells of the specified sub-matrix, both
+   * corners included.
+   *
+   * @param fromRow first row of the sub-matrix
+   * @param fromCol first column of the sub-matrix
+   * @param toRow   last row of the sub-matrix
+   * @param toCol   last column of the sub-matrix
+   *
+   * @throws IndexOutOfBoundsException if a "from" index is greater than the matching "to" index
+   */
+  public void clear(int fromRow, int fromCol, int toRow, int toCol)
+  {
+    checkRange(fromRow, fromCol, toRow, toCol);
+
+    // rows beyond the end of the matrix have nothing to clear
+    final int lastRow = Math.min(toRow, rows.size() - 1);
+    for (int r = lastRow; r >= fromRow; r--) {
+      final IntSet row = rows.get(r);
+      if (row != null) {
+        row.clear(fromCol, toCol);
+        if (row.isEmpty()) {
+          rows.set(r, null);
+        }
+      }
+    }
+    fixRows();
+  }
+
+  /**
+   * Adds the cell if it not existing, or removes it if existing
+   *
+   * @param row row of the cell to flip
+   * @param col column of the cell to flip
+   *
+   * @see #symmetricDifference(BinaryMatrix)
+   */
+  public void flip(int row, int col)
+  {
+    for (int missing = row - rows.size(); missing >= 0; missing--) {
+      rows.add(null);
+    }
+    IntSet cols = rows.get(row);
+    if (cols == null) {
+      cols = template.empty();
+      rows.set(row, cols);
+    }
+    cols.flip(col);
+    if (cols.isEmpty()) {
+      // the flip removed the only cell of the row
+      rows.set(row, null);
+      fixRows();
+    }
+  }
+
+  /**
+   * Gets the {@code i}-th cell of the matrix.
+   * IMPORTANT: each call returns an array of two elements, where the
+   * first element is the row, while the second element is the column of the
+   * current cell. In order to reduce the produced heap garbage, there is only
+   * one array instantiated for each {@link BinaryMatrix} instance,
+   * whose content is overridden at each method call.
+   *
+   * @param i position of the cell in the sorted matrix
+   *
+   * @return the {@code i}-th cell of the matrix, as a pair
+   * {@code <row,column>}
+   *
+   * @throws NoSuchElementException if {@code i} is greater or equal to
+   *                                {@link #size()}
+   */
+  public int[] get(int i)
+  {
+    int remaining = i;
+    for (int r = 0; r < rows.size(); r++) {
+      final IntSet row = rows.get(r);
+      if (row == null) {
+        continue;
+      }
+      final int rowSize = row.size();
+      if (remaining < rowSize) {
+        resultCache[0] = r;
+        resultCache[1] = row.get(remaining);
+        return resultCache;
+      }
+      // the cell lies further on: discount the cells of this row
+      remaining -= rowSize;
+    }
+    throw new NoSuchElementException();
+  }
+
+  /**
+   * Provides position of cell within the matrix.
+   *

+   * It returns -1 if the cell does not exist within the set, which includes
+   * negative or out-of-range coordinates.
+   *
+   * @param row row of the cell
+   * @param col column of the cell
+   *
+   * @return the cell position
+   */
+  public int indexOf(int row, int col)
+  {
+    // same coordinate checks as contains(int, int)
+    if (row < 0 || col < 0 || row >= rows.size() || rows.get(row) == null) {
+      return -1;
+    }
+    int res = rows.get(row).indexOf(col);
+    if (res == -1) {
+      return -1;
+    }
+    // add the cells of all the preceding rows
+    for (int r = 0; r < row; r++) {
+      IntSet s = rows.get(r);
+      if (s == null) {
+        continue;
+      }
+      res += s.size();
+    }
+    return res;
+  }
+
+  /**
+   * Converts a given matrix of boolean {@code n x m} into an instance
+   * of the current class.
+   * <p>
+   * Not implemented yet: it always throws {@link UnsupportedOperationException}.
+   *
+   * @param a array to use to generate the new instance
+   *
+   * @return the converted collection
+   */
+  public BinaryMatrix convert(boolean[][] a)
+  {
+    throw new UnsupportedOperationException("TODO"); //TODO
+  }
+
+  /**
+   * Returns the first (lowest) cell currently in this set. IMPORTANT:
+   * each call returns an array of two elements, where the first element is
+   * the row, while the second element is the column of the current cell. In
+   * order to reduce the produced heap garbage, there is only one array
+   * instantiated for each {@link BinaryMatrix} instance, whose content is
+   * overridden at each method call.
+   *
+   * @return the first (lowest) cell currently in this set
+   *
+   * @throws NoSuchElementException if this set is empty
+   */
+  public int[] first()
+  {
+    if (isEmpty()) {
+      throw new NoSuchElementException();
+    }
+
+    // find the first non-empty row
+    int i = 0;
+    IntSet s;
+    while ((s = rows.get(i)) == null) {
+      i++;
+    }
+
+    // prepare the result
+    resultCache[0] = i;
+    resultCache[1] = s.first();
+    return resultCache;
+  }
+
+  /**
+   * Returns the last (highest) cell currently in this set. IMPORTANT:
+   * each call returns an array of two elements, where the first element is
+   * the row, while the second element is the column of the current cell.
+   * In order to reduce the produced heap garbage, there is only one array
+   * instantiated for each {@link BinaryMatrix} instance, whose content is
+   * overridden at each method call.
+   *
+   * @return the last (highest) cell currently in this set
+   *
+   * @throws NoSuchElementException if this set is empty
+   */
+  public int[] last()
+  {
+    if (isEmpty()) {
+      throw new NoSuchElementException();
+    }
+    // the last stored row is read directly, without a search
+    final int lastRow = rows.size() - 1;
+    resultCache[0] = lastRow;
+    resultCache[1] = rows.get(lastRow).last();
+    return resultCache;
+  }
+
+  /**
+   * @return the number of cells in this matrix (its cardinality)
+   */
+  public int size()
+  {
+    int cells = 0;
+    for (int r = 0; r < rows.size(); r++) {
+      final IntSet row = rows.get(r);
+      if (row == null) {
+        continue;
+      }
+      cells += row.size();
+    }
+    return cells;
+  }
+
+  /**
+   * @return {@code true} if this matrix contains no cells
+   */
+  public boolean isEmpty()
+  {
+    return rows.isEmpty();
+  }
+
+  /**
+   * Returns {@code true} if this set contains the specified cell.
+   *
+   * @param row row of the cell
+   * @param col column of the cell
+   *
+   * @return {@code true} if this matrix contains the specified cell
+   */
+  public boolean contains(int row, int col)
+  {
+    if (row < 0 || col < 0 || row >= rows.size()) {
+      return false;
+    }
+    if (rows.get(row) == null) {
+      return false;
+    }
+    return rows.get(row).contains(col);
+  }
+
+  /**
+   * Adds the specified cell to this matrix if it is not already present. It
+   * ensures that matrices never contain duplicate cells.
+   *
+   * @param row row of the cell
+   * @param col column of the cell
+   *
+   * @return {@code true} if this matrix did not already contain the
+   * specified cell
+   *
+   * @throws IllegalArgumentException if some property of the specified cell prevents it from being
+   *                                  added to this matrix
+   */
+  public boolean add(int row, int col)
+  {
+    // make room for every row up to the requested one
+    for (int missing = row - rows.size(); missing >= 0; missing--) {
+      rows.add(null);
+    }
+    IntSet cols = rows.get(row);
+    if (cols == null) {
+      cols = template.empty();
+      rows.set(row, cols);
+    }
+    return cols.add(col);
+  }
+
+  /**
+   * Adds the specified cells to this matrix, if not already present. The
+   * cells are represented by a given row and a set of columns.
+ * + * @param row index of the row + * @param cols indices of the columns + * + * @return true if this matrix did not already contain the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean addAll(int row, IntSet cols) + { + while (row >= rows.size()) { + rows.add(null); + } + IntSet r = rows.get(row); + if (r == null) { + rows.set(row, r = template.empty()); + } + return r.addAll(cols); + } + + /** + * Adds the specified cells to this matrix, if not already present. The + * cells are represented by a given set of rows and a given column + * + * @param rowSet indices of the rows + * @param col index of the column + * + * @return true if this matrix did not already contain the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean addAll(IntSet rowSet, int col) + { + if (rowSet == null || rowSet.isEmpty()) { + return false; + } + + // prepare the space + final int l = rowSet.last(); + while (l >= rows.size()) { + rows.add(null); + } + + boolean res = false; + IntSet.IntIterator itr = rowSet.iterator(); + while (itr.hasNext()) { + int r = itr.next(); + IntSet s = rows.get(r); + if (s == null) { + rows.set(r, template.convert(col)); + res = true; + } else { + res |= s.add(col); + } + } + return res; + } + + /** + * Adds the specified cells to this matrix, if not already present. 
The + * cells are represented by the Cartesian product of a given set of rows and + * columns + * + * @param rowSet indices of the rows + * @param colSet indices of the columns + * + * @return true if this matrix did not already contain the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean addAll(IntSet rowSet, IntSet colSet) + { + if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { + return false; + } + + // prepare the space + final int l = rowSet.last(); + while (l >= rows.size()) { + rows.add(null); + } + + boolean res = false; + IntSet.IntIterator itr = rowSet.iterator(); + while (itr.hasNext()) { + int row = itr.next(); + IntSet cols = rows.get(row); + if (cols == null) { + IntSet newCols = template.empty(); + newCols.addAll(colSet); + rows.set(row, newCols); + res = true; + } else { + res |= cols.addAll(colSet); + } + } + return res; + } + + /** + * Removes the specified cell from this matrix if it is present. + * + * @param row row of the cell + * @param col column of the cell + * + * @return true if this matrix contained the specified cell + * + * @throws UnsupportedOperationException if the remove operation is not supported by this + * matrix + */ + public boolean remove(int row, int col) + { + if (row < 0 || col < 0 || row >= rows.size()) { + return false; + } + IntSet r = rows.get(row); + if (r == null) { + return false; + } + if (r.remove(col)) { + if (r.isEmpty()) { + rows.set(row, null); + fixRows(); + } + return true; + } + return false; + } + + /** + * Removes the specified cells from this matrix. The cells are represented by + * a given row and a set of columns. 
+ * + * @param row index of the row + * @param cols indices of the columns + * + * @return true if this matrix contains at least one of the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * removed from this matrix + */ + public boolean removeAll(int row, IntSet cols) + { + if (row < 0 || row >= rows.size()) { + return false; + } + IntSet r = rows.get(row); + if (r == null) { + return false; + } + if (r.removeAll(cols)) { + if (r.isEmpty()) { + rows.set(row, null); + fixRows(); + } + return true; + } + return false; + } + + /** + * Removes the specified cells from this matrix. The cells are represented + * by a given set of rows and a given column + * + * @param rowSet indices of the rows + * @param col index of the column + * + * @return true if this matrix contains at least one of the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean removeAll(IntSet rowSet, int col) + { + if (rowSet == null || rowSet.isEmpty()) { + return false; + } + + boolean res = false; + IntSet.IntIterator itr = rowSet.iterator(); + while (itr.hasNext()) { + int r = itr.next(); + IntSet s = rows.get(r); + if (s == null) { + continue; + } + res |= s.remove(col); + if (s.isEmpty()) { + rows.set(r, null); + } + } + if (res) { + fixRows(); + } + return res; + } + + /** + * Removes the specified cells from this matrix. 
The cells are represented + * by the Cartesian product of a given set of rows and columns + * + * @param rowSet indices of the rows + * @param colSet indices of the columns + * + * @return true if this matrix contains at least one of the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean removeAll(IntSet rowSet, IntSet colSet) + { + if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { + return false; + } + + boolean res = false; + IntSet.IntIterator itr = rowSet.iterator(); + while (itr.hasNext()) { + int r = itr.next(); + IntSet s = rows.get(r); + if (s == null) { + continue; + } + res |= s.removeAll(colSet); + if (s.isEmpty()) { + rows.set(r, null); + } + } + if (res) { + fixRows(); + } + return res; + } + + /** + * Retains the specified cells from this matrix. The cells are represented by + * a given row and a set of columns. + * + * @param row index of the row + * @param cols indices of the columns + * + * @return true if this matrix contains at least one of the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * removed from this matrix + */ + public boolean retainAll(int row, IntSet cols) + { + if (isEmpty()) { + return false; + } + if (row < 0 || row >= rows.size()) { + clear(); + return true; + } + + IntSet r = rows.get(row); + if (r == null) { + clear(); + return true; + } + boolean res = false; + for (int i = 0; i < rows.size(); i++) { + if (i == row) { + continue; + } + final IntSet r1 = rows.get(i); + if (r1 != null) { + res = true; + rows.set(i, null); + } + } + res |= r.retainAll(cols); + fixRows(); + return res; + } + + /** + * Removes the specified cells from this matrix. 
The cells are represented + * by a given set of rows and a given column + * + * @param rowSet indices of the rows + * @param col index of the column + * + * @return true if this matrix contains at least one of the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean retainAll(IntSet rowSet, int col) + { + if (isEmpty()) { + return false; + } + if (rowSet == null || rowSet.isEmpty()) { + clear(); + return false; + } + + boolean res = false; + IntSet.IntIterator itr = rowSet.iterator(); + int i = 0; + int r = itr.next(); + do { + IntSet rr = rows.get(i); + if (rr == null) { + i++; + } else if (i < r) { + rows.set(i, null); + res = true; + i++; + } else if (i > r) { + r = itr.next(); + } else { + if (!rr.contains(col)) { + rows.set(i, null); + res = true; + } else if (rr.size() > 1) { + rr.clear(); + rr.add(col); + res = true; + } + i++; + r = itr.next(); + } + } while (i < rows.size() && itr.hasNext()); + res |= i < rows.size(); + for (; i < rows.size(); i++) { + rows.set(i, null); + } + if (res) { + fixRows(); + } + return res; + } + + /** + * Removes the specified cells from this matrix. 
The cells are represented + * by the Cartesian product of a given set of rows and columns + * + * @param rowSet indices of the rows + * @param colSet indices of the columns + * + * @return true if this matrix contains at least one of the + * specified cells + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean retainAll(IntSet rowSet, IntSet colSet) + { + if (isEmpty()) { + return false; + } + if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { + clear(); + return false; + } + + boolean res = false; + IntSet.IntIterator itr = rowSet.iterator(); + int i = 0; + int r = itr.next(); + do { + IntSet rr = rows.get(i); + if (rr == null) { + i++; + } else if (i < r) { + rows.set(i, null); + res = true; + i++; + } else if (i > r) { + r = itr.next(); + } else { + res |= rr.retainAll(colSet); + if (rr.isEmpty()) { + rows.set(i, null); + } + i++; + r = itr.next(); + } + } while (i < rows.size() && itr.hasNext()); + res |= i < rows.size(); + for (; i < rows.size(); i++) { + rows.set(i, null); + } + if (res) { + fixRows(); + } + return res; + } + + /** + * Returns true if this matrix contains all of the cells of the + * specified collection. 
+ * + * @param other matrix to be checked for containment in this matrix + * + * @return true if this matrix contains all of the cells of the + * specified collection + * + * @throws NullPointerException if the specified collection contains one or more null cells + * and this matrix does not permit null cells (optional), or if + * the specified collection is null + * @see #contains(int, int) + */ + public boolean containsAll(BinaryMatrix other) + { + if (other == null || other.isEmpty() || other == this) { + return true; + } + if (isEmpty() || rows.size() < other.rows.size()) { + return false; + } + + for (int i = 0; i < other.rows.size(); i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s2 == null) { + continue; + } + if (s1 == null || !s1.containsAll(s2)) { + return false; + } + } + return true; + } + + /** + * Returns true if this matrix contains all of the cells of the + * specified collection. + * + * @param rowSet indices of the rows + * @param colSet indices of the columns + * + * @return true if this matrix contains all of the cells of the + * specified collection + */ + public boolean containsAll(IntSet rowSet, IntSet colSet) + { + if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { + return true; + } + if (isEmpty()) { + return false; + } + + IntSet.IntIterator itr = rowSet.iterator(); + while (itr.hasNext()) { + int i = itr.next(); + IntSet cols = rows.get(i); + if (cols == null || !cols.containsAll(colSet)) { + return false; + } + } + return true; + } + + /** + * Returns true if this matrix contains all of the cells of the + * specified collection. 
+ * + * @param row index of the row + * @param colSet indices of the columns + * + * @return true if this matrix contains all of the cells of the + * specified collection + */ + public boolean containsAll(int row, IntSet colSet) + { + if (colSet == null || colSet.isEmpty()) { + return true; + } + if (isEmpty() || row < 0 || row >= rows.size()) { + return false; + } + IntSet cols = rows.get(row); + return cols != null && cols.containsAll(colSet); + } + + /** + * Returns true if this matrix contains all of the cells of the + * specified collection. + * + * @param rowSet indices of the rows + * @param col index of the column + * + * @return true if this matrix contains all of the cells of the + * specified collection + */ + public boolean containsAll(IntSet rowSet, int col) + { + if (rowSet == null || rowSet.isEmpty()) { + return true; + } + if (isEmpty() || col < 0) { + return false; + } + + IntSet.IntIterator itr = rowSet.iterator(); + while (itr.hasNext()) { + int i = itr.next(); + IntSet cols = rows.get(i); + if (cols == null || !cols.contains(col)) { + return false; + } + } + return true; + } + + /** + * Adds all of the cells in the specified collection to this matrix if + * they're not already present. 
+ * + * @param other matrix containing cells to be added to this matrix + * + * @return true if this matrix changed as a result of the call + * + * @throws NullPointerException if the specified collection contains one or more null cells + * and this matrix does not permit null cells, or if the + * specified collection is null + * @throws IllegalArgumentException if some property of an cell of the specified collection + * prevents it from being added to this matrix + * @see #add(int, int) + */ + public boolean addAll(BinaryMatrix other) + { + boolean res = false; + final int rowCount = Math.min(rows.size(), other.rows.size()); + int i = 0; + for (; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s2 == null) { + continue; + } + if (s1 == null) { + rows.set(i, s2.clone()); + res = true; + } else { + res |= s1.addAll(s2); + } + assert rows.get(i) == null || !rows.get(i).isEmpty(); + } + res |= i < other.rows.size(); + for (; i < other.rows.size(); i++) { + IntSet s = other.rows.get(i); + rows.add(s == null ? null : s.clone()); + assert rows.get(i) == null || !rows.get(i).isEmpty(); + } + return res; + } + + /** + * Retains only the cells in this matrix that are contained in the specified + * collection. In other words, removes from this matrix all of its cells + * that are not contained in the specified collection. 
+ * + * @param other matrix containing cells to be retained in this matrix + * + * @return true if this matrix changed as a result of the call + * + * @throws NullPointerException if this matrix contains a null cell and the specified + * collection does not permit null cells (optional), or if the + * specified collection is null + * @see #remove(int, int) + */ + public boolean retainAll(BinaryMatrix other) + { + boolean res = false; + final int rowCount = Math.min(rows.size(), other.rows.size()); + int i = 0; + for (; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 == null) { + continue; + } + if (s2 == null) { + rows.set(i, null); + res = true; + } else { + res |= s1.retainAll(s2); + if (s1.isEmpty()) { + rows.set(i, null); + } + } + assert rows.get(i) == null || !rows.get(i).isEmpty(); + } + res |= i < rows.size(); + for (; i < rows.size(); i++) { + rows.set(i, null); + } + if (res) { + fixRows(); + } + return res; + } + + /** + * Removes from this matrix all of its cells that are contained in the + * specified collection. 
+ * + * @param other matrix containing cells to be removed from this matrix + * + * @return true if this matrix changed as a result of the call + * + * @throws NullPointerException if this matrix contains a null cell and the specified + * collection does not permit null cells (optional), or if the + * specified collection is null + * @see #remove(int, int) + * @see #contains(int, int) + */ + public boolean removeAll(BinaryMatrix other) + { + boolean res = false; + final int rowCount = Math.min(rows.size(), other.rows.size()); + int i = 0; + for (; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 == null || s2 == null) { + continue; + } + res |= s1.removeAll(s2); + if (s1.isEmpty()) { + rows.set(i, null); + } + assert rows.get(i) == null || !rows.get(i).isEmpty(); + } + if (i < rows.size()) { + return res; + } + if (res) { + fixRows(); + } + return res; + } + + /** + * Removes all of the cells from this matrix. The matrix will be empty after + * this call returns. + * + * @throws UnsupportedOperationException if the clear method is not supported by this matrix + */ + public void clear() + { + rows.clear(); + } + + /** + * @return an array containing all the cells in this matrix + */ + public boolean[][] toArray() + { + throw new UnsupportedOperationException("TODO"); //TODO + } + + /** + * Returns an array containing all of the cells in this matrix. + *

+ * If this matrix fits in the specified array with room to spare (i.e., the + * array has more cells than this matrix), the cell in the array immediately + * following the end of the matrix are left unchanged. + * + * @param a the array into which the cells of this matrix are to be + * stored. + * + * @return the array containing all the cells in this matrix + * + * @throws NullPointerException if the specified array is null + * @throws IllegalArgumentException if this matrix does not fit in the specified array + */ + public boolean[][] toArray(boolean[][] a) + { + throw new UnsupportedOperationException("TODO"); //TODO + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(BinaryMatrix o) + { + throw new UnsupportedOperationException("TODO"); //TODO + } + + /** + * Gets a copy of the row with the given index + * + * @param row the row index + * + * @return the content of the row + */ + public IntSet getRow(int row) + { + if (row < 0) { + throw new IllegalArgumentException("negative row index: " + row); + } + if (row >= rows.size()) { + return template.empty(); + } + IntSet res = rows.get(row); + if (res == null) { + return template.empty(); + } + return res.clone(); + } + + // /** + // * Computes the power-set of the current matrix. + // *

+ // * It is a particular implementation of the algorithm Apriori (see: + // * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + // * Association Rules in Large Databases, in Proceedings of the + // * 20th International Conference on Very Large Data Bases, + // * p.487-499, 1994). The returned power-set does not contain the + // * empty matrix. + // *

+ // * The sub-matrices composing the power-set are returned in a list that is + // * sorted according to the lexicographical order provided by the integer + // * matrix. + // * + // * @return the power-set + // * @see #powerSet(int, int) + // * @see #powerSetSize() + // */ + // public List powerSet(); + // + // /** + // * Computes a subset of the power-set of the current matrix, composed by + // * those sub-matrices that have cardinality between min and + // * max. + // *

+ // * It is a particular implementation of the algorithm Apriori (see: + // * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + // * Association Rules in Large Databases, in Proceedings of the + // * 20th International Conference on Very Large Data Bases, + // * p.487-499, 1994). The power-set does not contain the empty + // * matrix. + // *

+ // * The sub-matrices composing the power-set are returned in a list that is + // * sorted according to the lexicographical order provided by the integer + // * matrix. + // * + // * @param min + // * minimum sub-matrix size (greater than zero) + // * @param max + // * maximum sub-matrix size + // * @return the power-set + // * @see #powerSet() + // * @see #powerSetSize(int, int) + // */ + // public List powerSet(int min, int max); + // + // /** + // * Computes the power-set size of the current matrix. + // *

+ // * The power-set does not contain the empty matrix. + // * + // * @return the power-set size + // * @see #powerSet() + // */ + // public int powerSetSize(); + // + // /** + // * Computes the power-set size of the current matrix, composed of those + // * sub-matrices that have cardinality between min and + // * max. + // *

+ // * The returned power-set does not contain the empty matrix. + // * + // * @param min + // * minimum sub-matrix size (greater than zero) + // * @param max + // * maximum sub-matrix size + // * @return the power-set size + // * @see #powerSet(int, int) + // */ + // public int powerSetSize(int min, int max); + // + // /** + // * Computes the Jaccard similarity coefficient between this matrix and the + // * given matrix. + // *

+ // * The coefficient is defined as + // * |A intersection B| / |A union B|. + // * + // * @param other + // * the other matrix + // * @return the Jaccard similarity coefficient + // * @see #jaccardDistance(BinaryMatrix) + // */ + // public double jaccardSimilarity(BinaryMatrix other); + // + // /** + // * Computes the Jaccard distance between this matrix and the given matrix. + // *

+ // * The coefficient is defined as 1 - + // * {@link #jaccardSimilarity(BinaryMatrix)}. + // * + // * @param other + // * the other matrix + // * @return the Jaccard distance + // * @see #jaccardSimilarity(BinaryMatrix) + // */ + // public double jaccardDistance(BinaryMatrix other); + // + // /** + // * Computes the weighted version of the Jaccard similarity coefficient + // * between this matrix and the given matrix. + // *

+ // * The coefficient is defined as + // * sum of min(A_i, B_i) / sum of max(A_i, B_i). + // * + // * @param other + // * the other matrix + // * @return the weighted Jaccard similarity coefficient + // * @see #weightedJaccardDistance(BinaryMatrix) + // */ + // public double weightedJaccardSimilarity(BinaryMatrix other); + // + // /** + // * Computes the weighted version of the Jaccard distance between this + // matrix + // * and the given matrix. + // *

+ // * The coefficient is defined as 1 - + // * {@link #weightedJaccardSimilarity(BinaryMatrix)}. + // * + // * @param other + // * the other matrix + // * @return the weighted Jaccard distance + // * @see #weightedJaccardSimilarity(BinaryMatrix) + // */ + // public double weightedJaccardDistance(BinaryMatrix other); + + /** + * Gets a copy of the column with the given index + * + * @param col the column index + * + * @return the content of the column + */ + public IntSet getCol(int col) + { + if (col < 0) { + throw new IllegalArgumentException("negative column index: " + col); + } + IntSet res = template.empty(); + for (int row = 0; row < rows.size(); row++) { + final IntSet r = rows.get(row); + if (r != null && r.contains(col)) { + res.add(row); + } + } + return res; + } + + /** + * Generated a transposed matrix + * + * @return the transposed matrix + */ + public BinaryMatrix transposed() + { + BinaryMatrix res = empty(); + for (int row = 0; row < rows.size(); row++) { + IntSet r = rows.get(row); + if (r == null) { + continue; + } + IntSet.IntIterator itr = r.iterator(); + while (itr.hasNext()) { + res.add(itr.next(), row); + } + } + return res; + } + + /** + * Generates an ASCII-art matrix representation + */ + @Override + public String toString() + { + StringBuilder s = new StringBuilder(); + + final int maxCol = maxCol(); + + // initial line + s.append('+'); + for (int i = 0; i <= maxCol; i++) { + s.append('-'); + } + s.append("+\n"); + + // cells + for (IntSet row : rows) { + s.append('|'); + int col = 0; + if (row != null) { + IntSet.IntIterator itr = row.iterator(); + while (itr.hasNext()) { + int c = itr.next(); + while (col++ < c) { + s.append(' '); + } + s.append('*'); + } + } + while (col++ <= maxCol) { + s.append(' '); + } + s.append("|\n"); + } + + // final line + s.append('+'); + for (int i = 0; i <= maxCol; i++) { + s.append('-'); + } + s.append("+\n"); + + return s.toString(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean 
equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof BinaryMatrix)) { + return false; + } + return rows.equals(((BinaryMatrix) obj).rows); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + int h = 1; + for (IntSet s : rows) { + h = (h << 5) - h; + if (s != null) { + h += s.hashCode(); + } + } + return h; + } + + /** + * @return the greatest non-empty row + */ + public int maxRow() + { + return rows.size() - 1; + } + + /** + * @return the greatest non-empty column + */ + public int maxCol() + { + int res = 0; + for (IntSet row : rows) { + if (row != null) { + assert !row.isEmpty(); + res = Math.max(res, row.last()); + } + } + return res; + } + + /** + * @return the index set of non-empty rows + */ + public IntSet involvedRows() + { + IntSet res = template.empty(); + for (int i = 0; i < rows.size(); i++) { + if (rows.get(i) != null) { + res.add(i); + } + } + return res; + } + + /** + * @return the index set of non-empty columns + */ + public IntSet involvedCols() + { + IntSet res = template.empty(); + for (int i = 0; i < rows.size(); i++) { + res.addAll(rows.get(i)); + } + return res; + } + + /** + * An {@link Iterator}-like interface + */ + public interface CellIterator + { + /** + * @return true if the iterator has more cells. + */ + boolean hasNext(); + + /** + * Returns the next cell in the iteration. IMPORTANT: each + * iteration returns an array of two elements, where the first element + * is the row, while the second element is the column of the current + * cell. In order to reduce the produced heap garbage, there is only + * one array instantiated for each iterator, whose content is + * overridden at each iteration. + * + * @return the next cell in the iteration. + * + * @throws NoSuchElementException iteration has no more cells. + */ + int[] next(); + + /** + * Removes from the underlying matrix the last cell returned by the + * iterator (optional operation). 
This method can be called only once + * per call to next. The behavior of an iterator is unspecified + * if the underlying collection is modified while the iteration is in + * progress in any way other than by calling this method. + * + * @throws UnsupportedOperationException if the remove operation is not supported by + * this Iterator. + * @throws IllegalStateException if the next method has not yet been called, + * or the remove method has already been called + * after the last call to the next method. + */ + void remove(); + + /** + * Skips all the cells before the specified cell, so that + * {@link #next()} gives the given cell or, if it does not exist, the + * cell immediately after according to the sorting provided by this set. + *

+ * If the given cell is less than the next cell, it does nothing + * + * @param row row of the cell + * @param col column of the cell + */ + public void skipAllBefore(int row, int col); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java new file mode 100755 index 000000000000..dcdb34205a11 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java @@ -0,0 +1,106 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.wrappers.matrix; + +/** + * A class for representing a single transaction-item relationship. This class + * is mainly used in {@link PairSet} to iterate over the cells of a + * binary matrix.
+ * + * @param transaction type + * @param item type + * + * @author Alessandro Colantonio + * @version $Id: Pair.java 140 2011-02-07 21:30:29Z cocciasik $ + * @see PairSet + */ +public class Pair implements java.io.Serializable +{ + /** + * generated ID + */ + private static final long serialVersionUID = 328985131584539749L; + + /** + * the transaction + */ + public final T transaction; + + /** + * the item + */ + public final I item; + + /** + * Creates a new transaction-item pair + * + * @param transaction + * @param item + */ + public Pair(T transaction, I item) + { + this.transaction = transaction; + this.item = item; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + // 524287 * i = (i << 19) - i, where 524287 is prime. + // This hash function avoids transactions and items to overlap, + // since "item" can often stay in 32 - 19 = 13 bits. Therefore, it is + // better than multiplying by 31. + final int hi = item.hashCode(); + final int ht = transaction.hashCode(); + return (hi << 19) - hi + ht; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (obj == null) { + return false; + } + if (this == obj) { + return true; + } + if (!(obj instanceof Pair)) { + return false; + } + @SuppressWarnings("unchecked") + Pair other = (Pair) obj; + return transaction.equals(other.transaction) && item.equals(other.item); + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return "(" + transaction + ", " + item + ")"; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java new file mode 100755 index 000000000000..c68ffd1607c6 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java @@ -0,0 +1,448 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers.matrix; + +import java.io.Serializable; +import java.util.AbstractCollection; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Set; + +/** + * A class that associates a value to each pair within a {@link PairSet} instance. It is not as fast as {@link HashMap}, but requires much less memory. + * + * @param < T > transaction type + * @param < I > item type + * @param < V > type of the value to associate + * + * @author Alessandro Colantonio + * @version $Id: PairMap.java 153 2011-05-30 16:39:57Z cocciasik $ + * @see PairSet + */ +public class PairMap extends AbstractMap, V> implements Serializable, Cloneable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 4699094886888004702L; + + /** + * all existing keys + * + * @uml.property name="keys" + * @uml.associationEnd + */ + private final PairSet keys; + + /** + * values related to existing keys, according to the ordering provided by {@link #keys} + */ + private final ArrayList values; + + /** + * Creates an empty map + * + * @param keys {@link PairSet} instance internally used to store indices. If + * not empty, {@link #get(Object)} will return null + * for each existing pair if we do not also put a value. 
+ */ + public PairMap(PairSet keys) + { + this.keys = keys; + values = new ArrayList(keys.size()); + for (int i = 0; i < keys.size(); i++) { + values.add(null); // placeholder that keeps values aligned with the keys already present + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + keys.clear(); + values.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsKey(Object key) + { + return keys.contains(key); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsValue(Object value) + { + return values.contains(value); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public V get(Object key) + { + if (key == null || !(key instanceof Pair)) { + return null; + } + int index = keys.indexOf((Pair) key); + if (index < 0) { + return null; + } + return values.get(index); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public V put(Pair key, V value) + { + boolean isNew = keys.add(key); + int index = keys.indexOf(key); + Object old = null; + if (isNew) { + values.add(index, value); // new key: insert at its position so that later values shift along with their keys + } else { + old = values.set(index, value); + } + return (V) old; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public V remove(Object key) + { + if (key == null || !(key instanceof Pair)) { + return null; + } + int index = keys.indexOf((Pair) key); + if (index < 0) { + return null; + } + keys.remove(key); // the index was resolved above, it cannot be looked up once the key is gone + return values.remove(index); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return keys.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public PairMap clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! 
+ PairMap cloned = new PairMap(keys.clone()); + cloned.values.clear(); + cloned.values.addAll(values); + return cloned; + } + + /** + * {@inheritDoc} The returned view supports queries, iteration and {@code clear()} only; adding and removing keys throws {@link UnsupportedOperationException}. + */ + @Override + public Set> keySet() + { + return new AbstractSet>() + { + @Override + public boolean add(Pair e) + { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() + { + PairMap.this.clear(); + } + + @Override + public boolean contains(Object o) + { + return keys.contains(o); + } + + @Override + public boolean containsAll(Collection c) + { + return keys.containsAll(c); + } + + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + @Override + public Iterator> iterator() + { + return new Iterator>() + { + Iterator> itr = keys.iterator(); + + @Override + public boolean hasNext() + { + return itr.hasNext(); + } + + @Override + public Pair next() + { + return itr.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public boolean remove(Object o) + { + throw new UnsupportedOperationException(); + } + + @Override + public int size() + { + return keys.size(); + } + }; + } + + /** + * {@inheritDoc} The returned view supports queries, iteration and {@code clear()} only; adding and removing values throws {@link UnsupportedOperationException}. 
+ */ + @Override + public Collection values() + { + return new AbstractCollection() + { + + @Override + public boolean add(V e) + { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() + { + PairMap.this.clear(); + } + + @Override + public boolean contains(Object o) + { + return values.contains(o); + } + + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + @Override + public Iterator iterator() + { + return new Iterator() + { + Iterator itr = values.iterator(); + + @Override + public boolean hasNext() + { + return itr.hasNext(); + } + + @Override + public V next() + { + return itr.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public boolean remove(Object o) + { + 
throw new UnsupportedOperationException(); + } + + @Override + public int size() + { + return values.size(); + } + }; + } + + /** + * {@inheritDoc} Entries read and write the backing value list through {@code getValue()} and {@code setValue()}; removing entries throws {@link UnsupportedOperationException}. + */ + @Override + public Set, V>> entrySet() + { + return new AbstractSet, V>>() + { + @Override + public boolean add(Entry, V> e) + { + V res = PairMap.this.put(e.getKey(), e.getValue()); + return !java.util.Objects.equals(res, e.getValue()); // compare by value, not by reference + } + + @Override + public void clear() + { + PairMap.this.clear(); + } + + @Override + public boolean contains(Object o) + { + return o instanceof Entry // instanceof also rejects null + && PairMap.this.containsKey(((Entry) o).getKey()) + && java.util.Objects.equals( // the key must be mapped to exactly this value, not to any value of the map + PairMap.this.get(((Entry) o).getKey()), ((Entry) o).getValue()); + } + + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + @Override + public Iterator, V>> iterator() + { + return new Iterator, V>>() + { + final Iterator> keyItr = keys.iterator(); + int valueIndex = -1; + + @Override + public boolean hasNext() + { + return keyItr.hasNext(); + } + + @Override + public Entry, V> next() + { + final Pair key = keyItr.next(); + final int index = ++valueIndex; // fixed per entry, so entries handed out earlier stay bound to their own slot + + return new Entry, V>() + { + @Override + public Pair getKey() + { + return key; + } + + @Override + public V getValue() + { + return values.get(index); + } + + @Override + public V setValue(V value) + { + return values.set(index, value); + } + + @Override + public String toString() + { + return "{" + getKey() + "=" + getValue() + "}"; + } + }; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public boolean remove(Object o) + { + 
throw new UnsupportedOperationException(); + } + + @Override + public int size() + { + return keys.size(); + } + }; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java new file mode 100755 index 000000000000..41cf34b507e6 --- /dev/null +++ 
b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java @@ -0,0 +1,1403 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.wrappers.matrix; + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.IntSet; +import io.druid.extendedset.wrappers.IndexedSet; +import io.druid.extendedset.wrappers.IntegerSet; +import io.druid.extendedset.wrappers.matrix.BinaryMatrix.CellIterator; + +import java.io.Serializable; +import java.util.AbstractCollection; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * A set of pairs internally represented by a binary matrix.

This class can be used to represent a set of transactions, where each transaction is a set of items. Rows are transactions, columns are the items involved with each transaction. + * + * @param < T > transaction type + * @param < I > item type + * + * @author Alessandro Colantonio + * @version $Id: PairSet.java 153 2011-05-30 16:39:57Z cocciasik $ + * @see Pair + * @see IntSet + */ +public class PairSet extends AbstractExtendedSet> implements Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 7902458899512666217L; + + /** + * binary matrix + * + * @uml.property name="matrix" + * @uml.associationEnd + */ + private final BinaryMatrix matrix; + + /** + * all possible transactions + * + * @uml.property name="allTransactions" + * @uml.associationEnd + */ + private final IndexedSet allTransactions; + + /** + * all possible items + * + * @uml.property name="allItems" + * @uml.associationEnd + */ + private final IndexedSet allItems; + + /** + * Initializes the set by specifying all possible transactions and items. + * + * @param matrix {@link BinaryMatrix} instance used to internally represent the matrix + * @param transactions collection of all possible transactions. The specified + * order will be preserved within when iterating over the + * {@link PairSet} instance. + * @param items collection of all possible items. The specified order + * will be preserved within each transaction {@link PairSet}. 
+ */ + public PairSet(BinaryMatrix matrix, Collection transactions, Collection items) + { + if (transactions == null || items == null) { + throw new NullPointerException(); + } + this.matrix = matrix; + + IntSet tmp = matrix.emptyRow(); + if (transactions instanceof IndexedSet) { + allTransactions = (IndexedSet) transactions; + } else { + allTransactions = new IndexedSet(tmp.empty(), transactions).universe(); //.unmodifiable(); + } + if (items instanceof IndexedSet) { + allItems = (IndexedSet) items; + } else { + allItems = new IndexedSet(tmp.empty(), items).universe(); //.unmodifiable(); + } + } + + /** + * Initializes the set by specifying all possible transactions and items. + * + * @param matrix {@link BinaryMatrix} instance used to internally represent the + * matrix + * @param pairs arrays n x 2 of pairs of transactions (first) and items (second). + */ + public PairSet(BinaryMatrix matrix, final Object[][] pairs) + { + this(matrix, new AbstractCollection>() + { + @Override + public Iterator> iterator() + { + return new Iterator>() + { + int i = 0; + + @SuppressWarnings("unchecked") + @Override + public Pair next() {return new Pair(pairs[i][0], pairs[i++][1]);} + + @Override + public boolean hasNext() {return i < pairs.length;} + + @Override + public void remove() {throw new UnsupportedOperationException();} + }; + } + + @Override + public int size() {return pairs.length;} + }); + } + + /** + * Converts a generic collection of transaction-item pairs to a + * {@link PairSet} instance. 
+ * + * @param matrix {@link IntSet} instance used to internally represent the set + * @param pairs collection of {@link Pair} instances + */ + public PairSet(BinaryMatrix matrix, Collection> pairs) + { + if (pairs == null) { + throw new RuntimeException("null pair set"); + } + if (pairs.isEmpty()) { + throw new RuntimeException("empty pair set"); + } + + // identify all possible transactions and items and their frequencies + final Map ts = new HashMap(); + final Map is = new HashMap(); + for (Pair p : pairs) { + Integer f; + + f = ts.get(p.transaction); + f = f == null ? 1 : f + 1; + ts.put(p.transaction, f); + + f = is.get(p.item); + f = f == null ? 1 : f + 1; + is.put(p.item, f); + } + + // sort transactions and items by descending frequencies + List> sortedPairs = new ArrayList>(pairs); + Collections.sort(sortedPairs, new Comparator>() + { + @Override + public int compare(Pair o1, Pair o2) + { + int r = ts.get(o2.transaction).compareTo(ts.get(o1.transaction)); + if (r == 0) { + r = is.get(o2.item).compareTo(is.get(o1.item)); + } + return r; + } + }); + List sortedTransactions = new ArrayList(ts.keySet()); + Collections.sort(sortedTransactions, new Comparator() + { + @Override + public int compare(T o1, T o2) + { + return ts.get(o2).compareTo(ts.get(o1)); + } + }); + List sortedItems = new ArrayList(is.keySet()); + Collections.sort(sortedItems, new Comparator() + { + @Override + public int compare(I o1, I o2) + { + return is.get(o2).compareTo(is.get(o1)); + } + }); + + // identify all transactions and items + this.matrix = matrix; + matrix.add(0, 0); + allTransactions = new IndexedSet(matrix.getRow(0), sortedTransactions).universe(); // .unmodifiable(); + allItems = new IndexedSet(matrix.getRow(0), sortedItems).universe(); // .unmodifiable(); + matrix.clear(); + + // create the matrix + for (Pair p : sortedPairs) { + add(p); + } + } + + /** + * Wraps a {@link BinaryMatrix} instance with a {@link PairSet} instance. + *

+ * NOTE: the maximum item and transaction IDs are those existing in + * the binary matrix when the wrapping take place + * + * @param b a {@link BinaryMatrix} instance to wrap + * + * @return a new {@link PairSet} instance, indexed by the given matrix + */ + public static PairSet createFromBinaryMatrix(BinaryMatrix b) + { + // TODO this is a little bit costly since PairSet will allocate an array + // and a HashMap of Integers to map elements of BinaryMatrix... + // Think about a IntegerPairSet class or to an "fake" IntegerIndexedSet + // just for this purpose. + + IntegerSet t = new IntegerSet(b.emptyRow()); + t.intSet().add(b.maxRow() + 1); + t.intSet().complement(); + + IntegerSet i = new IntegerSet(b.emptyRow()); + i.intSet().add(b.maxCol() + 1); + i.intSet().complement(); + + return new PairSet(b, t, i); + } + + /** + * maps a transaction to its index and returns -1 if not found + */ + private int transactionToIndex(T t) + { + Integer r = allTransactions.absoluteIndexOf(t); + return r == null ? -1 : r.intValue(); + } + + /** + * maps an item to its index and returns -1 if not found + */ + private int itemToIndex(I i) + { + Integer r = allItems.absoluteIndexOf(i); + return r == null ? 
-1 : r.intValue(); + } + + /** + * maps a pair of indices to the corresponding {@link Pair} + */ + private Pair indexToPair(int[] i) + { + return new Pair(allTransactions.absoluteGet(i[0]), allItems.absoluteGet(i[1])); + } + + /** + * A shortcut for new PairSet<T, I>(matrix, mapping) + * + * @param bm {@link BinaryMatrix} instance to link + * + * @return the new {@link PairSet} with the given {@link BinaryMatrix} + * instance and the same mapping of this + */ + private PairSet createFromIndices(BinaryMatrix bm) + { + return new PairSet(bm, allTransactions, allItems); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet clone() + { + return createFromIndices(matrix.clone()); + } + + /** + * Checks if the given collection is a instance of {@link PairSet} with + * the same index mappings + * + * @param c collection to check + * + * @return true if the given collection is a instance of + * {@link PairSet} with the same index mappings + */ + private boolean hasSameIndices(Collection c) + { + return c != null + && (c instanceof PairSet) + && (allTransactions == ((PairSet) c).allTransactions) + && (allItems == ((PairSet) c).allItems); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(Pair e) + { + return add(e.transaction, e.item); + } + + /** + * Adds a single transaction-item pair + * + * @param transaction the transaction of the pair + * @param item the item of the pair + * + * @return true if the set has been changed + */ + public boolean add(T transaction, I item) + { + return matrix.add(transactionToIndex(transaction), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection> c) + { + return matrix.addAll(convert(c).matrix); + } + + /** + * Add the pairs obtained from the Cartesian product of transactions + * and items + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean addAll(Collection 
trans, Collection items) + { + if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { + return false; + } + return matrix.addAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Add the pairs obtained from the Cartesian product of transactions + * and items + * + * @param trans the given transaction + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean addAll(T trans, Collection items) + { + if (trans == null || items == null || items.isEmpty()) { + return false; + } + return matrix.addAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Add the pairs obtained from the Cartesian product of transactions + * and items + * + * @param trans collection of transactions + * @param item the given item + * + * @return true if the set set has been changed + */ + public boolean addAll(Collection trans, I item) + { + if (trans == null || trans.isEmpty() || item == null) { + return false; + } + return matrix.addAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + matrix.clear(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean contains(Object o) + { + return o != null + && o instanceof Pair + && contains(((Pair) o).transaction, ((Pair) o).item); + } + + /** + * Checks if the given transaction-item pair is contained within the set + * + * @param transaction the transaction of the pair + * @param item the item of the pair + * + * @return true if the given transaction-item pair is contained + * within the set + */ + public boolean contains(T transaction, I item) + { + int t = transactionToIndex(transaction); + if (t < 0) { + return false; + } + int i = itemToIndex(item); + if (i < 0) { + return false; + } + return matrix.contains(t, i); + } + + /** + * {@inheritDoc} + */ + @Override + public 
boolean containsAll(Collection c) + { + return matrix.containsAll(convert(c).matrix); + } + + /** + * Checks if the pairs obtained from the Cartesian product of + * transactions and items are contained + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if every pair of the product is contained; also true when an argument is null or empty + */ + public boolean containsAll(Collection trans, Collection items) + { + if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { + return true; + } + if (isEmpty()) { + return false; + } + return matrix.containsAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Checks if the pairs obtained from the Cartesian product of + * transactions and items are contained + * + * @param trans the transaction + * @param items collection of items + * + * @return true if every pair of the product is contained; also true when an argument is null or empty + */ + public boolean containsAll(T trans, Collection items) + { + if (trans == null || items == null || items.isEmpty()) { + return true; + } + if (isEmpty()) { + return false; + } + return matrix.containsAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Checks if the pairs obtained from the Cartesian product of + * transactions and items are contained + * + * @param trans collection of transactions + * @param item the item + * + * @return true if every pair of the product is contained; also true when an argument is null or empty + */ + public boolean containsAll(Collection trans, I item) + { + if (trans == null || trans.isEmpty() || item == null) { + return true; + } + if (isEmpty()) { + return false; + } + return matrix.containsAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return matrix.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public 
ExtendedIterator> iterator() + { + return new ExtendedIterator>() + { + CellIterator itr = matrix.iterator(); + + @Override + public 
Pair next() {return indexToPair(itr.next());} + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public void remove() {itr.remove();} + + @Override + public void skipAllBefore(Pair element) + { + itr.skipAllBefore( + transactionToIndex(element.transaction), + itemToIndex(element.item) + ); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator> descendingIterator() + { + return new ExtendedIterator>() + { + CellIterator itr = matrix.descendingIterator(); + + @Override + public Pair next() {return indexToPair(itr.next());} + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public void remove() {itr.remove();} + + @Override + public void skipAllBefore(Pair element) + { + itr.skipAllBefore( + transactionToIndex(element.transaction), + itemToIndex(element.item) + ); + } + }; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean remove(Object o) + { + return o instanceof Pair + && remove(((Pair) o).transaction, ((Pair) o).item); + } + + /** + * Removes a single transaction-item pair + * + * @param transaction the transaction of the pair + * @param item the item of the pair + * + * @return true if the pair set has been changed + */ + public boolean remove(T transaction, I item) + { + return matrix.remove(transactionToIndex(transaction), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(Collection c) + { + return matrix.removeAll(convert(c).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(Collection c) + { + return matrix.retainAll(convert(c).matrix); + } + + /** + * Removes the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean removeAll(Collection trans, Collection items) + { + if (trans 
== null || trans.isEmpty() || items == null || items.isEmpty()) { + return false; + } + return matrix.removeAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Removes the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans a transaction + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean removeAll(T trans, Collection items) + { + if (trans == null || items == null || items.isEmpty()) { + return false; + } + return matrix.removeAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Removes the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param item collection of items + * + * @return true if the set set has been changed + */ + public boolean removeAll(Collection trans, I item) + { + if (trans == null || trans.isEmpty() || item == null) { + return false; + } + return matrix.removeAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * Retains the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean retainAll(Collection trans, Collection items) + { + if (isEmpty()) { + return false; + } + if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { + clear(); + return true; + } + return matrix.retainAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Retains the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans the transaction + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean retainAll(T trans, Collection items) + { + if (isEmpty()) { + return false; + 
} + if (trans == null || items == null || items.isEmpty()) { + clear(); + return true; + } + return matrix.retainAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Retains the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param item the item + * + * @return true if the set set has been changed + */ + public boolean retainAll(Collection trans, I item) + { + if (isEmpty()) { + return false; + } + if (trans == null || trans.isEmpty() || item == null) { + clear(); + return true; + } + return matrix.retainAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return matrix.size(); + } + + /** + * Gets the set of all possible transactions that can be contained within + * the set + * + * @return the set of all possible transactions that can be contained within + * the set + */ + public IndexedSet allTransactions() + { + return allTransactions; + } + + /** + * Gets the set of all possible items that can be contained within each + * transaction + * + * @return the set of all possible items that can be contained within each + * transaction + */ + public IndexedSet allItems() + { + return allItems; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return matrix.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof PairSet)) { + return false; + } + final PairSet other = (PairSet) obj; + return hasSameIndices(other) && matrix.equals(other.matrix); + } + + /** + * Lists all items contained within a given transaction + * + * @param transaction the given transaction + * + * @return items contained within the given transaction + */ + public IndexedSet itemsOf(T transaction) + { + IndexedSet res = allItems.empty(); + 
res.indices().addAll(matrix.getRow(transactionToIndex(transaction))); + return res; + } + + /** + * Lists all transactions involved with a specified item + * + * @param item the given item + * + * @return transactions involved with a specified item + */ + public IndexedSet transactionsOf(I item) + { + IndexedSet res = allTransactions.empty(); + res.indices().addAll(matrix.getCol(itemToIndex(item))); + return res; + } + + /** + * Gets the set of transactions in {@link #allTransactions()} that contains + * at least one item + * + * @return the set of transactions in {@link #allTransactions()} that + * contains at least one item + */ + public IndexedSet involvedTransactions() + { + IndexedSet res = allTransactions.empty(); + res.indices().addAll(matrix.involvedRows()); + return res; + } + + /** + * Gets the set of items in {@link #allItems()} that are contained in at + * least one transaction + * + * @return the set of items in {@link #allItems()} that are contained in at + * least one transaction + */ + public IndexedSet involvedItems() + { + IndexedSet res = allItems.empty(); + res.indices().addAll(matrix.involvedCols()); + return res; + } + + /** + * Gets the ith element of the set + * + * @param index position of the element in the sorted set + * + * @return the ith element of the set + * + * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to + * {@link #size()} + */ + @Override + public Pair get(int index) + { + return indexToPair(matrix.get(index)); + } + + /** + * Provides position of element within the set. + *

+ * It returns -1 if the element does not exist within the set. + * + * @param element element of the set + * + * @return the element position + */ + @Override + public int indexOf(Pair element) + { + return matrix.indexOf( + transactionToIndex(element.transaction), + itemToIndex(element.item) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + StringBuilder s = new StringBuilder(); + + s.append("possible transactions: "); + s.append(allTransactions); + s.append('\n'); + s.append("possible items: "); + s.append(allItems); + s.append('\n'); + + s.append("pairs:\n"); + s.append(matrix.toString()); + s.append("info: " + matrix.debugInfo()); + + return s.toString(); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + return matrix.bitmapCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return matrix.collectionCompressionRatio(); + } + + /** + * Returns the set of indices. Modifications to this set are reflected to + * this {@link PairSet} instance. Trying to perform operation on + * out-of-bound indices will throw an {@link IllegalArgumentException} + * exception. + * + * @return the index set + */ + public BinaryMatrix matrix() + { + return matrix; + } + +// /** +// * Extracts a subset represented by a certain range of transactions and +// * items, according to the ordering provided by {@link #allTransactions()} +// * and {@link #allItems()}. 
+// * +// * @param fromTransaction +// * the first transaction of the range (if null it +// * represents the first one) +// * @param toTransaction +// * the last transaction of the range (if null it +// * represents the last one) +// * @param fromItem +// * the first item of the range (if null it +// * represents the first one) +// * @param toItem +// * the last item of the range (if null it represents +// * the last one) +// * @return the specified subset +// */ +// public PairSet subSet(T fromTransaction, T toTransaction, I fromItem, I toItem) { +// BinaryMatrix mask = matrix.empty(); +// mask.fill( +// transactionToIndex(fromTransaction), +// itemToIndex(fromItem), +// transactionToIndex(toTransaction), +// itemToIndex(toItem)); +// return new PairSet(matrix.intersection(mask), allTransactions, allItems); +// } +// +// /** +// * Extracts a subset represented by a collection of transactions and items +// * +// * @param involvedTransactions +// * involved transactions (if null, it represents all +// * transactions in {@link #allTransactions()}) +// * @param involvedItems +// * involved items (if null, it represents all items +// * in {@link #allItems()}) +// * @return all the transaction-item pairs that represent the specified +// * subset +// */ +// public PairSet subSet(Collection involvedTransactions, Collection involvedItems) { +// BinaryMatrix mask = matrix.empty(); +// mask.addAll( +// allTransactions.convert(involvedTransactions).indices(), +// allItems.convert(involvedItems).indices()); +// return new PairSet(matrix.intersection(mask), allTransactions, allItems); +// } + + /** + * {@inheritDoc} + */ + @Override + public PairSet empty() + { + return createFromIndices(matrix.empty()); + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + matrix.complement(); + } + + /** + * {@inheritDoc} + */ + @Override + public Comparator> comparator() + { + return new Comparator>() + { + @Override + public int compare(Pair o1, Pair o2) + { + int 
t1 = transactionToIndex(o1.transaction); + int t2 = transactionToIndex(o2.transaction); + int r = t1 < t2 ? -1 : (t1 == t2 ? 0 : 1); + if (r == 0) { + int i1 = itemToIndex(o1.item); + int i2 = itemToIndex(o2.item); + r = i1 < i2 ? -1 : (i1 == i2 ? 0 : 1); + } + return r; + } + }; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet convert(Collection c) + { + if (c == null) { + return empty(); + } + + // useless to convert... + if (hasSameIndices(c)) { + return (PairSet) c; + } + + // convert + PairSet res = empty(); + for (Pair p : (Collection>) c) { + res.matrix.add(transactionToIndex(p.transaction), itemToIndex(p.item)); + } + return res; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet convert(Object... e) + { + return (PairSet) super.convert(e); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(Pair from, Pair to) + { + matrix.clear( + transactionToIndex(from.transaction), + itemToIndex(from.item), + transactionToIndex(to.transaction), + itemToIndex(to.item) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return matrix.complementSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet complemented() + { + return createFromIndices(matrix.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet difference(Collection> other) + { + return other == null ? 
clone() : createFromIndices(matrix.difference(convert(other).matrix)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection> other) + { + return other == null || matrix.containsAny(convert(other).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection> other, int minElements) + { + return other != null && !other.isEmpty() && matrix.containsAtLeast(convert(other).matrix, minElements); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(Collection> other) + { + return other == null ? (int) size() : (int) matrix.differenceSize(convert(other).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(Pair from, Pair to) + { + matrix.fill( + transactionToIndex(from.transaction), + itemToIndex(from.item), + transactionToIndex(to.transaction), + itemToIndex(to.item) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(Pair e) + { + matrix.flip( + transactionToIndex(e.transaction), + itemToIndex(e.item) + ); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet subSet(Pair fromElement, Pair toElement) + { + return (PairSet) super.subSet(fromElement, toElement); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet headSet(Pair toElement) + { + return (PairSet) super.headSet(toElement); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet tailSet(Pair fromElement) + { + return (PairSet) super.tailSet(fromElement); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet intersection(Collection> c) + { + return c == null ? 
empty() : createFromIndices(matrix.intersection(convert(c).matrix)); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public List> powerSet() + { + return (List>) super.powerSet(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public List> powerSet(int min, int max) + { + return (List>) super.powerSet(min, max); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet symmetricDifference(Collection> other) + { + return other == null ? clone() : createFromIndices(matrix.symmetricDifference(convert(other).matrix)); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(Collection> other) + { + return other == null ? (int) size() : (int) matrix.symmetricDifferenceSize(convert(other).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet union(Collection> other) + { + return other == null ? clone() : createFromIndices(matrix.union(convert(other).matrix)); + } + + /** + * {@inheritDoc} + */ + @Override + public int unionSize(Collection> other) + { + return other == null ? 
(int) size() : (int) matrix.unionSize(convert(other).matrix); + } + +// /** +// * {@inheritDoc} +// */ +// @Override +// public PairSet unmodifiable() { +// return new PairSet(allTransactions, allItems, maxTransactionCount, maxItemCount, indices.unmodifiable()); +// } + + /** + * {@inheritDoc} + */ + @Override + public Pair first() + { + return indexToPair(matrix.first()); + } + + /** + * {@inheritDoc} + */ + @Override + public Pair last() + { + return indexToPair(matrix.last()); + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(ExtendedSet> o) + { + return matrix.compareTo(convert(o).matrix); + } + + /** + * @return a transposed {@link PairSet} instance + */ + public PairSet transposed() + { + return new PairSet(matrix.transposed(), allItems, allTransactions); + } + + /** + * Creates a new {@link PairSet} instance with the union of all possible + * transactions and items as result for {@link #allTransactions()} and + * {@link #allItems()}, respectively, and the union of pairs. + * + * @param other the other {@link PairSet} instance to merge + * + * @return the merged {@link PairSet} instance + */ + public PairSet merged(PairSet other) + { + if (other == null) { + return clone(); + } + + // compute the new universe + Set newAllTransactions = new LinkedHashSet(allTransactions); + Set newAllItems = new LinkedHashSet(allItems); + newAllTransactions.addAll(other.allTransactions); + newAllItems.addAll(other.allItems); + + // compute the union of pairs + PairSet res = new PairSet( + matrix.clone(), + newAllTransactions, + newAllItems + ); + if (!other.isEmpty()) { + res.addAll(other); + } + return res; + } + + /** + * Creates a new {@link PairSet} instance with only non-empty transactions + * and items. 
+ * + * @return the compacted {@link PairSet} instance + */ + public PairSet compacted() + { + // trivial case + if (isEmpty()) { + return empty(); + } + + // compute the new universe + final Set newAllTransactions = new LinkedHashSet(involvedTransactions()); + final Set newAllItems = new LinkedHashSet(involvedItems()); + if (newAllTransactions.size() == allTransactions.size() + && newAllItems.size() == allItems.size()) { + return clone(); + } + + // compute the union of pairs + PairSet res = new PairSet( + matrix.empty(), + newAllTransactions, + newAllItems + ); + res.addAll(this); + return res; + } + + +// // +// // COMPRESSED OBJECT SERIALIZATION +// // +// +// private static class ZipObjectOutputStream extends ObjectOutputStream { +// private GZIPOutputStream out; +// ZipObjectOutputStream(ObjectOutputStream out) throws IOException {this(new GZIPOutputStream(out));} +// ZipObjectOutputStream(GZIPOutputStream out) throws IOException {super(out); this.out = out;} +// @Override public void close() throws IOException {out.flush(); out.finish();} +// } +// +// private static class ZipObjectInputStream extends ObjectInputStream { +// ZipObjectInputStream(ObjectInputStream in) throws IOException {super(new GZIPInputStream(in));} +// } +// +// private void writeObject(ObjectOutputStream out) throws IOException { +// if (out instanceof ZipObjectOutputStream) { +// out.defaultWriteObject(); +// } else { +// ObjectOutputStream oos = new ZipObjectOutputStream(out); +// oos.writeObject(this); +// oos.close(); +// } +// } +// +// private transient Object serialize; +// +// @SuppressWarnings("unused") +// private Object readResolve() throws ObjectStreamException { +// if (serialize == null) +// serialize = this; +// return serialize; +// } +// +// private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { +// if (in instanceof ZipObjectInputStream) { +// in.defaultReadObject(); +// } else { +// ObjectInputStream ois = new 
ZipObjectInputStream(in); +// serialize = ois.readObject(); +// } +// } +} diff --git a/extendedset/src/test/java/io/druid/extendedset/Debug.java b/extendedset/src/test/java/io/druid/extendedset/Debug.java new file mode 100755 index 000000000000..7344af17e940 --- /dev/null +++ b/extendedset/src/test/java/io/druid/extendedset/Debug.java @@ -0,0 +1,1858 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset; + +import io.druid.extendedset.ExtendedSet.ExtendedIterator; +import io.druid.extendedset.intset.AbstractIntSet; +import io.druid.extendedset.intset.ArraySet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.FastSet; +import io.druid.extendedset.intset.HashIntSet; +import io.druid.extendedset.intset.IntSet; +import io.druid.extendedset.utilities.IntSetStatistics; +import io.druid.extendedset.utilities.random.MersenneTwister; +import io.druid.extendedset.wrappers.GenericExtendedSet; +import io.druid.extendedset.wrappers.IndexedSet; +import io.druid.extendedset.wrappers.IntegerSet; +import io.druid.extendedset.wrappers.matrix.BinaryMatrix; +import io.druid.extendedset.wrappers.matrix.BinaryMatrix.CellIterator; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import 
java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +//import it.uniroma3.mat.extendedset.intset.Concise2Set; + + +/** + * Test class for {@link ConciseSet}, {@link FastSet}, and {@link IndexedSet}. + * + * @author Alessandro Colantonio + * @version $Id: Debug.java 155 2011-05-30 22:27:00Z cocciasik $ + */ +public class Debug +{ + /** + * Checks if a {@link ExtendedSet} instance and a {@link TreeSet} instance + * contains the same elements. {@link TreeSet} is used because it is the + * most similar class to {@link ExtendedSet}. + * + * @param type of elements within the set + * @param bits bit-set to check + * @param items {@link TreeSet} instance that must contain the same elements + * of the bit-set + * + * @return true if the given {@link ConciseSet} and + * {@link TreeSet} are equals in terms of contained elements + */ + private static boolean checkContent(ExtendedSet bits, SortedSet items) + { + if (bits.size() != items.size()) { + return false; + } + if (bits.isEmpty() && items.isEmpty()) { + return true; + } + for (T i : bits) { + if (!items.contains(i)) { + return false; + } + } + for (T i : items) { + if (!bits.contains(i)) { + return false; + } + } + if (!bits.last().equals(items.last())) { + return false; + } + if (!bits.first().equals(items.first())) { + return false; + } + return true; + } + + /** + * Generates an empty set of the specified class + * + * @param c the given class + * + * @return the empty set + */ + private static > X empty(Class c) + { + try { + return c.newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Stress test for {@link ConciseSet#add(Integer)} + *

+ * It starts from a very sparse set (most of the words will be 0's + * sequences) and progressively become very dense (words first + * become 0's sequences with 1 set bit and there will be almost one + * word per item, then words become literals, and finally they + * become 1's sequences and drastically reduce in number) + */ + private static void testForAdditionStress(Class> c) + { + ExtendedSet previousBits = empty(c); + ExtendedSet currentBits = empty(c); + TreeSet currentItems = new TreeSet(); + + Random rnd = new MersenneTwister(); + + // add 100000 random numbers + for (int i = 0; i < 100000; i++) { + // random number to add + int item = rnd.nextInt(10000 + 1); + + // keep the previous results + previousBits = currentBits; + currentBits = currentBits.clone(); + + // add the element + System.out.format("Adding %d...\n", item); + boolean itemExistsBefore = currentItems.contains(item); + boolean itemAdded = currentItems.add(item); + boolean itemExistsAfter = currentItems.contains(item); + boolean bitExistsBefore = currentBits.contains(item); + boolean bitAdded = currentBits.add(item); + boolean bitExistsAfter = currentBits.contains(item); + if (itemAdded ^ bitAdded) { + System.out.println("wrong add() result"); + return; + } + if (itemExistsBefore ^ bitExistsBefore) { + System.out.println("wrong contains() before"); + return; + } + if (itemExistsAfter ^ bitExistsAfter) { + System.out.println("wrong contains() after"); + return; + } + + // check the list of elements + if (!checkContent(currentBits, currentItems)) { + System.out.println("add() error"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("\tcorrect: " + currentItems.toString()); + System.out.println("\twrong: " + currentBits.toString()); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the representation + 
ExtendedSet otherBits = previousBits.convert(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation error"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the union size + ExtendedSet singleBitSet = empty(c); + singleBitSet.add(item); + if (currentItems.size() != currentBits.unionSize(singleBitSet)) { + System.out.println("Size error"); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + } + + System.out.println("Final"); + System.out.println(currentBits.debugInfo()); + + System.out.println(); + System.out.println(IntSetStatistics.summary()); + } + + /** + * Stress test for {@link ConciseSet#remove(Object)} + *

+ * It starts from a very dense set (most of the words will be 1's + * sequences) and progressively become very sparse (words first + * become 1's sequences with 1 unset bit and there will be few + * words per item, then words become literals, and finally they + * become 0's sequences and drastically reduce in number) + * + * @param c class to test + */ + private static void testForRemovalStress(Class> c) + { + ExtendedSet previousBits = empty(c); + ExtendedSet currentBits = empty(c); + TreeSet currentItems = new TreeSet(); + + Random rnd = new MersenneTwister(); + + // create a 1-filled bitset + currentBits.add((1 << MatrixIntSet.COL_POW) * 5 - 1); + currentBits.complement(); + currentItems.addAll(currentBits); + if (currentItems.size() != (1 << MatrixIntSet.COL_POW) * 5 - 1) { + System.out.println("Unexpected error!"); + System.out.println(currentBits.size()); + System.out.println(currentItems.size()); + return; + } + + // remove 100000 random numbers + for (int i = 0; i < 100000 & !currentBits.isEmpty(); i++) { + // random number to remove + int item = rnd.nextInt(10000 + 1); + + // keep the previous results + previousBits = currentBits; + currentBits = currentBits.clone(); + + // remove the element + System.out.format("Removing %d...\n", item); + boolean itemExistsBefore = currentItems.contains(item); + boolean itemRemoved = currentItems.remove(item); + boolean itemExistsAfter = currentItems.contains(item); + boolean bitExistsBefore = currentBits.contains(item); + boolean bitRemoved = currentBits.remove(item); + boolean bitExistsAfter = currentBits.contains(item); + if (itemRemoved ^ bitRemoved) { + System.out.println("wrong remove() result"); + return; + } + if (itemExistsBefore ^ bitExistsBefore) { + System.out.println("wrong contains() before"); + return; + } + if (itemExistsAfter ^ bitExistsAfter) { + System.out.println("wrong contains() after"); + return; + } + + // check the list of elements + if (!checkContent(currentBits, currentItems)) { + 
System.out.println("remove() error"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + + return; + } + + // check the representation + ExtendedSet otherBits = empty(c); + otherBits.addAll(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation error"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + + return; + } + + // check the union size + ExtendedSet singleBitSet = empty(c); + singleBitSet.add(item); + if (currentItems.size() != currentBits.differenceSize(singleBitSet)) { + System.out.println("Size error"); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + + return; + } + } + + System.out.println("Final"); + System.out.println(currentBits.debugInfo()); + + System.out.println(); + System.out.println(IntSetStatistics.summary()); + } + + /** + * Random operations on random sets. + *

+ * It randomly chooses among {@link ConciseSet#addAll(Collection)}, + * {@link ConciseSet#removeAll(Collection)}, and + * {@link ConciseSet#retainAll(Collection)}, and perform the operation over + * random sets + * + * @param c class to test + */ + private static void testForRandomOperationsStress(Class> c, boolean testFillAndClear) + { + ExtendedSet bitsLeft = empty(c); + ExtendedSet bitsRight = empty(c); + SortedSet itemsLeft = new TreeSet(); + SortedSet itemsRight = new TreeSet(); + + Random r = new MersenneTwister(); + final int maxCardinality = 1000; + + // random operation loop + for (int i = 0; i < 1000000; i++) { + System.out.format("Test %,d (%,d): ", i, System.currentTimeMillis()); + + RandomNumbers rn; + switch (r.nextInt(3)) { + case 0: + rn = new RandomNumbers.Uniform( + r.nextInt(maxCardinality), + r.nextDouble() * 0.999, + r.nextInt(maxCardinality / 10) + ); + break; + case 1: + rn = new RandomNumbers.Zipfian( + r.nextInt(maxCardinality), + r.nextDouble() * 0.9, + r.nextInt(maxCardinality / 10), + 2 + ); + break; + case 2: + rn = new RandomNumbers.Markovian( + r.nextInt(maxCardinality), + r.nextDouble() * 0.999, + r.nextInt(maxCardinality / 10) + ); + break; + default: + throw new RuntimeException("unexpected"); + } + + /* + * fill() and clear() + */ + if (testFillAndClear) { + bitsRight.clear(); + itemsRight.clear(); + Iterator itr1 = rn.generate().iterator(); + Iterator itr2 = rn.generate().iterator(); + while (itr1.hasNext() && itr2.hasNext()) { + ExtendedSet clone = bitsRight.clone(); + Integer from = itr1.next(); + Integer to = itr2.next(); + if (from.compareTo(to) > 0) { + Integer s = from; + from = to; + to = s; + } + + boolean fill = r.nextBoolean(); + if (fill) { + for (int j = from; j <= to; j++) { + itemsRight.add(j); + } + bitsRight.fill(from, to); + } else { + for (int j = from; j <= to; j++) { + itemsRight.remove(j); + } + bitsRight.clear(from, to); + } + + if (!checkContent(bitsLeft, itemsLeft)) { + System.out.println("FILL/CLEAR 
ERROR!"); + System.out.println("Same elements: " + (itemsLeft.toString().equals(bitsLeft.toString()))); + System.out.println("itemsLeft:"); + System.out.println(itemsLeft); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + + System.out.println("itemsLeft.size(): " + itemsLeft.size() + " ?= bitsLeft.size(): " + bitsLeft.size()); + for (Integer x : bitsLeft) { + if (!itemsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + for (Integer x : itemsLeft) { + if (!bitsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + System.out.println("bitsLeft.last(): " + bitsLeft.last() + " ?= itemsLeft.last(): " + itemsLeft.last()); + System.out.println("bitsLeft.first(): " + bitsLeft.first() + " ?= itemsLeft.first(): " + itemsLeft.first()); + + return; + } + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("FILL/CLEAR FORMAT ERROR!"); + System.out.println("fill: " + fill); + System.out.println("from " + from + " to " + to); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + } + } + + + /* + * contains(), add(), and remove() + */ + bitsRight.clear(); + itemsRight.clear(); + for (Integer e : rn.generate()) { + if (itemsRight.contains(e) ^ bitsRight.contains(e)) { + System.out.println("CONTAINS ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + return; + } + ExtendedSet clone = 
bitsRight.clone(); + boolean resItems = itemsRight.add(e); + boolean resBits = bitsRight.add(e); + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("ADD ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + if (resItems != resBits) { + System.out.println("ADD BOOLEAN ERROR!"); + System.out.println("itemsRight.add(" + e + "): " + resItems); + System.out.println("bitsRight.add(" + e + "): " + resBits); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + return; + } + } + for (Integer e : rn.generate()) { + ExtendedSet clone = bitsRight.clone(); + boolean resItems = itemsRight.remove(e); + boolean resBits = bitsRight.remove(e); + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("REMOVE ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + if (resItems != resBits) { + System.out.println("REMOVE BOOLEAN ERROR!"); + System.out.println("itemsRight.remove(" + e + "): " + 
resItems); + System.out.println("bitsRight.remove(" + e + "): " + resBits); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + } + for (Integer e : rn.generate()) { + ExtendedSet clone = bitsRight.clone(); + if (!itemsRight.remove(e)) { + itemsRight.add(e); + } + bitsRight.flip(e); + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("FLIP ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + } + + // new right operand + itemsRight = rn.generate(); + bitsRight.clear(); + bitsRight.addAll(itemsRight); + + /* + * check for content correctness, first(), and last() + */ + if (!checkContent(bitsRight, itemsRight)) { + System.out.println("RIGHT OPERAND ERROR!"); + System.out.println("Same elements: " + (itemsRight.toString().equals(bitsRight.toString()))); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + + System.out.println("itemsRight.size(): " + itemsRight.size() + " ?= bitsRight.size(): " + bitsRight.size()); + for (Integer x : bitsRight) { + if (!itemsRight.contains(x)) { + System.out.println("itemsRight does not contain " + x); + } + } + for (Integer x : itemsRight) { + if (!bitsRight.contains(x)) { + System.out.println("itemsRight does not contain " + x); + } + } + 
System.out.println("bitsRight.last(): " + bitsRight.last() + " ?= itemsRight.last(): " + itemsRight.last()); + System.out.println("bitsRight.first(): " + bitsRight.first() + " ?= itemsRight.first(): " + itemsRight.first()); + + return; + } + + /* + * containsAll() + */ + boolean bitsRes = bitsLeft.containsAll(bitsRight); + boolean itemsRes = itemsLeft.containsAll(itemsRight); + if (bitsRes != itemsRes) { + System.out.println("CONTAINS_ALL ERROR!"); + System.out.println("bitsLeft.containsAll(bitsRight): " + bitsRes); + System.out.println("itemsLeft.containsAll(itemsRight): " + itemsRes); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("bitsLeft.intersection(bitsRight)"); + System.out.println(bitsLeft.intersection(bitsRight)); + System.out.println("itemsLeft.retainAll(itemsRight)"); + itemsLeft.retainAll(itemsRight); + System.out.println(itemsLeft); + return; + } + + /* + * containsAny() + */ + bitsRes = bitsLeft.containsAny(bitsRight); + itemsRes = true; + for (Integer x : itemsRight) { + itemsRes = itemsLeft.contains(x); + if (itemsRes) { + break; + } + } + if (bitsRes != itemsRes) { + System.out.println("bitsLeft.containsAny(bitsRight): " + bitsRes); + System.out.println("itemsLeft.containsAny(itemsRight): " + itemsRes); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("bitsLeft.intersection(bitsRight)"); + System.out.println(bitsLeft.intersection(bitsRight)); + System.out.println("itemsLeft.retainAll(itemsRight)"); + itemsLeft.retainAll(itemsRight); + System.out.println(itemsLeft); + return; + } + + /* + * containsAtLeast() + */ + int l = 1 + r.nextInt(bitsRight.size() + 1); + bitsRes = bitsLeft.containsAtLeast(bitsRight, l); + int itemsResCnt = 0; + for (Integer x : itemsRight) { + if 
(itemsLeft.contains(x)) { + itemsResCnt++; + } + if (itemsResCnt >= l) { + break; + } + } + if (bitsRes != (itemsResCnt >= l)) { + System.out.println("bitsLeft.containsAtLeast(bitsRight, " + l + "): " + bitsRes); + System.out.println("itemsLeft.containsAtLeast(itemsRight, " + l + "): " + (itemsResCnt >= l)); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("bitsLeft.intersection(bitsRight)"); + System.out.println(bitsLeft.intersection(bitsRight)); + System.out.println("itemsLeft.retainAll(itemsRight)"); + itemsLeft.retainAll(itemsRight); + System.out.println(itemsLeft); + return; + } + + /* + * Perform a random operation with the previous set: + * addAll() and unionSize() + * removeAll() and differenceSize() + * retainAll() and intersectionSize() + * symmetricDifference() and symmetricDifferenceSize() + * complement() and complementSize() + */ + ExtendedSet alternative = null; + int operationSize = 0; + boolean resItems = true, resBits = true; + switch (1 + r.nextInt(5)) { + case 1: + System.out.format(" union of %d elements with %d elements... ", itemsLeft.size(), itemsRight.size()); + System.out.flush(); + operationSize = bitsLeft.unionSize(bitsRight); + resItems = itemsLeft.addAll(itemsRight); + alternative = bitsLeft.union(bitsRight); + resBits = bitsLeft.addAll(bitsRight); + break; + + case 2: + System.out.format(" difference of %d elements with %d elements... ", itemsLeft.size(), itemsRight.size()); + System.out.flush(); + operationSize = bitsLeft.differenceSize(bitsRight); + resItems = itemsLeft.removeAll(itemsRight); + alternative = bitsLeft.difference(bitsRight); + resBits = bitsLeft.removeAll(bitsRight); + break; + + case 3: + System.out.format(" intersection of %d elements with %d elements... 
", itemsLeft.size(), itemsRight.size()); + System.out.flush(); + operationSize = bitsLeft.intersectionSize(bitsRight); + resItems = itemsLeft.retainAll(itemsRight); + alternative = bitsLeft.intersection(bitsRight); + resBits = bitsLeft.retainAll(bitsRight); + break; + + case 4: + System.out.format( + " symmetric difference of %d elements with %d elements... ", + itemsLeft.size(), + itemsRight.size() + ); + System.out.flush(); + operationSize = bitsLeft.symmetricDifferenceSize(bitsRight); + TreeSet temp = new TreeSet(itemsRight); + temp.removeAll(itemsLeft); + itemsLeft.removeAll(itemsRight); + itemsLeft.addAll(temp); + bitsLeft = bitsLeft.symmetricDifference(bitsRight); + alternative = bitsLeft; + break; + + case 5: + System.out.format(" complement of %d elements... ", itemsLeft.size()); + System.out.flush(); + operationSize = bitsLeft.complementSize(); + if (!itemsLeft.isEmpty()) { + if ((bitsLeft instanceof IntegerSet) && (((IntegerSet) bitsLeft).intSet() instanceof MatrixIntSet)) { + BinaryMatrix m = ((MatrixIntSet) ((IntegerSet) bitsLeft).intSet()).matrix; + int x = m.maxCol(); + for (int rx = m.maxRow(); rx >= 0; rx--) { + for (int cx = x; cx >= 0; cx--) { + if (!itemsLeft.add(MatrixIntSet.toInt(rx, cx))) { + itemsLeft.remove(MatrixIntSet.toInt(rx, cx)); + } + } + } + } else { + for (int j = itemsLeft.last(); j >= 0; j--) { + if (!itemsLeft.add(j)) { + itemsLeft.remove(j); + } + } + } + } + bitsLeft.complement(); + alternative = bitsLeft; + break; + default: + throw new RuntimeException("Unexpected error!"); + } + + // check the list of elements + if (!checkContent(bitsLeft, itemsLeft)) { + System.out.println("OPERATION ERROR!"); + System.out.println("Same elements: " + (itemsLeft.toString().equals(bitsLeft.toString()))); + System.out.println("itemsLeft:"); + System.out.println(itemsLeft); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + + System.out.println("itemsLeft.size(): " + itemsLeft.size() + " ?= bitsLeft.size(): " + 
bitsLeft.size()); + for (Integer x : bitsLeft) { + if (!itemsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + for (Integer x : itemsLeft) { + if (!bitsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + System.out.println("bitsLeft.last(): " + bitsLeft.last() + " ?= itemsLeft.last(): " + itemsLeft.last()); + System.out.println("bitsLeft.first(): " + bitsLeft.first() + " ?= itemsLeft.first(): " + itemsLeft.first()); + + return; + } + + // check the size + if (itemsLeft.size() != operationSize) { + System.out.println("OPERATION SIZE ERROR"); + System.out.println("Wrong size: " + operationSize); + System.out.println("Correct size: " + itemsLeft.size()); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + return; + } + + // check the boolean result + if (resItems != resBits) { + System.out.println("OPERATION BOOLEAN ERROR!"); + System.out.println("resItems: " + resItems); + System.out.println("resBits: " + resBits); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + return; + } + + // check the internal representation of the result + ExtendedSet x = bitsLeft.empty(); + x.addAll(itemsLeft); + if (x.hashCode() != bitsLeft.hashCode()) { + System.out.println("Internal representation error!"); + System.out.println("FROM APPEND:"); + System.out.println(x.debugInfo()); + System.out.println("FROM OPERATION:"); + System.out.println(bitsLeft.debugInfo()); + return; + } + + // check similar results + if (!bitsLeft.equals(alternative)) { + System.out.println("ALTERNATIVE OPERATION ERROR!"); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("alternative:"); + System.out.println(alternative.debugInfo()); + return; + } + + System.out.println("done."); + } + } + + /** + * Stress test (addition) for {@link #subSet(Integer, Integer)} + */ + private static void testForSubSetAdditionStress() + { + IntegerSet 
previousBits = new IntegerSet(new ConciseSet()); + IntegerSet currentBits = new IntegerSet(new ConciseSet()); + TreeSet currentItems = new TreeSet(); + + Random rnd = new MersenneTwister(); + + for (int j = 0; j < 100000; j++) { + // keep the previous result + previousBits = currentBits; + currentBits = currentBits.clone(); + + // generate a new subview [min, max) with max <= 10000, and pick an item in [min, max - 1] + int min = rnd.nextInt(10000); + int max = min + 1 + rnd.nextInt(10000 - (min + 1) + 1); + int item = min + rnd.nextInt((max - 1) - min + 1); + System.out.println("Adding " + item + " to the subview from " + min + " to " + max + " - 1"); + SortedSet subBits = currentBits.subSet(min, max); + SortedSet subItems = currentItems.subSet(min, max); + boolean subBitsResult = subBits.add(item); + boolean subItemsResult = subItems.add(item); + + if (subBitsResult != subItemsResult + || subBits.size() != subItems.size() + || !subBits.toString().equals(subItems.toString())) { + System.out.println("Subset error!"); + return; + } + + if (!checkContent(currentBits, currentItems)) { + System.out.println("Subview not correct!"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the representation + IntegerSet otherBits = new IntegerSet(new ConciseSet()); + otherBits.addAll(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation not correct!"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + } + + System.out.println(currentBits.debugInfo()); + System.out.println(IntSetStatistics.summary()); + } + + /** + * Stress test (removal) for {@link ConciseSet#subSet(Integer, Integer)} + */ + private static void testForSubSetRemovalStress() + { + IntegerSet 
previousBits = new IntegerSet(new ConciseSet()); + IntegerSet currentBits = new IntegerSet(new ConciseSet()); + TreeSet currentItems = new TreeSet(); + + // create a 1-filled bitset + currentBits.add(10001); + currentBits.complement(); + currentItems.addAll(currentBits); + if (currentItems.size() != 10001) { + System.out.println("Unexpected error!"); + return; + } + + Random rnd = new MersenneTwister(); + + for (int j = 0; j < 100000; j++) { + // keep the previous result + previousBits = currentBits; + currentBits = currentBits.clone(); + + // generate a new subview + int min = rnd.nextInt(10000); + int max = min + 1 + rnd.nextInt(10000 - (min + 1) + 1); + int item = rnd.nextInt(10000 + 1); + System.out.println("Removing " + item + " from the subview from " + min + " to " + max + " - 1"); + SortedSet subBits = currentBits.subSet(min, max); + SortedSet subItems = currentItems.subSet(min, max); + boolean subBitsResult = subBits.remove(item); + boolean subItemsResult = subItems.remove(item); + + if (subBitsResult != subItemsResult + || subBits.size() != subItems.size() + || !subBits.toString().equals(subItems.toString())) { + System.out.println("Subset error!"); + return; + } + + if (!checkContent(currentBits, currentItems)) { + System.out.println("Subview not correct!"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the representation + IntegerSet otherBits = new IntegerSet(new ConciseSet()); + otherBits.addAll(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation not correct!"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + } + + System.out.println(currentBits.debugInfo()); + 
System.out.println(IntSetStatistics.summary()); + } + + /** + * Random operations on random sub sets. + *

+ * It randomly chooses among all operations and performs the operation over + * random sets + */ + private static void testForSubSetRandomOperationsStress() + { + IntegerSet bits = new IntegerSet(new ConciseSet()); + IntegerSet bitsPrevious = new IntegerSet(new ConciseSet()); + TreeSet items = new TreeSet(); + + Random rnd = new MersenneTwister(); + + // random operation loop + for (int i = 0; i < 100000; i++) { + System.out.print("Test " + i + ": "); + + // new set + bitsPrevious = bits.clone(); + if (!bitsPrevious.toString().equals(bits.toString())) { + throw new RuntimeException("clone() error!"); + } + bits.clear(); + items.clear(); + final int size = 1 + rnd.nextInt(10000); + final int min = 1 + rnd.nextInt(10000 - 1); + final int max = min + rnd.nextInt(10000 - min + 1); + final int minSub = 1 + rnd.nextInt(10000 - 1); + final int maxSub = minSub + rnd.nextInt(10000 - minSub + 1); + for (int j = 0; j < size; j++) { + int item = min + rnd.nextInt(max - min + 1); + bits.add(item); + items.add(item); + } + + // perform base checks + SortedSet bitsSubSet = bits.subSet(minSub, maxSub); + SortedSet itemsSubSet = items.subSet(minSub, maxSub); + if (!bitsSubSet.toString().equals(itemsSubSet.toString())) { + System.out.println("toString() difference!"); + System.out.println("value: " + bitsSubSet.toString()); + System.out.println("actual: " + itemsSubSet.toString()); + return; + } + if (bitsSubSet.size() != itemsSubSet.size()) { + System.out.println("size() difference!"); + System.out.println("value: " + bitsSubSet.size()); + System.out.println("actual: " + itemsSubSet.size()); + System.out.println("bits: " + bits.toString()); + System.out.println("items: " + items.toString()); + System.out.println("bitsSubSet: " + bitsSubSet.toString()); + System.out.println("itemsSubSet: " + itemsSubSet.toString()); + return; + } + if (!itemsSubSet.isEmpty() && (!bitsSubSet.first().equals(itemsSubSet.first()))) { + System.out.println("first() difference!"); + 
System.out.println("value: " + bitsSubSet.first()); + System.out.println("actual: " + itemsSubSet.first()); + System.out.println("bits: " + bits.toString()); + System.out.println("items: " + items.toString()); + System.out.println("bitsSubSet: " + bitsSubSet.toString()); + System.out.println("itemsSubSet: " + itemsSubSet.toString()); + return; + } + if (!itemsSubSet.isEmpty() && (!bitsSubSet.last().equals(itemsSubSet.last()))) { + System.out.println("last() difference!"); + System.out.println("value: " + bitsSubSet.last()); + System.out.println("actual: " + itemsSubSet.last()); + System.out.println("bits: " + bits.toString()); + System.out.println("items: " + items.toString()); + System.out.println("bitsSubSet: " + bitsSubSet.toString()); + System.out.println("itemsSubSet: " + itemsSubSet.toString()); + return; + } + + // perform the random operation + boolean resBits = false; + boolean resItems = false; + boolean exceptionBits = false; + boolean exceptionItems = false; + switch (1 + rnd.nextInt(4)) { + case 1: + System.out.format(" addAll() of %d elements on %d elements... ", bitsPrevious.size(), bits.size()); + try { + resBits = bitsSubSet.addAll(bitsPrevious); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + resItems = itemsSubSet.addAll(bitsPrevious); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + + case 2: + System.out.format(" removeAll() of %d elements on %d elements... 
", bitsPrevious.size(), bits.size()); + try { + resBits = bitsSubSet.removeAll(bitsPrevious); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + resItems = itemsSubSet.removeAll(bitsPrevious); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + + case 3: + System.out.format(" retainAll() of %d elements on %d elements... ", bitsPrevious.size(), bits.size()); + try { + resBits = bitsSubSet.retainAll(bitsPrevious); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + resItems = itemsSubSet.retainAll(bitsPrevious); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + + case 4: + System.out.format(" clear() of %d elements on %d elements... 
", bitsPrevious.size(), bits.size()); + try { + bitsSubSet.clear(); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + itemsSubSet.clear(); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + } + + if (exceptionBits != exceptionItems) { + System.out.println("Incorrect exception!"); + return; + } + + if (resBits != resItems) { + System.out.println("Incorrect results!"); + System.out.println("resBits: " + resBits); + System.out.println("resItems: " + resItems); + return; + } + + if (!checkContent(bits, items)) { + System.out.println("Subview not correct!"); + System.out.format("min: %d, max: %d, minSub: %d, maxSub: %d\n", min, max, minSub, maxSub); + System.out.println("Same elements: " + (items.toString().equals(bits.toString()))); + System.out.println("Original: " + items); + System.out.println(bits.debugInfo()); + System.out.println(bitsPrevious.debugInfo()); + return; + } + + // check the representation + IntegerSet otherBits = new IntegerSet(new ConciseSet()); + otherBits.addAll(items); + if (otherBits.hashCode() != bits.hashCode()) { + System.out.println("Representation not correct!"); + System.out.format("min: %d, max: %d, minSub: %d, maxSub: %d\n", min, max, minSub, maxSub); + System.out.println(bits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(bitsPrevious.debugInfo()); + return; + } + + System.out.println("done."); + } + } + + /** + * Test the method {@link ExtendedSet#compareTo(ExtendedSet)} + * + * @param c class to test + */ + private static void testForComparatorSimple(Class> c) + { + ExtendedSet bitsLeft = empty(c); + ExtendedSet bitsRight = empty(c); + + bitsLeft.add(1); + bitsLeft.add(2); + bitsLeft.add(3); + bitsLeft.add(100); + bitsRight.add(1000000); + System.out.println("A: " + bitsLeft); + 
System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsLeft.add(1000000); + bitsRight.add(1); + bitsRight.add(2); + bitsRight.add(3); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsLeft.remove(100); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsRight.remove(1); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsLeft.remove(1); + bitsLeft.remove(2); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + } + + /** + * Randomized test for {@link ExtendedSet#compareTo(ExtendedSet)}: each random set is mirrored as a {@link BigInteger} bit mask and the two comparison results must agree in sign + * + * @param c class to test + */ + private static void testForComparatorComplex(Class> c) + { + ExtendedSet bitsLeft = empty(c); + ExtendedSet bitsRight = empty(c); + + Random rnd = new MersenneTwister(); + for (int i = 0; i < 10000; i++) { + // empty numbers + BigInteger correctLeft = BigInteger.ZERO; + BigInteger correctRight = BigInteger.ZERO; + bitsLeft.clear(); + bitsRight.clear(); + + int size = 10 + rnd.nextInt(10000); + RandomNumbers rn; + if (rnd.nextBoolean()) { + rn = new RandomNumbers.Uniform(rnd.nextInt(size), rnd.nextDouble() * 0.999, rnd.nextInt(size / 10)); + } else { + rn = new RandomNumbers.Markovian(rnd.nextInt(size), rnd.nextDouble() * 0.999, rnd.nextInt(size / 10)); + } + bitsLeft.addAll(rn.generate()); + if (rnd.nextBoolean()) { + bitsRight.addAll(bitsLeft); + bitsRight.add(rnd.nextInt(size)); + } else { + bitsRight.addAll(rn.generate()); + } + for 
(int x : bitsLeft.descending()) { + correctLeft = correctLeft.setBit(x); + } + for (int x : bitsRight) { + correctRight = correctRight.setBit(x); + } + + // compare them! compareTo() only guarantees the sign of its result, so signs (not raw values) are compared + boolean correct = Integer.signum(bitsLeft.compareTo(bitsRight)) == Integer.signum(correctLeft.compareTo(correctRight)); + System.out.println(i + ": " + correct); + if (!correct) { + System.out.println("ERROR!"); + System.out.println("bitsLeft: " + bitsLeft); + System.out.println(" " + bitsLeft.debugInfo()); + System.out.println("bitsRight: " + bitsRight); + System.out.println(" " + bitsRight.debugInfo()); + int maxLength = Math.max(correctLeft.bitLength(), correctRight.bitLength()); + System.out.format("correctLeft.toString(2): %" + maxLength + "s\n", correctLeft.toString(2)); + System.out.format("correctRight.toString(2): %" + maxLength + "s\n", correctRight.toString(2)); + System.out.println("correctLeft.compareTo(correctRight): " + correctLeft.compareTo(correctRight)); + System.out.println("bitsLeft.compareTo(bitsRight): " + bitsLeft.compareTo(bitsRight)); + + Iterator itrLeft = bitsLeft.descendingIterator(); + Iterator itrRight = bitsRight.descendingIterator(); + while (itrLeft.hasNext() && itrRight.hasNext()) { + int l = itrLeft.next(); + int r = itrRight.next(); + if (l != r) { + System.out.println("l != r --> " + l + ", " + r); + break; + } + } + return; + } + } + System.out.println("Done!"); + } + + /** + * Stress test for {@link ExtendedSet#descendingIterator()} + * + * @param c class to test + */ + private static void testForDescendingIterator(Class> c) + { + ExtendedSet bits = empty(c); + + Random rnd = new MersenneTwister(); + for (int i = 0; i < 100000; i++) { + int n = rnd.nextInt(10000); + System.out.print(i + ": add " + n); + bits.add(n); + + Set x = new HashSet(bits); + Set y = new HashSet(); + try { + for (Integer e : bits.descending()) { + y.add(e); + } + } + catch (Exception e) { + System.out.println("\nERROR!"); + System.out.println(e.getMessage()); + System.out.println(bits.debugInfo()); + break; + } + 
boolean correct = x.equals(y); + System.out.println(" --> " + correct); + if (!correct) { + System.out.println(bits.debugInfo()); + System.out.print("result: "); + for (Integer e : bits.descending()) { + System.out.print(e + ", "); + } + System.out.println(); + break; + } + } + + System.out.println("Done!"); + } + + /** + * Stress test for {@link ConciseSet#get(int)} + * + * @param c class to test + */ + private static void testForPosition(Class> c) + { + ExtendedSet bits = empty(c); + + Random rnd = new MersenneTwister(31); + for (int i = 0; i < 1000; i++) { + // new set + bits.clear(); + final int size = 1 + rnd.nextInt(10000); + final int min = 1 + rnd.nextInt(10000 - 1); + final int max = min + rnd.nextInt(10000 - min + 1); + for (int j = 0; j < size; j++) { + int item = min + rnd.nextInt(max - min + 1); + bits.add(item); + } + + // check correctness + String good = bits.toString(); + StringBuilder other = new StringBuilder(); + int s = bits.size(); + other.append('['); + for (int j = 0; j < s; j++) { + other.append(bits.get(j)); + if (j < s - 1) { + other.append(", "); + } + } + other.append(']'); + + if (good.equals(other.toString())) { + System.out.println(i + ") OK"); + } else { + System.out.println("ERROR"); + System.out.println(bits.debugInfo()); + System.out.println(bits); + System.out.println(other); + return; + } + + int pos = 0; + for (Integer x : bits) { + if (bits.indexOf(x) != pos) { + System.out.println("ERROR! 
" + pos + " != " + bits.indexOf(x) + " for element " + x); + System.out.println(bits.debugInfo()); + return; + } + pos++; + } + } + } + + /** + * Test for {@link ExtendedIterator#skipAllBefore(Object)} + * + * @param c class to test + */ + private static void testForSkip(Class> c) + { + ExtendedSet bits = empty(c); + + Random rnd = new MersenneTwister(31); + for (int i = 0; i < 10000; i++) { + int max = rnd.nextInt(10000); + bits = bits.convert(new RandomNumbers.Uniform( + rnd.nextInt(1000), + rnd.nextDouble() * 0.999, + rnd.nextInt(100) + ).generate()); + + for (int j = 0; j < 100; j++) { + int skip = rnd.nextInt(max + 1); + boolean reverse = rnd.nextBoolean(); + System.out.format("%d) size=%d, skip=%d, reverse=%b ---> ", (i * 100) + j + 1, bits.size(), skip, reverse); + + ExtendedIterator itr1, itr2; + if (!reverse) { + itr1 = bits.iterator(); + itr2 = bits.iterator(); + while (itr1.hasNext() && itr1.next() < skip) {/* nothing */} + } else { + itr1 = bits.descendingIterator(); + itr2 = bits.descendingIterator(); + while (itr1.hasNext() && itr1.next() > skip) {/* nothing */} + } + if (!itr1.hasNext()) { + System.out.println("Skipped!"); + continue; + } + itr2.skipAllBefore(skip); + itr2.next(); + Integer i1, i2; + if (!(i1 = itr1.next()).equals(i2 = itr2.next())) { + System.out.println("Error!"); + System.out.println("i1 = " + i1); + System.out.println("i2 = " + i2); + System.out.println(bits.debugInfo()); + return; + } + System.out.println("OK!"); + } + } + System.out.println("Done!"); + } + + /** + * Test launcher + * + * @param args ID of the test to execute + */ + public static void main(String[] args) + { + // NOTE: the most complete test is TestCase.RANDOM_OPERATION_STRESS +// TestCase testCase = TestCase.ADDITION_STRESS; +// TestCase testCase = TestCase.REMOVAL_STRESS; +// TestCase testCase = TestCase.RANDOM_OPERATION_STRESS; +// TestCase testCase = TestCase.FILL_CLEAR_STRESS; +// TestCase testCase = TestCase.SKIP; + TestCase testCase = TestCase.POSITION; 
+// TestCase testCase = TestCase.COMPARATOR_COMPLEX; +// TestCase testCase = TestCase.DESCENDING_ITERATOR; + +// Class> classToTest = IntegerHashSet.class; +// Class> classToTest = IntegerFastSet.class; +// Class> classToTest = IntegerConciseSet.class; +// Class> classToTest = IntegerConcise2Set.class; +// Class> classToTest = IntegerConcisePlusSet.class; +// Class> classToTest = IntegerWAHSet.class; +// Class> classToTest = ListSet.class; +// Class> classToTest = LinkedSet.class; + Class> classToTest = MatrixSet.class; + + if (args != null && args.length > 0) { + try { + testCase = TestCase.values()[Integer.parseInt(args[0])]; + } + catch (NumberFormatException ignore) { + // nothing to do + } + } + + switch (testCase) { + case ADDITION_STRESS: + testForAdditionStress(classToTest); + break; + case REMOVAL_STRESS: + testForRemovalStress(classToTest); + break; + case RANDOM_OPERATION_STRESS: + testForRandomOperationsStress(classToTest, false); + break; + case FILL_CLEAR_STRESS: + testForRandomOperationsStress(classToTest, true); + break; + case SUBSET_ADDITION_STRESS_CONCISESET: + testForSubSetAdditionStress(); + break; + case SUBSET_REMOVAL_STRESS_CONCISESET: + testForSubSetRemovalStress(); + break; + case SUBSET_RANDOM_OPERATION_STRESS_CONCISESET: + testForSubSetRandomOperationsStress(); + break; + case COMPARATOR_SIMPLE: + testForComparatorSimple(classToTest); + break; + case COMPARATOR_COMPLEX: + testForComparatorComplex(classToTest); + break; + case DESCENDING_ITERATOR: + testForDescendingIterator(classToTest); + break; + case POSITION: + testForPosition(classToTest); + break; + case SKIP: + testForSkip(classToTest); + } + } + + /** + * @author alessandrocolantonio + */ + private enum TestCase + { + /** + * @uml.property name="aDDITION_STRESS" + * @uml.associationEnd + */ + ADDITION_STRESS, + /** + * @uml.property name="rEMOVAL_STRESS" + * @uml.associationEnd + */ + REMOVAL_STRESS, + /** + * @uml.property name="rANDOM_OPERATION_STRESS" + * @uml.associationEnd + 
*/ + RANDOM_OPERATION_STRESS, + /** + * @uml.property name="fILL_CLEAR_STRESS" + * @uml.associationEnd + */ + FILL_CLEAR_STRESS, + /** + * @uml.property name="sUBSET_ADDITION_STRESS_CONCISESET" + * @uml.associationEnd + */ + SUBSET_ADDITION_STRESS_CONCISESET, + /** + * @uml.property name="sUBSET_REMOVAL_STRESS_CONCISESET" + * @uml.associationEnd + */ + SUBSET_REMOVAL_STRESS_CONCISESET, + /** + * @uml.property name="sUBSET_RANDOM_OPERATION_STRESS_CONCISESET" + * @uml.associationEnd + */ + SUBSET_RANDOM_OPERATION_STRESS_CONCISESET, + /** + * @uml.property name="cOMPARATOR_SIMPLE" + * @uml.associationEnd + */ + COMPARATOR_SIMPLE, + /** + * @uml.property name="cOMPARATOR_COMPLEX" + * @uml.associationEnd + */ + COMPARATOR_COMPLEX, + /** + * @uml.property name="dESCENDING_ITERATOR" + * @uml.associationEnd + */ + DESCENDING_ITERATOR, + /** + * @uml.property name="pOSITION" + * @uml.associationEnd + */ + POSITION, + /** + * @uml.property name="sKIP" + * @uml.associationEnd + */ + SKIP,; + } + + @SuppressWarnings("unused") + private static class ListSet extends GenericExtendedSet + { + ListSet() + { + super(ArrayList.class); + } + } + + @SuppressWarnings("unused") + private static class LinkedSet extends GenericExtendedSet + { + LinkedSet() + { + super(LinkedList.class); + } + } + + @SuppressWarnings("unused") + private static class IntegerHashSet extends IntegerSet + { + IntegerHashSet() {super(new IntSetStatistics(new HashIntSet()));} + } + + @SuppressWarnings("unused") + private static class IntegerFastSet extends IntegerSet + { + IntegerFastSet() {super(new IntSetStatistics(new FastSet()));} + } + + @SuppressWarnings("unused") + private static class IntegerConciseSet extends IntegerSet + { + IntegerConciseSet() {super(new IntSetStatistics(new ConciseSet()));} + } + + // @SuppressWarnings("unused") +// private static class IntegerConcise2Set extends IntegerSet {IntegerConcise2Set() {super(new IntSetStatistics(new Concise2Set()));}} + @SuppressWarnings("unused") + private 
static class IntegerWAHSet extends IntegerSet + { + IntegerWAHSet() {super(new IntSetStatistics(new ConciseSet(true)));} + } + + @SuppressWarnings("unused") + private static class IntegerArraySet extends IntegerSet + { + IntegerArraySet() {super(new IntSetStatistics(new ArraySet()));} + } + + // @SuppressWarnings("unused") + private static class MatrixSet extends IntegerSet + { + MatrixSet() {super(new MatrixIntSet());} + } + + /** + * @author alessandrocolantonio + */ + final static class MatrixIntSet extends AbstractIntSet + { + final static int COL_POW = 10; + /** + * @uml.property name="matrix" + * @uml.associationEnd + */ + BinaryMatrix matrix = new BinaryMatrix(new FastSet()); + + final static int toInt(int row, int col) {return (row << COL_POW) + col;} + + final static int toRow(int index) {return index >>> COL_POW;} + + final static int toCol(int index) {return index & (0xFFFFFFFF >>> -COL_POW);} + + IntSet convert(BinaryMatrix m) + { + MatrixIntSet res = new MatrixIntSet(); + res.matrix = m; + return res; + } + + BinaryMatrix convert(IntSet s) + { + return ((MatrixIntSet) s).matrix; + } + + @Override + public IntSet convert(int... 
a) + { + MatrixIntSet res = new MatrixIntSet(); + for (int i : a) { + res.add(i); + } + return res; + } + + @Override + public IntSet convert(Collection c) + { + MatrixIntSet res = new MatrixIntSet(); + for (int i : c) { + res.add(i); + } + return res; + } + + @Override + public boolean add(int i) {return matrix.add(toRow(i), toCol(i));} + + @Override + public boolean addAll(IntSet c) {return matrix.addAll(convert(c));} + + @Override + public double bitmapCompressionRatio() {return matrix.bitmapCompressionRatio();} + + @Override + public void clear(int from, int to) {matrix.clear(toRow(from), toCol(from), toRow(to), toCol(to));} + + @Override + public void clear() {matrix.clear();} + + @Override + public double collectionCompressionRatio() {return matrix.collectionCompressionRatio();} + + @Override + public void complement() {matrix.complement();} + + @Override + public int complementSize() {return matrix.complementSize();} + + @Override + public IntSet complemented() {return convert(matrix.complemented());} + + @Override + public boolean contains(int i) {return matrix.contains(toRow(i), toCol(i));} + + @Override + public boolean containsAll(IntSet c) {return matrix.containsAll(convert(c));} + + @Override + public boolean containsAny(IntSet other) {return matrix.containsAny(convert(other));} + + @Override + public boolean containsAtLeast(IntSet other, int minElements) + { + return matrix.containsAtLeast( + convert(other), + minElements + ); + } + + @Override + public IntSet difference(IntSet other) {return convert(matrix.difference(convert(other)));} + + @Override + public int differenceSize(IntSet other) {return matrix.differenceSize(convert(other));} + + @Override + public IntSet empty() {return new MatrixIntSet();} + + @Override + public void fill(int from, int to) {matrix.fill(toRow(from), toCol(from), toRow(to), toCol(to));} + + @Override + public int first() {return toInt(matrix.first()[0], matrix.first()[1]);} + + @Override + public void flip(int e) 
{matrix.flip(toRow(e), toCol(e));} + + @Override + public int get(int i) {return toInt(matrix.get(i)[0], matrix.get(i)[1]);} + + @Override + public int indexOf(int e) {return matrix.indexOf(toRow(e), toCol(e));} + + @Override + public IntSet intersection(IntSet other) {return convert(matrix.intersection(convert(other)));} + + @Override + public int intersectionSize(IntSet other) {return matrix.intersectionSize(convert(other));} + + @Override + public boolean isEmpty() {return matrix.isEmpty();} + + @Override + public int last() {return toInt(matrix.last()[0], matrix.last()[1]);} + + @Override + public boolean remove(int i) {return matrix.remove(toRow(i), toCol(i));} + + @Override + public boolean removeAll(IntSet c) {return matrix.removeAll(convert(c));} + + @Override + public boolean retainAll(IntSet c) {return matrix.retainAll(convert(c));} + + @Override + public int size() {return matrix.size();} + + @Override + public IntSet symmetricDifference(IntSet other) {return convert(matrix.symmetricDifference(convert(other)));} + + @Override + public int symmetricDifferenceSize(IntSet other) {return matrix.symmetricDifferenceSize(convert(other));} + + @Override + public IntSet union(IntSet other) {return convert(matrix.union(convert(other)));} + + @Override + public int unionSize(IntSet other) {return matrix.unionSize(convert(other));} + + @Override + public int compareTo(IntSet o) {return matrix.compareTo(convert(o));} + + @Override + public double jaccardDistance(IntSet other) {return 0;} + + @Override + public double jaccardSimilarity(IntSet other) {return 0;} + + @Override + public double weightedJaccardDistance(IntSet other) {return 0;} + + @Override + public double weightedJaccardSimilarity(IntSet other) {return 0;} + + @Override + public List powerSet() {return null;} + + @Override + public List powerSet(int min, int max) {return null;} + + @Override + public int powerSetSize() {return 0;} + + @Override + public int powerSetSize(int min, int max) {return 0;} + + 
@Override + public IntIterator iterator() + { + return new IntIterator() + { + CellIterator itr = matrix.iterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public int next() + { + int[] c = itr.next(); + return toInt(c[0], c[1]); + } + + @Override + public void skipAllBefore(int element) {itr.skipAllBefore(toRow(element), toCol(element));} + + @Override + public void remove() {itr.remove();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + + @Override + public IntIterator descendingIterator() + { + return new IntIterator() + { + CellIterator itr = matrix.descendingIterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public int next() + { + int[] c = itr.next(); + return toInt(c[0], c[1]); + } + + @Override + public void skipAllBefore(int element) {itr.skipAllBefore(toRow(element), toCol(element));} + + @Override + public void remove() {itr.remove();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + + @Override + public IntSet clone() + { + MatrixIntSet res = new MatrixIntSet(); + res.matrix = matrix.clone(); + return res; + } + + @Override + public int hashCode() {return matrix.hashCode();} + + @Override + public boolean equals(Object obj) {return matrix.equals(((MatrixIntSet) obj).matrix);} + + @Override + public String debugInfo() + { + return super.toString() + "\n" + matrix.debugInfo(); + } + } +} + diff --git a/extendedset/src/test/java/io/druid/extendedset/Performance.java b/extendedset/src/test/java/io/druid/extendedset/Performance.java new file mode 100755 index 000000000000..9aa99c40da17 --- /dev/null +++ b/extendedset/src/test/java/io/druid/extendedset/Performance.java @@ -0,0 +1,496 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset; + +import io.druid.extendedset.intset.ArraySet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.FastSet; +import io.druid.extendedset.wrappers.GenericExtendedSet; +import io.druid.extendedset.wrappers.IntegerSet; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +//import it.uniroma3.mat.extendedset.intset.Concise2Set; + +/** + * Class for performance evaluation. 
+ * + * @author Alessandro Colantonio + * @version $Id: Performance.java 155 2011-05-30 22:27:00Z cocciasik $ + */ +public class Performance +{ + /** + * number of times to repeat each test + */ + private final static int REPETITIONS = 5; + /** + * minimum element + */ + private final static int SHIFT = 1000; + /** + * test results + */ + private final static Map, Double>> TIME_VALUES = new TreeMap, Double>>(); + /** + * time measurement, in nanoseconds + */ + private static long lastExecTime = -1; +// private static class IntegerConcise2Set extends IntegerSet {IntegerConcise2Set() {super(new Concise2Set());}} +// private static class IntegerWAHSet extends IntegerSet {IntegerWAHSet() {super(new WAHSet());}} + + /** + * Start time measurement + */ + private static void startTimer() + { + lastExecTime = System.nanoTime(); + } + + /** + * Stop time measurement + * + * @param c class being tested + * @param name method name + * @param div division factor (elapsed time and allocated memory will be + * divided by this number) + */ + private static void endTimer(Class c, String name, long div) + { + // final time + double t = ((double) (System.nanoTime() - lastExecTime)) / div; + Map, Double> measure = TIME_VALUES.get(name); + if (measure == null) { + TIME_VALUES.put(name, measure = new HashMap, Double>()); + } + + Double old = measure.get(c); + if (old == null || old > t) { + measure.put(c, t); + } + } + + /** + * Perform the time test + * + * @param classToTest class of the {@link Collection} instance to test + * @param leftOperand collection of integers representing the left operand + * {@link Collection} + * @param rightOperand collection of integers representing the right operand + * {@link Collection} + */ + @SuppressWarnings("unchecked") + private static void testClass( + Class classToTest, + Collection leftOperand, + Collection rightOperand + ) + { + // collections used for the test cases + Collection[] cAddAndRemove = new Collection[REPETITIONS]; + Collection[] 
cAddAll = new Collection[REPETITIONS]; + Collection[] cRemoveAll = new Collection[REPETITIONS]; + Collection[] cRetainAll = new Collection[REPETITIONS]; + Collection[] cRighOperand = new Collection[REPETITIONS]; + IntegerSet[] cLeftOperand = new IntegerSet[REPETITIONS]; + IntegerSet[] cUnionResults = new IntegerSet[REPETITIONS]; + IntegerSet[] cDifferenceResults = new IntegerSet[REPETITIONS]; + IntegerSet[] cIntersectionResults = new IntegerSet[REPETITIONS]; + + // CREATION + for (int i = 0; i < REPETITIONS; i++) { + try { + cAddAndRemove[i] = (Collection) classToTest.newInstance(); + cAddAll[i] = (Collection) classToTest.newInstance(); + cRemoveAll[i] = (Collection) classToTest.newInstance(); + cRetainAll[i] = (Collection) classToTest.newInstance(); + cRighOperand[i] = (Collection) classToTest.newInstance(); + cLeftOperand[i] = (IntegerSet) classToTest.newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + // APPEND + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + for (Integer x : rightOperand) { + cRighOperand[i].add(x); + } + for (Integer x : leftOperand) { + cAddAndRemove[i].add(x); + cLeftOperand[i].add(x); + cAddAll[i].add(x); + cRetainAll[i].add(x); + cRemoveAll[i].add(x); + } + endTimer(classToTest, "00) append()", (5 * leftOperand.size() + rightOperand.size())); + } + +// List xxx = new ArrayList(rightOperand); +// List yyy = new ArrayList(leftOperand); +// Collections.shuffle(xxx); +// Collections.shuffle(yyy); +// for (int i = 0; i < REPETITIONS; i++) { +// cRighOperand[i].clear(); +// cAddAndRemove[i].clear(); +// cLeftOperand[i].clear(); +// cAddAll[i].clear(); +// cRetainAll[i].clear(); +// cRemoveAll[i].clear(); +// } +// +// // ADDITION +// for (int i = 0; i < REPETITIONS; i++) { +// startTimer(); +// for (Integer x : xxx) +// cRighOperand[i].add(x); +// for (Integer x : yyy) { +// cAddAndRemove[i].add(x); +// cLeftOperand[i].add(x); +// cAddAll[i].add(x); +// cRetainAll[i].add(x); +// 
cRemoveAll[i].add(x); +// } +// endTimer(classToTest, "01) add()", (5 * leftOperand.size() + rightOperand.size())); +// } + + // REMOVAL + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + for (Integer x : rightOperand) { + cAddAndRemove[i].remove(x); + } + endTimer(classToTest, "02) remove()", rightOperand.size()); + } + + // CONTAINS + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + for (Integer x : rightOperand) { + cAddAll[i].contains(x); + } + endTimer(classToTest, "03) contains()", rightOperand.size()); + } + + // CONTAINS ALL + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cAddAll[i].containsAll(cRighOperand[i]); + endTimer(classToTest, "04) containsAll()", 1); + } + + // UNION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cAddAll[i].addAll(cRighOperand[i]); + endTimer(classToTest, "05) addAll()", 1); + } + + // DIFFERENCE + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cRemoveAll[i].removeAll(cRighOperand[i]); + endTimer(classToTest, "06) removeAll()", 1); + } + + // INTERSECTION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cRetainAll[i].retainAll(cRighOperand[i]); + endTimer(classToTest, "07) retainAll()", 1); + } + + // UNION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cUnionResults[i] = cLeftOperand[i].union(cRighOperand[i]); + endTimer(classToTest, "08) union()", 1); + } + + // DIFFERENCE + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cDifferenceResults[i] = cLeftOperand[i].difference(cRighOperand[i]); + endTimer(classToTest, "09) difference()", 1); + } + + // INTERSECTION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cIntersectionResults[i] = cLeftOperand[i].intersection(cRighOperand[i]); + endTimer(classToTest, "10) intersection()", 1); + } + } + + /** + * Summary information + */ + private static void printSummary(int cardinality, double density, Class[] classes) + { + for (Entry, Double>> e : TIME_VALUES.entrySet()) { + // method name + 
System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density); + System.out.print(e.getKey()); + for (Class c : classes) { + Double op = e.getValue().get(c); + System.out.format("\t%12d", (op == null ? 0 : op.intValue())); + } + System.out.println(); + } + } + + /** + * TEST + * + * @param args + */ + public static void main(String[] args) + { + boolean calcMemory = false; + boolean calcTime = true; + + boolean calcUniform = true; + boolean calcMarkovian = false; + boolean calcZipfian = false; + + int minCardinality = 10000; + int maxCardinality = 10000; + + /* + * MEMORY + */ + for (int i = 0; calcMemory && i < 3; i++) { + System.out.println(); + switch (i) { + case 0: + if (!calcUniform) { + continue; + } + System.out.println("#MEMORY UNIFORM"); + break; + case 1: + if (!calcMarkovian) { + continue; + } + System.out.println("#MEMORY MARKOVIAN"); + break; + case 2: + if (!calcZipfian) { + continue; + } + System.out.println("#MEMORY ZIPFIAN"); + break; + default: + throw new RuntimeException("unexpected"); + } + System.out.println("#cardinality\tdensity\tFastSet\tConciseSet\tWAHSet\tConcise2Set"); + for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) { + for (double density = .0001; density < 1D; density *= 1.7) { + System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density); + + Collection integers; + switch (i) { + case 0: + integers = new RandomNumbers.Uniform(cardinality, density, SHIFT).generate(); + break; + case 1: + integers = new RandomNumbers.Markovian(cardinality, density, SHIFT).generate(); + break; + case 2: + integers = new RandomNumbers.Zipfian(cardinality, density, SHIFT, 2).generate(); + break; + default: + throw new RuntimeException("unexpected"); + } + + IntegerSet s0 = new IntegerSet(new FastSet()); + s0.addAll(integers); + System.out.format("%7d\t", (int) (s0.collectionCompressionRatio() * cardinality)); + + IntegerSet s1 = new IntegerSet(new ConciseSet()); + s1.addAll(integers); + 
System.out.format("%7d\t", (int) (s1.collectionCompressionRatio() * cardinality)); + + IntegerSet s2 = new IntegerSet(new WAHSet()); + s2.addAll(integers); + System.out.format("%7d\t", (int) (s2.collectionCompressionRatio() * cardinality)); + +// IntegerSet s3 = new IntegerSet(new Concise2Set()); +// s3.addAll(integers); +// System.out.format("%7d\n", (int) (s3.collectionCompressionRatio() * cardinality)); + } + } + } + + Class[] classes = new Class[]{ +// ArrayList.class, +// LinkedList.class, +// ArrayListSet.class, +// LinkedListSet.class, +// HashSet.class, +// TreeSet.class, +IntegerArraySet.class, +IntegerFastSet.class, +// IntegerHashSet.class, +// IntegerWAHSet.class, +IntegerConciseSet.class, +// IntegerConcise2Set.class, + }; + + /* + * TIME + */ + for (int i = 0; calcTime && i < 3; i++) { + System.out.println(); + switch (i) { + case 0: + if (!calcUniform) { + continue; + } + System.out.println("#TIME UNIFORM"); + break; + case 1: + if (!calcMarkovian) { + continue; + } + System.out.println("#TIME MARKOVIAN"); + break; + case 2: + if (!calcZipfian) { + continue; + } + System.out.println("#TIME ZIPFIAN"); + break; + default: + throw new RuntimeException("unexpected"); + } + System.out.print("#cardinality\tdensity\toperation"); + for (Class c : classes) { + System.out.print("\t" + c.getSimpleName()); + } + System.out.println(); + for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) { + RandomNumbers r; + switch (i) { + case 0: + r = new RandomNumbers.Uniform(cardinality, 0.5, SHIFT); + break; + case 1: + r = new RandomNumbers.Markovian(cardinality, 0.5, SHIFT); + break; + case 2: + r = new RandomNumbers.Zipfian(cardinality, 0.5, SHIFT, 2); + break; + default: + throw new RuntimeException("unexpected"); + } + Collection x = r.generate(), y = r.generate(); + for (Class c : classes) { + testClass(c, x, y); + testClass(c, x, y); + } + for (double density = .0001; density < 1D; density *= 1.2) { +// for (double density = 
.0001; density < 1D; density *= 1.7) { +// for (double density = .0041; density < 1D; density *= 1.7) { +// for (double density = 0.8272; density > 0.00005; density /= 1.7) { + switch (i) { + case 0: + r = new RandomNumbers.Uniform(cardinality, density, SHIFT); + break; + case 1: + r = new RandomNumbers.Markovian(cardinality, density, SHIFT); + break; + case 2: + r = new RandomNumbers.Zipfian(cardinality, density, SHIFT, 2); + break; + default: + throw new RuntimeException("unexpected"); + } + x = r.generate(); + y = r.generate(); + for (Class c : classes) { + testClass(c, x, y); + } + printSummary(cardinality, density, classes); + TIME_VALUES.clear(); + } + } + } + + System.out.println("\nDone!"); + } + + /* test classes */ + private static class WAHSet extends ConciseSet + { + private static final long serialVersionUID = -5048707825606872979L; + + WAHSet() {super(true);} + } + + private static class IntegerArraySet extends IntegerSet + { + IntegerArraySet() {super(new ArraySet());} + } + + // private static class IntegerHashSet extends IntegerSet {IntegerHashSet() {super(new HashIntSet());}} + private static class IntegerFastSet extends IntegerSet + { + IntegerFastSet() {super(new FastSet());} + } + + private static class IntegerConciseSet extends IntegerSet + { + IntegerConciseSet() {super(new ConciseSet());} + } + + /** + * Class to test the sorted array + */ + @SuppressWarnings("unused") + private static class ArrayListSet extends GenericExtendedSet + { + ArrayListSet() + { + super(ArrayList.class); + } + } + + /** + * Class to test the sorted linked lists + */ + @SuppressWarnings("unused") + private static class LinkedListSet extends GenericExtendedSet + { + LinkedListSet() + { + super(LinkedList.class); + } + } +} diff --git a/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java b/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java new file mode 100755 index 000000000000..d4b85f1a43ab --- /dev/null +++ 
b/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java @@ -0,0 +1,242 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset; + + +import io.druid.extendedset.utilities.random.MersenneTwister; + +import java.util.Collection; +import java.util.Random; +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * Generation of random integer sets + * + * @author Alessandro Colantonio + * @version $Id: RandomNumbers.java 142 2011-02-15 23:12:28Z cocciasik $ + */ +public abstract class RandomNumbers +{ + /** + * pseudo-random number generator + */ + final private static Random RND = new MersenneTwister(); + + /** + * the smallest integer + */ + protected final int min; + + /** + * number of elements within the set + */ + protected final int cardinality; + + /** + * cardinality to range (i.e., {@link #max} - {@link #min} + 1) ratio + */ + protected final double density; + + /** + * Initializes internal data + * + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + */ + private RandomNumbers(int cardinality, double density, int min) + { + // parameter check + if (cardinality < 0) { + throw new IllegalArgumentException("cardinality < 0: " + cardinality); + } + if (density < 0D) { + throw new IllegalArgumentException("density < 0: " + 
density); + } + if (density > 1D) { + throw new IllegalArgumentException("density > 1: " + density); + } + + this.cardinality = cardinality; + this.density = density; + this.min = min; + } + + /** + * Test + * + * @param args + */ + public static void main(String[] args) + { + int size = 100; + System.out.println(new Uniform(size, 0.1, 0).generate()); + System.out.println(new Uniform(size, 0.9, 0).generate()); + System.out.println(new Zipfian(size, 0.1, 0, 2).generate()); + System.out.println(new Zipfian(size, 0.9, 0, 2).generate()); + System.out.println(new Markovian(size, 0.1, 0).generate()); + System.out.println(new Markovian(size, 0.9, 0).generate()); + } + + /** + * Next integer, according to the given probability distribution + * + * @return next pseudo-random integer + */ + protected abstract int next(); + + /** + * Generates the integer set of pseudo-random numbers + * + * @return the integer set + */ + public SortedSet generate() + { + SortedSet res = new TreeSet(); + while (res.size() < cardinality) { + res.add(next()); + } + return res; + } + + /** + * Integral numbers with uniform distribution. + *

+ * The maximum number will be (cardinality / density) - 1, + * while the average gap between two consecutive numbers will be + * density * cardinality. + */ + public static class Uniform extends RandomNumbers + { + /** + * the greatest integer + */ + private final int max; + + /** + * Initializes internal data + * + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + */ + public Uniform(int cardinality, double density, int min) + { + super(cardinality, density, min); + max = min + (int) (Math.round(cardinality / density)) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + return min + RND.nextInt(max - min + 1); + } + } + + /** + * Integral numbers with Zipfian (power-law) distribution. + *

+ * The maximum number will be (cardinality / density) - 1, + * while the average gap between two consecutive numbers will be + * density * cardinality. However, integers will be + * concentrated around the minimum value. + */ + public static class Zipfian extends RandomNumbers + { + /** + * the greatest integer + */ + private final int max; + + /** + * power-law exponent + */ + private final int k; + + /** + * Initializes internal data + * + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + * @param k power-law exponent + */ + public Zipfian(int cardinality, double density, int min, int k) + { + super(cardinality, density, min); + this.k = k; + max = min + (int) (Math.round(cardinality / density)) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + return min + (int) ((max - min + 1) * Math.pow(RND.nextDouble(), k)); + } + } + + /** + * Integral numbers with Markovian distribution. The data will present + * sequences of subsequent integers followed by "gaps". In this case, + * cardinality indicates the probability of switching from a + * sequence to a gap, and vice-versa. For example, density = 0 + * means a set made up of one long sequence of numbers, while + * density = 1 means a set made up of all odd (or even) + * integers. 
+ */ + public static class Markovian extends RandomNumbers + { + private boolean skip = false; + private int next = min; + + /** + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + */ + public Markovian(int cardinality, double density, int min) + { + super(cardinality, density, min); + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + while (skip ^= RND.nextDouble() < density) { + next++; + } + return min + next++; + } + } +} diff --git a/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java b/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java new file mode 100755 index 000000000000..f2542c1067d2 --- /dev/null +++ b/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java @@ -0,0 +1,1972 @@ +/* +* Copyright 2012 Metamarkets Group Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package io.druid.extendedset.intset; + +import com.google.common.collect.Lists; +import junit.framework.Assert; +import org.junit.Test; + +import java.nio.IntBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Random; +import java.util.Set; + +/** + */ +public class ImmutableConciseSetTest +{ + public static final int NO_COMPLEMENT_LENGTH = -1; + + @Test + public void testWordIteratorNext1() + { + final int[] ints = {1, 2, 3, 4, 5}; + ConciseSet set = new ConciseSet(); + for (int i : ints) { + set.add(i); + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + Assert.assertEquals(new Integer(0x8000003E), itr.next()); + + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testWordIteratorNext2() + { + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 100000; i++) { + set.add(i); + + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + Assert.assertEquals(new Integer(0x40000C98), itr.next()); + Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + /** + * Advance to middle of a fill + */ + @Test + public void testWordIteratorAdvanceTo1() + { + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 100000; i++) { + set.add(i); + + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + itr.advanceTo(50); + Assert.assertEquals(new Integer(1073744998), itr.next()); + Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + /** + * Advance past a fill directly to a new literal + */ + @Test + public void 
testWordIteratorAdvanceTo2() + { + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 100000; i++) { + set.add(i); + + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + itr.advanceTo(3225); + Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneLitOneLit() + { + int[] words = {-1, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000001), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneLitPureOneFill() + { + int[] words = {-1, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000005), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneLitDirtyOneFill() + { + int[] words = {-1, 0x42000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(new Integer(0x42000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneFillOneLit() + { + int[] words = {0x40000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000005), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneFillPureOneFill() 
+ { + int[] words = {0x40000004, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000009), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneFillDirtyOneFill() + { + int[] words = {0x40000004, 0x42000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000004), itr.next()); + Assert.assertEquals(new Integer(0x42000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroLitZeroLit() + { + int[] words = {0x80000000, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000001), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroLitPureZeroFill() + { + int[] words = {0x80000000, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000005), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroLitDirtyZeroFill() + { + int[] words = {0x80000000, 0x02000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000000), itr.next()); + 
Assert.assertEquals(new Integer(0x02000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroFillZeroLit() + { + int[] words = {0x00000004, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000005), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroFillPureZeroFill() + { + int[] words = {0x00000004, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000009), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroFillDirtyZeroFill() + { + int[] words = {0x00000004, 0x02000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000004), itr.next()); + Assert.assertEquals(new Integer(0x02000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleOneBitLitZeroLit() + { + int[] words = {0x80000001, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x02000001), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + 
public void testCompactDoubleOneBitLitZeroLit() + { + int[] words = {0x80000003, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000003), itr.next()); + Assert.assertEquals(new Integer(0x80000000), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleOneBitLitPureZeroFill() + { + int[] words = {0x80000001, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x02000005), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactDoubleOneBitLitPureZeroFill() + { + int[] words = {0x80000003, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000003), itr.next()); + Assert.assertEquals(new Integer(0x00000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleOneBitLitDirtyZeroFill() + { + int[] words = {0x80000001, 0x02000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000001), itr.next()); + Assert.assertEquals(new Integer(0x02000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void 
testCompactSingleZeroBitLitOneLit() + { + int[] words = {0xFFFFFFFE, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x42000001), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactDoubleZeroBitLitOneLit() + { + int[] words = {0xFFFFFFEE, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFEE), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleZeroBitLitPureOneFill() + { + int[] words = {0xFFFFFFFE, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x42000005), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactDoubleZeroBitLitPureOneFill() + { + int[] words = {0xFFFFFFFC, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFFC), itr.next()); + Assert.assertEquals(new Integer(0x40000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleZeroBitLitDirtyOneFill() + { + int[] words = {0xFFFFFFFE, 0x42000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFFE), itr.next()); + 
Assert.assertEquals(new Integer(0x42000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactTwoLiterals() + { + int[] words = {0xFFFFFFFE, 0xFFEFFEFF}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFFE), itr.next()); + Assert.assertEquals(new Integer(0xFFEFFEFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + /** + * Set 1: zero literal, zero fill with flipped bit 33, literal + * Set 2: zero literal, zero fill with flipped bit 34, literal + *

+   * Testing merge: the sets differ in one low element (33 vs 34) and share 100000,
+   * so iterating the union must yield exactly 33, 34, 100000.
+   */
+  @Test
+  public void testUnion1()
+  {
+    final int[] ints1 = {33, 100000};
+    final int[] ints2 = {34, 100000};
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Arrays.asList(33, 34, 100000);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: zero literal, zero fill with flipped bit 33, literal
+   * Set 2: zero literal, zero fill with flipped bit 34, literal
+   *

+   * Testing merge: the low elements are adjacent (33 vs 34) and the high elements differ
+   * (100000 vs 200000), so all four values must appear in the union.
+   */
+  @Test
+  public void testUnion2()
+  {
+    final int[] ints1 = {33, 100000};
+    final int[] ints2 = {34, 200000};
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Arrays.asList(33, 34, 100000, 200000);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: zero fill, one fill
+   * Set 2: zero fill, one fill with flipped bit 62
+   *

+   * Testing merge: set 1 holds [62, 10001) and set 2 holds [63, 10002), so the union must be
+   * the contiguous range [62, 10002).
+   */
+  @Test
+  public void testUnion3()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 62; i < 10001; i++) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 63; i < 10002; i++) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 62; i < 10002; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: zero literal, one fill with flipped bit 62
+   * Set 2: zero literal, literal, one fill, literal
+   *

+   * Testing merge: set 1 holds [63, 1001) and set 2 holds [64, 1002), so the union must be
+   * the contiguous range [63, 1002).
+   */
+  @Test
+  public void testUnion4()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 63; i < 1001; i++) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 64; i < 1002; i++) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 63; i < 1002; i++) {
+      expected.add(i);
+    }
+
+    // The scratch set that used to be rebuilt from `expected` here was never read, so it is gone.
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: literal
+   * Set 2: zero fill, zero literal, zero fill with flipped 33 bit, zero fill with flipped 1000000 bit, literal
+   * Set3: literal, zero fill with flipped 34th bit, literal
+   *

+   * Testing merge of three sets at once; the inputs overlap (4 and 5 occur in two of them) and
+   * set 2 is populated out of order, yet the union must iterate in ascending order without
+   * duplicates.
+   */
+  @Test
+  public void testUnion5()
+  {
+    final int[] ints1 = {1, 2, 3, 4, 5};
+    final int[] ints2 = {100000, 2405983, 33};
+    final int[] ints3 = {0, 4, 5, 34, 333333};
+    // Element types are spelled out so the lists are not raw.
+    final List<Integer> expected = Arrays.asList(0, 1, 2, 3, 4, 5, 33, 34, 100000, 333333, 2405983);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+    ConciseSet set3 = new ConciseSet();
+    for (int i : ints3) {
+      set3.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2),
+        ImmutableConciseSet.newImmutableFromMutable(set3)
+    );
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: literal
+   * Set 2: literal
+   *

+   * Testing merge: each set is [0, 30) with one element missing (28 and 27 respectively);
+   * the gaps do not coincide, so the union must be the full range [0, 30).
+   */
+  @Test
+  public void testUnion6()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 30; i++) {
+      if (i != 28) {
+        set1.add(i);
+      }
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 30; i++) {
+      if (i != 27) {
+        set2.add(i);
+      }
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 30; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: zero literal, literal, one fill with flipped bit
+   * Set 2: zero literal, one fill with flipped bit
+   *

+   * Testing merge: set 1 holds [64, 1005) and set 2 holds [63, 99), so the union must be
+   * the contiguous range [63, 1005).
+   */
+  @Test
+  public void testUnion7()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 64; i < 1005; i++) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 63; i < 99; i++) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 63; i < 1005; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: One fill with flipped 27th bit
+   * Set 2: One fill with flipped 28th bit
+   *

+   * Testing creation of one fill with no flipped bits: each set is [0, 1000) minus a single,
+   * different element (27 and 28), so the union must be the full range [0, 1000).
+   */
+  @Test
+  public void testUnion8()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      if (i != 27) {
+        set1.add(i);
+      }
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      if (i != 28) {
+        set2.add(i);
+      }
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 1000; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: Literal and one fill
+   * Set 2: One fill with flipped 28th bit
+   *

+   * Testing creation of one fill with correct flipped bit: 28 is missing from both inputs
+   * (set 1 also lacks 27), so the union must be [0, 1000) without 28.
+   */
+  @Test
+  public void testUnion9()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      if (!(i == 27 || i == 28)) {
+        set1.add(i);
+      }
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      if (i != 28) {
+        set2.add(i);
+      }
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 1000; i++) {
+      if (i != 28) {
+        expected.add(i);
+      }
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: Multiple literals
+   * Set 2: Multiple literals
+   *

+   * Testing merge of pure sequences of literals: set 1 holds the even and set 2 the odd
+   * numbers below 1000, so the union must be the full range [0, 1000).
+   */
+  @Test
+  public void testUnion10()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 1000; i += 2) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 1; i < 1000; i += 2) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 1000; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: Multiple literals
+   * Set 2: Zero fill and literal
+   *

+   * Testing skipping of zero fills: set 1 holds the even numbers below 1000 and set 2 holds
+   * only 10000, so the union is the even numbers followed by 10000.
+   */
+  @Test
+  public void testUnion11()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 1000; i += 2) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    set2.add(10000);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 1000; i += 2) {
+      expected.add(i);
+    }
+    expected.add(10000);
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: Literal with 4 bits marked
+   * Set 2: Zero fill with flipped bit 5
+   *

+   * Testing merge of literal and zero fill with flipped bit: {1, 2, 3, 4} united with
+   * {5, 1000} must yield 1, 2, 3, 4, 5, 1000.
+   */
+  @Test
+  public void testUnion12()
+  {
+    final int[] ints1 = {1, 2, 3, 4};
+    final int[] ints2 = {5, 1000};
+    // Element types are spelled out so the lists are not raw.
+    final List<Integer> expected = Arrays.asList(1, 2, 3, 4, 5, 1000);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: Literal with bit 0
+   * Set 2: One fill with flipped bit 0
+   *

+   * Testing merge of literal and one fill with flipped bit: {0} united with [1, 100) must be
+   * the full range [0, 100).
+   */
+  @Test
+  public void testUnion13()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    final int[] ints1 = {0};
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 1; i < 100; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 100; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: Zero fill with flipped bit 0
+   * Set 2: One fill with flipped bit 0
+   *

+   * Testing merge of flipped bits in zero and one fills: {0, 100} united with [1, 100) must be
+   * the closed range [0, 100].
+   */
+  @Test
+  public void testUnion14()
+  {
+    // Element types are spelled out so the lists are not raw.
+    List<Integer> expected = Lists.newArrayList();
+    final int[] ints1 = {0, 100};
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 1; i < 100; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    // Upper bound is inclusive: 100 comes from set 1.
+    for (int i = 0; i <= 100; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: Zero fill with flipped bit 1
+   * Set 2: Literal with 0th bit marked
+   * Set 3: One Fill from 1 to 100 with flipped bit 0
+   *

+   * Testing merge of flipped bits in zero and one fills with a literal: {1, 100}, {0} and
+   * [1, 100) together must yield the closed range [0, 100].
+   */
+  @Test
+  public void testUnion15()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    final int[] ints1 = {1, 100};
+    final int[] ints2 = {0};
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+    ConciseSet set3 = new ConciseSet();
+    for (int i = 1; i < 100; i++) {
+      set3.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2),
+        ImmutableConciseSet.newImmutableFromMutable(set3)
+    );
+
+    // Upper bound is inclusive: 100 comes from set 1.
+    for (int i = 0; i <= 100; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Testing merge of offset elements
+   */
+  @Test
+  public void testUnion16()
+  {
+    final int[] ints1 = {1001, 1002, 1003};
+    final int[] ints2 = {1034, 1035, 1036};
+    List<Integer> expected = Arrays.asList(1001, 1002, 1003, 1034, 1035, 1036);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Testing merge of same elements
+   */
+  @Test
+  public void testUnion17()
+  {
+    final int[] ints1 = {1, 2, 3, 4, 5};
+    final int[] ints2 = {1, 2, 3, 4, 5};
+    List<Integer> expected = Arrays.asList(1, 2, 3, 4, 5);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [0, 1000)
+   * Set 2: 1000 and 10000
+   * The union must be [0, 1001) followed by 10000.
+   */
+  @Test
+  public void testUnion18()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    set2.add(1000);
+    set2.add(10000);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 1001; i++) {
+      expected.add(i);
+    }
+    expected.add(10000);
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: one fill, all ones literal
+   * Set 2: zero fill, one fill, literal
+   */
+  @Test
+  public void testUnion19()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 93; i++) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 62; i < 1000; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 0; i < 1000; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: literal, one fill, literal
+   * Set 2: zero fill, literal that falls within the one fill above, one fill that falls in one fill above, one fill
+   */
+  @Test
+  public void testUnion20()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 5; i++) {
+      set1.add(i);
+    }
+    for (int i = 31; i < 1000; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 62; i < 68; i++) {
+      set2.add(i);
+    }
+    for (int i = 800; i < 1000; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    // Set 2 is a subset of set 1, so the union equals set 1.
+    for (int i = 0; i < 5; i++) {
+      expected.add(i);
+    }
+    for (int i = 31; i < 1000; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [32, 93)
+   * Set 2: every value in [0, 62)
+   * The ranges overlap, so the union must be the contiguous range [0, 93).
+   */
+  @Test
+  public void testUnion21()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 32; i < 93; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 62; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    for (int i = 0; i < 93; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [93, 1000)
+   * Set 2: every value in [0, 32)
+   * The ranges are disjoint, so the union is [0, 32) followed by [93, 1000).
+   */
+  @Test
+  public void testUnion22()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 93; i < 1000; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 32; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    for (int i = 0; i < 32; i++) {
+      expected.add(i);
+    }
+    for (int i = 93; i < 1000; i++) {
+      expected.add(i);
+    }
+
+    verifyUnion(expected, sets);
+  }
+
+  /**
+   * Unions {@code sets} with {@link ImmutableConciseSet#union}, drains the result's iterator
+   * into a list and asserts that it equals {@code expected} (same elements, same order).
+   *
+   * @param expected the elements the union must produce, in iteration order
+   * @param sets     the sets to union
+   */
+  private void verifyUnion(List<Integer> expected, List<ImmutableConciseSet> sets)
+  {
+    List<Integer> actual = Lists.newArrayList();
+    ImmutableConciseSet set = ImmutableConciseSet.union(sets);
+    IntSet.IntIterator itr = set.iterator();
+    while (itr.hasNext()) {
+      actual.add(itr.next());
+    }
+    Assert.assertEquals(expected, actual);
+  }
+
+  /**
+   * Testing basic intersection of similar sets
+   */
+  @Test
+  public void testIntersection1()
+  {
+    final int[] ints1 = {33, 100000};
+    final int[] ints2 = {33, 100000};
+    List<Integer> expected = Arrays.asList(33, 100000);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set1: literal, zero fill with flip bit, literal
+   * Set2: literal, zero fill with different flip bit, literal
+   */
+  @Test
+  public void testIntersection2()
+  {
+    final int[] ints1 = {33, 100000};
+    final int[] ints2 = {34, 100000};
+    List<Integer> expected = Arrays.asList(100000);
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Testing intersection of one fills
+   */
+  @Test
+  public void testIntersection3()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      set1.add(i);
+      set2.add(i);
+      expected.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Similar to previous test with one bit in the sequence set to zero
+   */
+  @Test
+  public void testIntersection4()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      set1.add(i);
+      // 500 is left out of set 2 only, so it must be absent from the intersection.
+      if (i != 500) {
+        set2.add(i);
+        expected.add(i);
+      }
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Testing with disjoint sets
+   */
+  @Test
+  public void testIntersection5()
+  {
+    final int[] ints1 = {33, 100000};
+    final int[] ints2 = {34, 200000};
+    List<Integer> expected = Lists.newArrayList();
+
+    ConciseSet set1 = new ConciseSet();
+    for (int i : ints1) {
+      set1.add(i);
+    }
+    ConciseSet set2 = new ConciseSet();
+    for (int i : ints2) {
+      set2.add(i);
+    }
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: literal, zero fill, literal
+   * Set 2: one fill, literal that falls within the zero fill above, one fill
+   */
+  @Test
+  public void testIntersection6()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 5; i++) {
+      set1.add(i);
+    }
+    for (int i = 1000; i < 1005; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 800; i < 805; i++) {
+      set2.add(i);
+    }
+    for (int i = 806; i < 1005; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    for (int i = 1000; i < 1005; i++) {
+      expected.add(i);
+    }
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [0, 3100)
+   * Set 2: 100, 500 and [600, 700)
+   * Set 2 is a subset of set 1, so the intersection equals set 2.
+   */
+  @Test
+  public void testIntersection7()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 3100; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    set2.add(100);
+    set2.add(500);
+    for (int i = 600; i < 700; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(100);
+    expected.add(500);
+    for (int i = 600; i < 700; i++) {
+      expected.add(i);
+    }
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Same as {@link #testIntersection7()} with the extra element 4001 added to both sets,
+   * which must therefore also appear in the intersection.
+   */
+  @Test
+  public void testIntersection8()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 3100; i++) {
+      set1.add(i);
+    }
+    set1.add(4001);
+
+    ConciseSet set2 = new ConciseSet();
+    set2.add(100);
+    set2.add(500);
+    for (int i = 600; i < 700; i++) {
+      set2.add(i);
+    }
+    set2.add(4001);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(100);
+    expected.add(500);
+    for (int i = 600; i < 700; i++) {
+      expected.add(i);
+    }
+    expected.add(4001);
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: 2005, 3005, 3008
+   * Set 2: every value in [0, 3007)
+   * 3008 lies outside set 2, so only 2005 and 3005 survive.
+   */
+  @Test
+  public void testIntersection9()
+  {
+    ConciseSet set1 = new ConciseSet();
+    set1.add(2005);
+    set1.add(3005);
+    set1.add(3008);
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 3007; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(2005);
+    expected.add(3005);
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [0, 3100)
+   * Set 2: 500, 600, 4001
+   * 4001 lies outside set 1, so only 500 and 600 survive.
+   */
+  @Test
+  public void testIntersection10()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 3100; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+
+    set2.add(500);
+    set2.add(600);
+    set2.add(4001);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(500);
+    expected.add(600);
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: 2005 and [2800, 3500)
+   * Set 2: every value in [0, 3007)
+   * The intersection is 2005 plus the overlapping range [2800, 3007).
+   */
+  @Test
+  public void testIntersection11()
+  {
+    ConciseSet set1 = new ConciseSet();
+    set1.add(2005);
+    for (int i = 2800; i < 3500; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 3007; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(2005);
+    for (int i = 2800; i < 3007; i++) {
+      expected.add(i);
+    }
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Same as {@link #testIntersection11()} with the extra element 10005 added to both sets,
+   * which must therefore also appear in the intersection.
+   */
+  @Test
+  public void testIntersection12()
+  {
+    ConciseSet set1 = new ConciseSet();
+    set1.add(2005);
+    for (int i = 2800; i < 3500; i++) {
+      set1.add(i);
+    }
+    set1.add(10005);
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 3007; i++) {
+      set2.add(i);
+    }
+    set2.add(10005);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(2005);
+    for (int i = 2800; i < 3007; i++) {
+      expected.add(i);
+    }
+    expected.add(10005);
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: 2005
+   * Set 2: every value in [0, 100)
+   * The sets are disjoint, so the intersection is empty.
+   */
+  @Test
+  public void testIntersection13()
+  {
+    ConciseSet set1 = new ConciseSet();
+    set1.add(2005);
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 100; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [0, 1000)
+   * Set 2: 0, 3, 5, 100, 101
+   * Set 2 is a subset of set 1, so the intersection equals set 2.
+   */
+  @Test
+  public void testIntersection14()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    set2.add(0);
+    set2.add(3);
+    set2.add(5);
+    set2.add(100);
+    set2.add(101);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(0);
+    expected.add(3);
+    expected.add(5);
+    expected.add(100);
+    expected.add(101);
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [0, 1000)
+   * Set 2: 0, 3, 5 and [100, 500)
+   * Set 2 is a subset of set 1, so the intersection equals set 2.
+   */
+  @Test
+  public void testIntersection15()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 1000; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    set2.add(0);
+    set2.add(3);
+    set2.add(5);
+    for (int i = 100; i < 500; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(0);
+    expected.add(3);
+    expected.add(5);
+    for (int i = 100; i < 500; i++) {
+      expected.add(i);
+    }
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: 2005
+   * Set 2: 0, 3, 5, 100, 101
+   * The sets are disjoint, so the intersection is empty.
+   */
+  @Test
+  public void testIntersection16()
+  {
+    ConciseSet set1 = new ConciseSet();
+    set1.add(2005);
+
+    ConciseSet set2 = new ConciseSet();
+    set2.add(0);
+    set2.add(3);
+    set2.add(5);
+    set2.add(100);
+    set2.add(101);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [0, 4002)
+   * Set 2: 4001
+   * The single element of set 2 is the last element of set 1, so the intersection is {4001}.
+   */
+  @Test
+  public void testIntersection17()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 0; i < 4002; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    set2.add(4001);
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(4001);
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: every value in [32, 93)
+   * Set 2: every value in [0, 62)
+   * The intersection is the overlapping range [32, 62).
+   */
+  @Test
+  public void testIntersection18()
+  {
+    ConciseSet set1 = new ConciseSet();
+    for (int i = 32; i < 93; i++) {
+      set1.add(i);
+    }
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 62; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    for (int i = 32; i < 62; i++) {
+      expected.add(i);
+    }
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Set 1: 2005
+   * Set 2: every value in [0, 10000)
+   * Set 1 is a subset of set 2, so the intersection is {2005}.
+   */
+  @Test
+  public void testIntersection19()
+  {
+    ConciseSet set1 = new ConciseSet();
+    set1.add(2005);
+
+    ConciseSet set2 = new ConciseSet();
+    for (int i = 0; i < 10000; i++) {
+      set2.add(i);
+    }
+
+    List<ImmutableConciseSet> sets = Arrays.asList(
+        ImmutableConciseSet.newImmutableFromMutable(set1),
+        ImmutableConciseSet.newImmutableFromMutable(set2)
+    );
+
+    List<Integer> expected = Lists.newArrayList();
+    expected.add(2005);
+
+    verifyIntersection(expected, sets);
+  }
+
+  /**
+   * Intersecting two default-constructed sets must complete and yield no elements.
+   * NOTE(review): judging by the name this guards against a non-terminating intersection;
+   * confirm against the history of {@code ImmutableConciseSet.intersection}.
+   */
+  @Test
+  public void testIntersectionTerminates() throws Exception
+  {
+    // The explicit type witness keeps the empty expected list a List<Integer>.
+    verifyIntersection(
+        Arrays.<Integer>asList(),
+        Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet())
+    );
+  }
+
+  /**
+   * Intersects {@code sets} with {@link ImmutableConciseSet#intersection}, drains the result's
+   * iterator into a list and asserts that it equals {@code expected} (same elements, same order).
+   *
+   * @param expected the elements the intersection must produce, in iteration order
+   * @param sets     the sets to intersect
+   */
+  private void verifyIntersection(List<Integer> expected, List<ImmutableConciseSet> sets)
+  {
+    List<Integer> actual = Lists.newArrayList();
+    ImmutableConciseSet set = ImmutableConciseSet.intersection(sets);
+    IntSet.IntIterator itr = set.iterator();
+    while (itr.hasNext()) {
+      actual.add(itr.next());
+    }
+    Assert.assertEquals(expected, actual);
+  }
+
+  /**
+   * Basic complement with no length
+   */
+  @Test
+  public void testComplement1()
+  {
+    final int[] ints = {1, 100};
+    List<Integer> expected = Lists.newArrayList();
+
+    ConciseSet set = new ConciseSet();
+    for (int i : ints) {
+      set.add(i);
+    }
+
+    // Everything in [0, 100] except the two members of the set.
+    for (int i = 0; i <= 100; i++) {
+      if (i != 1 && i != 100) {
+        expected.add(i);
+      }
+    }
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, NO_COMPLEMENT_LENGTH);
+  }
+
+  /**
+   * Complement of a single partial word; no length is given and the expected complement is empty
+   */
+  @Test
+  public void testComplement2()
+  {
+    List<Integer> expected = Lists.newArrayList();
+
+    ConciseSet set = new ConciseSet();
+    for (int i = 0; i < 15; i++) {
+      set.add(i);
+    }
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, NO_COMPLEMENT_LENGTH);
+  }
+
+  /**
+   * Complement of a single partial word with a length set in the same word
+   */
+  @Test
+  public void testComplement3()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    final int length = 21;
+
+    ConciseSet set = new ConciseSet();
+    for (int i = 0; i < 15; i++) {
+      set.add(i);
+    }
+    for (int i = 15; i < length; i++) {
+      expected.add(i);
+    }
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, length);
+  }
+
+  /**
+   * Complement of a single partial word with a length set in a different word
+   */
+  @Test
+  public void testComplement4()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    final int length = 41;
+
+    ConciseSet set = new ConciseSet();
+    for (int i = 0; i < 15; i++) {
+      set.add(i);
+    }
+    for (int i = 15; i < length; i++) {
+      expected.add(i);
+    }
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, length);
+  }
+
+  /**
+   * Complement of a single partial word with a length set to create a one fill
+   */
+  @Test
+  public void testComplement5()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    final int length = 1001;
+
+    ConciseSet set = new ConciseSet();
+    for (int i = 0; i < 15; i++) {
+      set.add(i);
+    }
+    for (int i = 15; i < length; i++) {
+      expected.add(i);
+    }
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, length);
+  }
+
+  /**
+   * Complement of words with a length set to create a one fill
+   */
+  @Test
+  public void testComplement6()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    final int length = 1001;
+
+    ConciseSet set = new ConciseSet();
+    for (int i = 65; i <= 100; i++) {
+      set.add(i);
+    }
+    // Everything below length that is outside the closed range [65, 100].
+    for (int i = 0; i < length; i++) {
+      if (i < 65 || i > 100) {
+        expected.add(i);
+      }
+    }
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, length);
+  }
+
+  /**
+   * Complement of 2 words with a length in the second word
+   */
+  @Test
+  public void testComplement7()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    final int length = 37;
+
+    ConciseSet set = new ConciseSet();
+    for (int i = 0; i <= 35; i++) {
+      set.add(i);
+    }
+    expected.add(36);
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, length);
+  }
+
+  /**
+   * Complement of a one literal with a length set to complement the next bit in the next word
+   */
+  @Test
+  public void testComplement8()
+  {
+    List<Integer> expected = Lists.newArrayList();
+    final int length = 32;
+
+    ConciseSet set = new ConciseSet();
+    for (int i = 0; i <= 30; i++) {
+      set.add(i);
+    }
+    expected.add(31);
+
+    ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set);
+
+    verifyComplement(expected, testSet, length);
+  }
+
+
/** + * Complement of a null set with a length + */ + @Test + public void testComplement9() + { + final List lengths = new ArrayList(); + lengths.addAll( + Arrays.asList( + 35, + 31, + 32, + 1, + 0, + 31 * 3, + 1024, + ConciseSetUtils.MAX_ALLOWED_INTEGER + ) + ); + final Random random = new Random(701534702L); + for (int i = 0; i < 10; ++i) { + lengths.add(random.nextInt(ConciseSetUtils.MAX_ALLOWED_INTEGER + 1)); + } + final ImmutableConciseSet emptySet = new ImmutableConciseSet(); + for (final int length : lengths) { + final ImmutableConciseSet complement = ImmutableConciseSet.complement(emptySet, length); + final IntSet.IntIterator intIterator = complement.iterator(); + for (int i = 0; i < length; i++) { + final int n = intIterator.next(); + if (i != n) { + Assert.assertEquals(String.format("Failure at bit [%d] on length [%d]", i, length), i, n); + } + } + NoSuchElementException ex = null; + try { + intIterator.next(); + } + catch (NoSuchElementException e) { + ex = e; + } + Assert.assertNotNull(ex); + } + } + + /** + * Complement of a null set to create a one fill + */ + @Test + public void testComplement10() + { + List expected = Lists.newArrayList(); + final int length = 93; + + for (int i = 0; i < length; i++) { + expected.add(i); + } + + ImmutableConciseSet testSet = new ImmutableConciseSet(); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement with correct last index + */ + @Test + public void testComplement11() + { + List expected = Lists.newArrayList(); + int length = 18930; + for (int i = 0; i < 500; i++) { + expected.add(i); + } + for (int i = 18881; i < length; i++) { + expected.add(i); + } + + ConciseSet set = new ConciseSet(); + for (int i = 500; i <= 18880; i++) { + set.add(i); + } + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement with empty set and length in first block + */ + @Test + public void testComplement12() + { 
+ List expected = Lists.newArrayList(); + int length = 10; + for (int i = 0; i < 10; i++) { + expected.add(i); + } + + ImmutableConciseSet testSet = new ImmutableConciseSet(); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement with empty list of some length + */ + @Test + public void testComplement13() + { + List expected = Lists.newArrayList(); + int length = 10; + for (int i = 0; i < length; i++) { + expected.add(i); + } + ImmutableConciseSet testSet = new ImmutableConciseSet(); + + verifyComplement(expected, testSet, length); + } + + private void verifyComplement(List expected, ImmutableConciseSet set, int endIndex) + { + List actual = Lists.newArrayList(); + + ImmutableConciseSet res; + if (endIndex == NO_COMPLEMENT_LENGTH) { + res = ImmutableConciseSet.complement(set); + } else { + res = ImmutableConciseSet.complement(set, endIndex); + } + + IntSet.IntIterator itr = res.iterator(); + while (itr.hasNext()) { + actual.add(itr.next()); + } + Assert.assertEquals(expected, actual); + } + + @Test + public void testContains() + { + final ConciseSet conciseSet = new ConciseSet(); + final Random random = new Random(543167436715430L); + final Set integerSet = new HashSet<>(); + int max = -1; + for (int i = 0; i < 100; ++i) { + final int j = random.nextInt(1 << 20); + integerSet.add(j); + conciseSet.add(j); + if (j > max) { + max = j; + } + } + final ImmutableConciseSet immutableConciseSet = ImmutableConciseSet.newImmutableFromMutable(conciseSet); + for (int i = 0; i < max + 10; ++i) { + final String s = Integer.toString(i); + Assert.assertEquals(s, integerSet.contains(i), conciseSet.contains(i)); + Assert.assertEquals(s, integerSet.contains(i), immutableConciseSet.contains(i)); + } + } +} diff --git a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/BitMapFactory.java b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/BitMapFactory.java index 3c517cd4ae07..8a76bf4bc171 
100644 --- a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/BitMapFactory.java +++ b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/BitMapFactory.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; /** */ diff --git a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/ConciseBitMapFactory.java b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/ConciseBitMapFactory.java index e9b2d49b7b93..b87f6ef28b48 100644 --- a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/ConciseBitMapFactory.java +++ b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/ConciseBitMapFactory.java @@ -19,9 +19,9 @@ package io.druid.query.aggregation.distinctcount; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ConciseBitmapFactory; -import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; public class ConciseBitMapFactory implements BitMapFactory { diff --git a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountAggregator.java b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountAggregator.java index 54f7f5744daa..880ab0b2aae7 100644 --- a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountAggregator.java +++ b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountAggregator.java @@ -19,7 +19,7 
@@ package io.druid.query.aggregation.distinctcount; -import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.aggregation.Aggregator; import io.druid.segment.DimensionSelector; diff --git a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountBufferAggregator.java b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountBufferAggregator.java index a90dd6f2dcca..809cd0ac796f 100644 --- a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountBufferAggregator.java +++ b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/DistinctCountBufferAggregator.java @@ -19,8 +19,8 @@ package io.druid.query.aggregation.distinctcount; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.WrappedRoaringBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.WrappedRoaringBitmap; import io.druid.query.aggregation.BufferAggregator; import io.druid.segment.DimensionSelector; diff --git a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/JavaBitMapFactory.java b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/JavaBitMapFactory.java index 5c1f2dc41160..f8d2cc8b7412 100644 --- a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/JavaBitMapFactory.java +++ b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/JavaBitMapFactory.java @@ -19,9 +19,9 @@ package io.druid.query.aggregation.distinctcount; -import com.metamx.collections.bitmap.BitSetBitmapFactory; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.BitSetBitmapFactory; +import 
io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; public class JavaBitMapFactory implements BitMapFactory { diff --git a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/RoaringBitMapFactory.java b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/RoaringBitMapFactory.java index 0da6cc04e17e..a3ed78a83b58 100644 --- a/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/RoaringBitMapFactory.java +++ b/extensions-contrib/distinctcount/src/main/java/io/druid/query/aggregation/distinctcount/RoaringBitMapFactory.java @@ -19,9 +19,9 @@ package io.druid.query.aggregation.distinctcount; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; public class RoaringBitMapFactory implements BitMapFactory { diff --git a/pom.xml b/pom.xml index a0a1b45e2160..eaa2df9d7e2e 100644 --- a/pom.xml +++ b/pom.xml @@ -84,6 +84,8 @@ benchmarks aws-common java-util + bytebuffer-collections + extendedset extensions-core/avro-extensions extensions-core/datasketches diff --git a/processing/pom.xml b/processing/pom.xml index 43c2b11fa54a..624fecd46544 100644 --- a/processing/pom.xml +++ b/processing/pom.xml @@ -36,8 +36,9 @@ ${project.parent.version} - com.metamx + io.druid bytebuffer-collections + ${project.parent.version} it.unimi.dsi diff --git a/processing/src/main/java/io/druid/query/filter/BitmapIndexSelector.java b/processing/src/main/java/io/druid/query/filter/BitmapIndexSelector.java index 4675876d5e26..fc3bac6c2911 100644 --- a/processing/src/main/java/io/druid/query/filter/BitmapIndexSelector.java +++ 
b/processing/src/main/java/io/druid/query/filter/BitmapIndexSelector.java @@ -19,9 +19,9 @@ package io.druid.query.filter; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.Indexed; diff --git a/processing/src/main/java/io/druid/query/filter/Filter.java b/processing/src/main/java/io/druid/query/filter/Filter.java index 1a52237981ff..26d634ded491 100644 --- a/processing/src/main/java/io/druid/query/filter/Filter.java +++ b/processing/src/main/java/io/druid/query/filter/Filter.java @@ -19,7 +19,7 @@ package io.druid.query.filter; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; /** */ diff --git a/processing/src/main/java/io/druid/query/filter/RowOffsetMatcherFactory.java b/processing/src/main/java/io/druid/query/filter/RowOffsetMatcherFactory.java index 0de44c34d087..6c2b4411cda7 100644 --- a/processing/src/main/java/io/druid/query/filter/RowOffsetMatcherFactory.java +++ b/processing/src/main/java/io/druid/query/filter/RowOffsetMatcherFactory.java @@ -19,7 +19,7 @@ package io.druid.query.filter; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; /** */ diff --git a/processing/src/main/java/io/druid/query/filter/SpatialDimFilter.java b/processing/src/main/java/io/druid/query/filter/SpatialDimFilter.java index 5d9f67ffbc1d..7555c93e9df8 100644 --- a/processing/src/main/java/io/druid/query/filter/SpatialDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/SpatialDimFilter.java @@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import 
com.google.common.collect.RangeSet; -import com.metamx.collections.spatial.search.Bound; +import io.druid.collections.spatial.search.Bound; import io.druid.java.util.common.StringUtils; import io.druid.segment.filter.SpatialFilter; diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java index 5d84c7b45331..93bdb20db995 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryRunner.java @@ -25,9 +25,9 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; import com.metamx.emitter.EmittingLogger; import io.druid.java.util.common.IAE; import io.druid.java.util.common.ISE; diff --git a/processing/src/main/java/io/druid/segment/BitmapOffset.java b/processing/src/main/java/io/druid/segment/BitmapOffset.java index 1c41ee557182..5e4ae18309ba 100644 --- a/processing/src/main/java/io/druid/segment/BitmapOffset.java +++ b/processing/src/main/java/io/druid/segment/BitmapOffset.java @@ -19,10 +19,10 @@ package io.druid.segment; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.WrappedImmutableRoaringBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.WrappedImmutableRoaringBitmap; import io.druid.segment.data.Offset; 
import io.druid.segment.data.RoaringBitmapSerdeFactory; import org.roaringbitmap.IntIterator; diff --git a/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java index 6b7c31c7c8ee..9b9d5c2c9f4f 100644 --- a/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java +++ b/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java @@ -20,9 +20,9 @@ package io.druid.segment; import com.google.common.base.Strings; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.query.filter.BitmapIndexSelector; import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.Column; diff --git a/processing/src/main/java/io/druid/segment/DimensionIndexer.java b/processing/src/main/java/io/druid/segment/DimensionIndexer.java index 4ac8672632fc..82d8021bfefa 100644 --- a/processing/src/main/java/io/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/DimensionIndexer.java @@ -19,8 +19,8 @@ package io.druid.segment; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.dimension.DimensionSpec; import io.druid.query.filter.DruidPredicateFactory; import io.druid.query.filter.ValueMatcher; diff --git a/processing/src/main/java/io/druid/segment/IndexIO.java b/processing/src/main/java/io/druid/segment/IndexIO.java index 7872af8b1744..80c9b69dd930 100644 --- a/processing/src/main/java/io/druid/segment/IndexIO.java +++ 
b/processing/src/main/java/io/druid/segment/IndexIO.java @@ -36,11 +36,11 @@ import com.google.common.io.Files; import com.google.common.primitives.Ints; import com.google.inject.Inject; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ConciseBitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.spatial.ImmutableRTree; import com.metamx.emitter.EmittingLogger; import io.druid.common.utils.SerializerUtils; import io.druid.java.util.common.IAE; diff --git a/processing/src/main/java/io/druid/segment/MMappedIndex.java b/processing/src/main/java/io/druid/segment/MMappedIndex.java index 7297458d1fd3..d7d08183648d 100644 --- a/processing/src/main/java/io/druid/segment/MMappedIndex.java +++ b/processing/src/main/java/io/druid/segment/MMappedIndex.java @@ -19,8 +19,8 @@ package io.druid.segment; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.java.util.common.io.smoosh.SmooshedFileMapper; import io.druid.java.util.common.logger.Logger; import io.druid.segment.data.CompressedLongsIndexedSupplier; diff --git a/processing/src/main/java/io/druid/segment/QueryableIndex.java b/processing/src/main/java/io/druid/segment/QueryableIndex.java index c18913946223..31492b18cc43 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndex.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndex.java @@ -19,7 +19,7 @@ package io.druid.segment; -import 
com.metamx.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.BitmapFactory; import io.druid.segment.data.Indexed; import org.joda.time.Interval; diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 24bddc89ea1f..80b04635043b 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -29,7 +29,8 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.Closer; -import com.metamx.collections.bitmap.ImmutableBitmap; + +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.granularity.QueryGranularity; import io.druid.java.util.common.guava.Sequence; import io.druid.java.util.common.guava.Sequences; diff --git a/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java b/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java index 76261309299b..f91fa7aa0291 100644 --- a/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java +++ b/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java @@ -21,7 +21,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Maps; -import com.metamx.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.BitmapFactory; import io.druid.java.util.common.io.smoosh.SmooshedFileMapper; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; diff --git a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java index 31866ad449e1..5c94ed307fc7 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java @@ -25,9 +25,10 @@ import 
com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.primitives.Ints; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.MutableBitmap; + import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.DruidPredicateFactory; diff --git a/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java b/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java index e916588a0dff..e55e999da80f 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionMergerLegacy.java @@ -25,10 +25,10 @@ import com.google.common.io.Files; import com.google.common.io.OutputSupplier; import com.google.common.primitives.Ints; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.spatial.ImmutableRTree; -import com.metamx.collections.spatial.RTree; -import com.metamx.collections.spatial.split.LinearGutmanSplitStrategy; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.spatial.ImmutableRTree; +import io.druid.collections.spatial.RTree; +import io.druid.collections.spatial.split.LinearGutmanSplitStrategy; import io.druid.common.guava.FileOutputSupplier; import io.druid.common.utils.SerializerUtils; import io.druid.java.util.common.ByteBufferUtils; diff --git a/processing/src/main/java/io/druid/segment/StringDimensionMergerV9.java b/processing/src/main/java/io/druid/segment/StringDimensionMergerV9.java index 595b4c78a118..eee670ac65d7 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionMergerV9.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionMergerV9.java @@ -25,12 
+25,12 @@ import com.google.common.io.ByteStreams; import com.google.common.io.Closer; import com.google.common.io.Files; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.spatial.ImmutableRTree; -import com.metamx.collections.spatial.RTree; -import com.metamx.collections.spatial.split.LinearGutmanSplitStrategy; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.spatial.ImmutableRTree; +import io.druid.collections.spatial.RTree; +import io.druid.collections.spatial.split.LinearGutmanSplitStrategy; import io.druid.java.util.common.ByteBufferUtils; import io.druid.java.util.common.ISE; import io.druid.java.util.common.logger.Logger; diff --git a/processing/src/main/java/io/druid/segment/column/BitmapIndex.java b/processing/src/main/java/io/druid/segment/column/BitmapIndex.java index c64ecfe6f46e..b28b66a0c562 100644 --- a/processing/src/main/java/io/druid/segment/column/BitmapIndex.java +++ b/processing/src/main/java/io/druid/segment/column/BitmapIndex.java @@ -19,8 +19,8 @@ package io.druid.segment.column; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; /** */ diff --git a/processing/src/main/java/io/druid/segment/column/SpatialIndex.java b/processing/src/main/java/io/druid/segment/column/SpatialIndex.java index a905936ca08f..54b5887752be 100644 --- a/processing/src/main/java/io/druid/segment/column/SpatialIndex.java +++ b/processing/src/main/java/io/druid/segment/column/SpatialIndex.java @@ -19,7 +19,7 @@ package io.druid.segment.column; -import com.metamx.collections.spatial.ImmutableRTree; +import 
io.druid.collections.spatial.ImmutableRTree; /** */ diff --git a/processing/src/main/java/io/druid/segment/data/BitmapCompressedIndexedInts.java b/processing/src/main/java/io/druid/segment/data/BitmapCompressedIndexedInts.java index 40d84efb8ccb..2c78d39429cf 100644 --- a/processing/src/main/java/io/druid/segment/data/BitmapCompressedIndexedInts.java +++ b/processing/src/main/java/io/druid/segment/data/BitmapCompressedIndexedInts.java @@ -20,7 +20,7 @@ package io.druid.segment.data; import com.google.common.collect.Ordering; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.segment.IntIteratorUtils; import it.unimi.dsi.fastutil.ints.IntIterator; diff --git a/processing/src/main/java/io/druid/segment/data/BitmapSerdeFactory.java b/processing/src/main/java/io/druid/segment/data/BitmapSerdeFactory.java index cbf7dbddb5f9..a7c6fe6bcfb2 100644 --- a/processing/src/main/java/io/druid/segment/data/BitmapSerdeFactory.java +++ b/processing/src/main/java/io/druid/segment/data/BitmapSerdeFactory.java @@ -21,8 +21,8 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; /** */ diff --git a/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java b/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java index 3442f4edca89..42b6285500ea 100644 --- a/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java +++ b/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java @@ -20,11 +20,11 @@ package io.druid.segment.data; import com.google.common.collect.Ordering; -import com.metamx.collections.bitmap.BitmapFactory; -import 
com.metamx.collections.bitmap.ConciseBitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.WrappedImmutableConciseBitmap; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.WrappedImmutableConciseBitmap; +import io.druid.extendedset.intset.ImmutableConciseSet; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/io/druid/segment/data/IndexedRTree.java b/processing/src/main/java/io/druid/segment/data/IndexedRTree.java index ce6f778dc1c9..ba1ea9123aa0 100644 --- a/processing/src/main/java/io/druid/segment/data/IndexedRTree.java +++ b/processing/src/main/java/io/druid/segment/data/IndexedRTree.java @@ -20,8 +20,8 @@ package io.druid.segment.data; import com.google.common.collect.Ordering; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.spatial.ImmutableRTree; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/io/druid/segment/data/RoaringBitmapSerdeFactory.java b/processing/src/main/java/io/druid/segment/data/RoaringBitmapSerdeFactory.java index 86e6a0d02173..9c2ae1d21a85 100644 --- a/processing/src/main/java/io/druid/segment/data/RoaringBitmapSerdeFactory.java +++ b/processing/src/main/java/io/druid/segment/data/RoaringBitmapSerdeFactory.java @@ -22,10 +22,10 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.Ordering; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; -import com.metamx.collections.bitmap.WrappedImmutableRoaringBitmap; 
+import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.bitmap.WrappedImmutableRoaringBitmap; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import java.nio.ByteBuffer; diff --git a/processing/src/main/java/io/druid/segment/filter/AndFilter.java b/processing/src/main/java/io/druid/segment/filter/AndFilter.java index 56fc83af8445..c9a4f3a6d3ae 100644 --- a/processing/src/main/java/io/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/AndFilter.java @@ -21,7 +21,7 @@ import com.google.common.base.Joiner; import com.google.common.collect.Lists; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BooleanFilter; import io.druid.query.filter.Filter; diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index 946050f3f324..5318d21700d5 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -21,7 +21,7 @@ import com.google.common.base.Predicate; import com.google.common.base.Supplier; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index 1443ef9f4b6e..c3146038ea4d 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ 
b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -21,7 +21,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Predicate; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.DruidLongPredicate; diff --git a/processing/src/main/java/io/druid/segment/filter/Filters.java b/processing/src/main/java/io/druid/segment/filter/Filters.java index e322480592b8..0123d8df1b70 100644 --- a/processing/src/main/java/io/druid/segment/filter/Filters.java +++ b/processing/src/main/java/io/druid/segment/filter/Filters.java @@ -24,7 +24,7 @@ import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.common.guava.GuavaUtils; import io.druid.java.util.common.guava.FunctionalIterable; import io.druid.query.Query; diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index 0b34c6fddaa1..f650260b024b 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -24,7 +24,7 @@ import com.google.common.base.Strings; import com.google.common.base.Supplier; import com.google.common.collect.Iterables; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.DruidLongPredicate; diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java 
b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index 760d3a98b2ce..85084e82f44b 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -20,7 +20,7 @@ package io.druid.segment.filter; import com.google.common.base.Predicate; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.JavaScriptDimFilter; diff --git a/processing/src/main/java/io/druid/segment/filter/NotFilter.java b/processing/src/main/java/io/druid/segment/filter/NotFilter.java index 190b727b32d4..0e3c73118242 100644 --- a/processing/src/main/java/io/druid/segment/filter/NotFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/NotFilter.java @@ -19,7 +19,7 @@ package io.druid.segment.filter; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; diff --git a/processing/src/main/java/io/druid/segment/filter/OrFilter.java b/processing/src/main/java/io/druid/segment/filter/OrFilter.java index f44fd3b8af52..ba288b381645 100644 --- a/processing/src/main/java/io/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/OrFilter.java @@ -21,7 +21,7 @@ import com.google.common.base.Joiner; import com.google.common.collect.Lists; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BooleanFilter; import io.druid.query.filter.Filter; diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java 
b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index d7c9d102c782..77b98fd40b67 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -19,7 +19,7 @@ package io.druid.segment.filter; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; diff --git a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java index 988f61eccb89..ff28649aac8a 100644 --- a/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SpatialFilter.java @@ -20,8 +20,8 @@ import com.google.common.base.Preconditions; import com.google.common.base.Predicate; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.spatial.search.Bound; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.search.Bound; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.DruidLongPredicate; import io.druid.query.filter.DruidPredicateFactory; diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java index 95f3e4f12905..ebd008a75d68 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAdapter.java @@ -22,8 +22,8 @@ import com.google.common.base.Function; import com.google.common.collect.Iterators; import com.google.common.collect.Maps; -import com.metamx.collections.bitmap.BitmapFactory; -import 
com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; import io.druid.java.util.common.logger.Logger; import io.druid.segment.DimensionHandler; import io.druid.segment.DimensionIndexer; diff --git a/processing/src/main/java/io/druid/segment/serde/BitmapIndexColumnPartSupplier.java b/processing/src/main/java/io/druid/segment/serde/BitmapIndexColumnPartSupplier.java index ab162f535201..ea204ced0d11 100644 --- a/processing/src/main/java/io/druid/segment/serde/BitmapIndexColumnPartSupplier.java +++ b/processing/src/main/java/io/druid/segment/serde/BitmapIndexColumnPartSupplier.java @@ -20,8 +20,8 @@ package io.druid.segment.serde; import com.google.common.base.Supplier; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.segment.column.BitmapIndex; import io.druid.segment.data.GenericIndexed; diff --git a/processing/src/main/java/io/druid/segment/serde/DictionaryEncodedColumnPartSerde.java b/processing/src/main/java/io/druid/segment/serde/DictionaryEncodedColumnPartSerde.java index 39b7c7c5bfcb..b6398fc6de29 100644 --- a/processing/src/main/java/io/druid/segment/serde/DictionaryEncodedColumnPartSerde.java +++ b/processing/src/main/java/io/druid/segment/serde/DictionaryEncodedColumnPartSerde.java @@ -23,8 +23,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.primitives.Ints; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.java.util.common.IAE; import io.druid.segment.CompressedVSizeIndexedSupplier; import 
io.druid.segment.CompressedVSizeIndexedV3Supplier; diff --git a/processing/src/main/java/io/druid/segment/serde/SpatialIndexColumnPartSupplier.java b/processing/src/main/java/io/druid/segment/serde/SpatialIndexColumnPartSupplier.java index f0351990dd1a..04435448ae7c 100644 --- a/processing/src/main/java/io/druid/segment/serde/SpatialIndexColumnPartSupplier.java +++ b/processing/src/main/java/io/druid/segment/serde/SpatialIndexColumnPartSupplier.java @@ -19,7 +19,7 @@ package io.druid.segment.serde; import com.google.common.base.Supplier; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.segment.column.SpatialIndex; /** diff --git a/processing/src/test/java/io/druid/segment/BitmapOffsetTest.java b/processing/src/test/java/io/druid/segment/BitmapOffsetTest.java index 38b65fcdc596..dd8161023229 100644 --- a/processing/src/test/java/io/druid/segment/BitmapOffsetTest.java +++ b/processing/src/test/java/io/druid/segment/BitmapOffsetTest.java @@ -23,11 +23,11 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import com.metamx.collections.bitmap.BitSetBitmapFactory; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ConciseBitmapFactory; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.bitmap.BitSetBitmapFactory; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; import io.druid.segment.data.Offset; import org.junit.Assert; import org.junit.Test; diff --git a/processing/src/test/java/io/druid/segment/EmptyIndexTest.java b/processing/src/test/java/io/druid/segment/EmptyIndexTest.java index 2885be71c21b..8d21f034e9b3 100644 
--- a/processing/src/test/java/io/druid/segment/EmptyIndexTest.java +++ b/processing/src/test/java/io/druid/segment/EmptyIndexTest.java @@ -21,7 +21,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import com.metamx.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; import io.druid.granularity.QueryGranularities; import io.druid.query.aggregation.AggregatorFactory; import io.druid.segment.column.Column; diff --git a/processing/src/test/java/io/druid/segment/IndexMergerTest.java b/processing/src/test/java/io/druid/segment/IndexMergerTest.java index a6651c09d8fc..85aef5928e41 100644 --- a/processing/src/test/java/io/druid/segment/IndexMergerTest.java +++ b/processing/src/test/java/io/druid/segment/IndexMergerTest.java @@ -27,8 +27,9 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.primitives.Ints; -import com.metamx.collections.bitmap.RoaringBitmapFactory; + import io.druid.data.input.InputRow; +import io.druid.collections.bitmap.RoaringBitmapFactory; import io.druid.data.input.MapBasedInputRow; import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; diff --git a/processing/src/test/java/io/druid/segment/IndexMergerV9WithSpatialIndexTest.java b/processing/src/test/java/io/druid/segment/IndexMergerV9WithSpatialIndexTest.java index f4a3186f69d0..ee9db667a339 100644 --- a/processing/src/test/java/io/druid/segment/IndexMergerV9WithSpatialIndexTest.java +++ b/processing/src/test/java/io/druid/segment/IndexMergerV9WithSpatialIndexTest.java @@ -23,8 +23,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.metamx.collections.spatial.search.RadiusBound; -import com.metamx.collections.spatial.search.RectangularBound; +import io.druid.collections.spatial.search.RadiusBound; +import 
io.druid.collections.spatial.search.RectangularBound; import io.druid.data.input.MapBasedInputRow; import io.druid.data.input.impl.DimensionsSpec; import io.druid.data.input.impl.SpatialDimensionSchema; diff --git a/processing/src/test/java/io/druid/segment/data/BitmapCreationBenchmark.java b/processing/src/test/java/io/druid/segment/data/BitmapCreationBenchmark.java index 26dcb8bc8fb2..c4de414e4d42 100644 --- a/processing/src/test/java/io/druid/segment/data/BitmapCreationBenchmark.java +++ b/processing/src/test/java/io/druid/segment/data/BitmapCreationBenchmark.java @@ -20,9 +20,9 @@ import com.carrotsearch.junitbenchmarks.AbstractBenchmark; import com.carrotsearch.junitbenchmarks.BenchmarkOptions; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; import io.druid.java.util.common.logger.Logger; import org.junit.AfterClass; import org.junit.Assert; diff --git a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java index c45f773bfbe9..bd1432c825e6 100644 --- a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java @@ -21,12 +21,12 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ConciseBitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; -import com.metamx.collections.spatial.ImmutableRTree; +import 
io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.query.extraction.DimExtractionFn; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; diff --git a/processing/src/test/java/io/druid/segment/filter/SpatialFilterBonusTest.java b/processing/src/test/java/io/druid/segment/filter/SpatialFilterBonusTest.java index 591056cecb8d..c1a46038aa1e 100644 --- a/processing/src/test/java/io/druid/segment/filter/SpatialFilterBonusTest.java +++ b/processing/src/test/java/io/druid/segment/filter/SpatialFilterBonusTest.java @@ -23,8 +23,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; -import com.metamx.collections.spatial.search.RadiusBound; -import com.metamx.collections.spatial.search.RectangularBound; +import io.druid.collections.spatial.search.RadiusBound; +import io.druid.collections.spatial.search.RectangularBound; import io.druid.data.input.MapBasedInputRow; import io.druid.data.input.impl.DimensionsSpec; import io.druid.data.input.impl.SpatialDimensionSchema; diff --git a/processing/src/test/java/io/druid/segment/filter/SpatialFilterTest.java b/processing/src/test/java/io/druid/segment/filter/SpatialFilterTest.java index 3458559a100a..bc4f283b42c9 100644 --- a/processing/src/test/java/io/druid/segment/filter/SpatialFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/SpatialFilterTest.java @@ -23,8 +23,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.metamx.collections.spatial.search.RadiusBound; -import com.metamx.collections.spatial.search.RectangularBound; 
+import io.druid.collections.spatial.search.RadiusBound; +import io.druid.collections.spatial.search.RectangularBound; import io.druid.data.input.MapBasedInputRow; import io.druid.data.input.impl.DimensionsSpec; import io.druid.data.input.impl.SpatialDimensionSchema; diff --git a/services/src/main/java/io/druid/cli/DumpSegment.java b/services/src/main/java/io/druid/cli/DumpSegment.java index ec144a3a708a..5420270a7cb6 100644 --- a/services/src/main/java/io/druid/cli/DumpSegment.java +++ b/services/src/main/java/io/druid/cli/DumpSegment.java @@ -35,10 +35,10 @@ import com.google.inject.Key; import com.google.inject.Module; import com.google.inject.name.Names; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ConciseBitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ConciseBitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; import io.airlift.airline.Command; import io.airlift.airline.Option; From b1fc56672ebc5d7f66583541412a873d6f280f39 Mon Sep 17 00:00:00 2001 From: dwivedi Date: Wed, 2 Nov 2016 10:24:50 -0700 Subject: [PATCH 2/5] pom.xml cleanup. 
--- bytebuffer-collections/pom.xml | 4 ---- extendedset/pom.xml | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/bytebuffer-collections/pom.xml b/bytebuffer-collections/pom.xml index 668692de8087..0709b2cec248 100755 --- a/bytebuffer-collections/pom.xml +++ b/bytebuffer-collections/pom.xml @@ -104,10 +104,6 @@ - - org.apache.maven.plugins - maven-release-plugin - org.apache.maven.plugins maven-surefire-plugin diff --git a/extendedset/pom.xml b/extendedset/pom.xml index 2831462af2c8..eae8bc41e087 100755 --- a/extendedset/pom.xml +++ b/extendedset/pom.xml @@ -51,16 +51,4 @@ - - - - maven-compiler-plugin - 2.5.1 - - 1.7 - 1.7 - - - - From 4fe6199ceb08ff2688692c44cd31043b022d5523 Mon Sep 17 00:00:00 2001 From: dwivedi Date: Wed, 2 Nov 2016 11:53:39 -0700 Subject: [PATCH 3/5] Remove extendedset. --- .../druid/benchmark/BoundFilterBenchmark.java | 37 +- .../benchmark/ConciseComplementBenchmark.java | 5 +- bytebuffer-collections/pom.xml | 4 +- .../bitmap/ConciseBitmapFactory.java | 5 +- .../bitmap/WrappedConciseBitmap.java | 12 +- .../bitmap/WrappedConciseIntIterator.java | 3 +- .../bitmap/WrappedImmutableConciseBitmap.java | 7 +- .../collections/bitmap/BitmapBenchmark.java | 22 +- .../bitmap/ConciseBitmapFactoryTest.java | 14 +- .../bitmap/RangeBitmapBenchmarkTest.java | 13 +- .../bitmap/UniformBitmapBenchmarkTest.java | 13 +- extendedset/pom.xml | 54 - .../extendedset/AbstractExtendedSet.java | 1432 -------- .../io/druid/extendedset/ExtendedSet.java | 592 --- .../extendedset/intset/AbstractIntSet.java | 744 ---- .../io/druid/extendedset/intset/ArraySet.java | 1157 ------ .../druid/extendedset/intset/ConciseSet.java | 3178 ----------------- .../extendedset/intset/ConciseSetUtils.java | 563 --- .../io/druid/extendedset/intset/FastSet.java | 1403 -------- .../druid/extendedset/intset/HashIntSet.java | 1012 ------ .../intset/ImmutableConciseSet.java | 1157 ------ .../io/druid/extendedset/intset/IntSet.java | 662 ---- 
.../druid/extendedset/utilities/ArrayMap.java | 299 -- .../druid/extendedset/utilities/BitCount.java | 350 -- .../extendedset/utilities/CollectionMap.java | 317 -- .../extendedset/utilities/IntHashCode.java | 103 - .../druid/extendedset/utilities/IntList.java | 115 - .../utilities/IntSetStatistics.java | 689 ---- .../utilities/random/MersenneTwister.java | 869 ----- .../utilities/random/MersenneTwisterFast.java | 1470 -------- .../wrappers/GenericExtendedSet.java | 885 ----- .../extendedset/wrappers/IndexedSet.java | 741 ---- .../extendedset/wrappers/IntegerSet.java | 580 --- .../druid/extendedset/wrappers/LongSet.java | 1692 --------- .../wrappers/matrix/BinaryMatrix.java | 2052 ----------- .../extendedset/wrappers/matrix/Pair.java | 106 - .../extendedset/wrappers/matrix/PairMap.java | 448 --- .../extendedset/wrappers/matrix/PairSet.java | 1403 -------- .../test/java/io/druid/extendedset/Debug.java | 1858 ---------- .../io/druid/extendedset/Performance.java | 496 --- .../io/druid/extendedset/RandomNumbers.java | 242 -- .../intset/ImmutableConciseSetTest.java | 1972 ---------- pom.xml | 1 - .../data/ConciseBitmapSerdeFactory.java | 7 +- 44 files changed, 77 insertions(+), 28707 deletions(-) delete mode 100755 extendedset/pom.xml delete mode 100755 extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/FastSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java delete mode 100755 
extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java delete mode 100755 extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java delete mode 100755 extendedset/src/test/java/io/druid/extendedset/Debug.java delete mode 100755 extendedset/src/test/java/io/druid/extendedset/Performance.java delete mode 100755 extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java delete mode 100755 
extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java diff --git a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java index 3ae4ff03d73e..adba83188fe7 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java @@ -19,16 +19,32 @@ package io.druid.benchmark; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.FluentIterable; + import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.collections.bitmap.MutableBitmap; import io.druid.collections.bitmap.RoaringBitmapFactory; import io.druid.collections.spatial.ImmutableRTree; - -import io.druid.extendedset.intset.ConciseSetUtils; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; import io.druid.query.ordering.StringComparators; @@ -39,22 +55,7 @@ import io.druid.segment.data.RoaringBitmapSerdeFactory; import io.druid.segment.filter.BoundFilter; import io.druid.segment.serde.BitmapIndexColumnPartSupplier; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import 
org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; +import it.uniroma3.mat.extendedset.intset.ConciseSetUtils; @State(Scope.Benchmark) @Fork(value = 1) diff --git a/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java index a31a3f713f39..feb8f0f4ba8f 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java @@ -20,7 +20,8 @@ package io.druid.benchmark; -import io.druid.extendedset.intset.ImmutableConciseSet; +import java.util.concurrent.TimeUnit; + import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Mode; @@ -30,7 +31,7 @@ import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.infra.Blackhole; -import java.util.concurrent.TimeUnit; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; @State(Scope.Benchmark) public class ConciseComplementBenchmark diff --git a/bytebuffer-collections/pom.xml b/bytebuffer-collections/pom.xml index 0709b2cec248..7a11fed8b8e1 100755 --- a/bytebuffer-collections/pom.xml +++ b/bytebuffer-collections/pom.xml @@ -34,9 +34,9 @@ - io.druid + com.metamx extendedset - ${project.parent.version} + 1.3.10 com.google.guava diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java 
b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java index d9d9324d61ea..679f53316828 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java @@ -19,12 +19,11 @@ package io.druid.collections.bitmap; - -import io.druid.extendedset.intset.ImmutableConciseSet; - import java.nio.ByteBuffer; import java.util.Iterator; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; + /** * As the name suggests, this class instantiates bitmaps of the types * WrappedConciseBitmap and WrappedImmutableConciseBitmap. diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java index a4651ae4db91..6fe730ce0f19 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java @@ -19,13 +19,15 @@ package io.druid.collections.bitmap; -import com.google.common.primitives.Ints; -import io.druid.extendedset.intset.ConciseSet; -import io.druid.extendedset.intset.ImmutableConciseSet; -import io.druid.extendedset.intset.IntSet; +import java.nio.ByteBuffer; + import org.roaringbitmap.IntIterator; -import java.nio.ByteBuffer; +import com.google.common.primitives.Ints; + +import it.uniroma3.mat.extendedset.intset.ConciseSet; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import it.uniroma3.mat.extendedset.intset.IntSet; public class WrappedConciseBitmap implements MutableBitmap { diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java index e4a21d9adfd9..c357cefdebd2 100755 
--- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java @@ -19,9 +19,10 @@ package io.druid.collections.bitmap; -import io.druid.extendedset.intset.IntSet; import org.roaringbitmap.IntIterator; +import it.uniroma3.mat.extendedset.intset.IntSet; + /** */ public class WrappedConciseIntIterator implements IntIterator diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java index a0e0203dcd62..5fe4515b89fc 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java @@ -20,11 +20,12 @@ package io.druid.collections.bitmap; -import io.druid.extendedset.intset.ImmutableConciseSet; -import io.druid.extendedset.intset.IntSet; +import java.nio.ByteBuffer; + import org.roaringbitmap.IntIterator; -import java.nio.ByteBuffer; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import it.uniroma3.mat.extendedset.intset.IntSet; public class WrappedImmutableConciseBitmap implements ImmutableBitmap { diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java index 6ffe81a79a3e..3366319adaaf 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java @@ -19,11 +19,12 @@ package io.druid.collections.bitmap; -import com.carrotsearch.junitbenchmarks.BenchmarkOptions; -import com.carrotsearch.junitbenchmarks.BenchmarkRule; -import 
com.carrotsearch.junitbenchmarks.Clock; -import com.google.common.collect.Lists; -import io.druid.extendedset.intset.ImmutableConciseSet; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Random; + import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -32,11 +33,12 @@ import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Random; +import com.carrotsearch.junitbenchmarks.BenchmarkOptions; +import com.carrotsearch.junitbenchmarks.BenchmarkRule; +import com.carrotsearch.junitbenchmarks.Clock; +import com.google.common.collect.Lists; + +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; @BenchmarkOptions(clock = Clock.NANO_TIME, benchmarkRounds = 50) diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java index 673431e6b6e0..7ab0ce5301e1 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java @@ -19,17 +19,19 @@ package io.druid.collections.bitmap; +import java.util.Arrays; +import java.util.Set; + +import org.junit.Test; + import com.google.common.base.Function; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import io.druid.extendedset.intset.ConciseSet; -import io.druid.extendedset.intset.ImmutableConciseSet; -import junit.framework.Assert; -import org.junit.Test; -import java.util.Arrays; -import java.util.Set; +import 
it.uniroma3.mat.extendedset.intset.ConciseSet; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import junit.framework.Assert; public class ConciseBitmapFactoryTest { diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java index 8800d167350f..84c170556cd5 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java @@ -19,17 +19,18 @@ package io.druid.collections.bitmap; -import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; -import com.carrotsearch.junitbenchmarks.annotation.LabelType; -import io.druid.extendedset.intset.ConciseSet; -import io.druid.extendedset.intset.ImmutableConciseSet; -import io.druid.test.annotation.Benchmark; +import java.util.BitSet; import org.junit.BeforeClass; import org.junit.experimental.categories.Category; import org.roaringbitmap.buffer.MutableRoaringBitmap; -import java.util.BitSet; +import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; +import com.carrotsearch.junitbenchmarks.annotation.LabelType; + +import io.druid.test.annotation.Benchmark; +import it.uniroma3.mat.extendedset.intset.ConciseSet; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; @Category({Benchmark.class}) @BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java index 7e7306d9f4c6..a88e3133fd5e 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java +++ 
b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java @@ -19,17 +19,18 @@ package io.druid.collections.bitmap; -import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; -import com.carrotsearch.junitbenchmarks.annotation.LabelType; -import io.druid.extendedset.intset.ConciseSet; -import io.druid.extendedset.intset.ImmutableConciseSet; -import io.druid.test.annotation.Benchmark; +import java.util.BitSet; import org.junit.BeforeClass; import org.junit.experimental.categories.Category; import org.roaringbitmap.buffer.MutableRoaringBitmap; -import java.util.BitSet; +import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; +import com.carrotsearch.junitbenchmarks.annotation.LabelType; + +import io.druid.test.annotation.Benchmark; +import it.uniroma3.mat.extendedset.intset.ConciseSet; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; @Category({Benchmark.class}) @BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) diff --git a/extendedset/pom.xml b/extendedset/pom.xml deleted file mode 100755 index eae8bc41e087..000000000000 --- a/extendedset/pom.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - - - 4.0.0 - - extendedset - extendedset - - Implementation of CONCISE (COmpressed 'N" Composable Integer SEt) bit map compression algorithm by Alessandro - Colantonio with some enhanced features - http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf - - - - io.druid - druid - 0.9.3-SNAPSHOT - - - - - com.google.guava - guava - 16.0.1 - - - - - junit - junit - 4.8.1 - test - - - - diff --git a/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java deleted file mode 100755 index c47eb79bc0df..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java +++ /dev/null @@ -1,1432 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under 
the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset; - - -import java.util.AbstractCollection; -import java.util.AbstractSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; -import java.util.NoSuchElementException; - -/** - * This class provides a skeletal implementation of the {@link ExtendedSet} - * interface to minimize the effort required to implement this interface. - *

- * The process of implementing a set by extending this class is very similar, - * for example, to that of implementing a {@link Collection} by extending - * {@link AbstractCollection}. - * - * @param the type of elements maintained by this set - * - * @author Alessandro Colantonio - * @version $Id: AbstractExtendedSet.java 157 2011-11-14 14:25:15Z cocciasik $ - */ -public abstract class AbstractExtendedSet extends AbstractSet implements ExtendedSet -{ - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet intersection(Collection other) - { - ExtendedSet clone = clone(); - clone.retainAll(other); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet union(Collection other) - { - ExtendedSet clone = clone(); - clone.addAll(other); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet difference(Collection other) - { - ExtendedSet clone = clone(); - clone.removeAll(other); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet symmetricDifference(Collection other) - { - ExtendedSet res = union(other); - res.removeAll(intersection(other)); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet complemented() - { - ExtendedSet clone = clone(); - clone.complement(); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(Collection other) - { - return other == null || other.isEmpty() || intersectionSize(other) > 0; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(Collection other, int minElements) - { - if (minElements < 1) { - throw new IllegalArgumentException(); - } - return intersectionSize(other) >= minElements; - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(Collection other) - { - if (other == null || other.isEmpty() || isEmpty()) { - return 0; - } - return intersection(other).size(); - } - - /** - * {@inheritDoc} - */ - @Override - public int 
unionSize(Collection other) - { - return other == null ? size() : size() + other.size() - intersectionSize(other); - } - - /** - * {@inheritDoc} - */ - @Override - public int symmetricDifferenceSize(Collection other) - { - return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); - } - - /** - * {@inheritDoc} - */ - @Override - public int differenceSize(Collection other) - { - return other == null ? size() : size() - intersectionSize(other); - } - - /** - * {@inheritDoc} - */ - @Override - public int complementSize() - { - return complemented().size(); - } - - /** - * {@inheritDoc} - */ - @Override - public abstract ExtendedSet empty(); - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet headSet(T toElement) - { - return new ExtendedSubSet(null, toElement); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet subSet(T fromElement, T toElement) - { - return new ExtendedSubSet(fromElement, toElement); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet tailSet(T fromElement) - { - return new ExtendedSubSet(fromElement, null); - } - - /** - * {@inheritDoc} - */ - @Override - public T first() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - return iterator().next(); - } - - /** - * {@inheritDoc} - */ - @Override - public T last() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - return descendingIterator().next(); - } - - /** - * {@inheritDoc} - *

- * NOTE: When overriding this method, please note that - * Object.clone() is much slower then performing - * new and "manually" copying data! - */ - @SuppressWarnings("unchecked") - @Override - public ExtendedSet clone() - { - try { - return (ExtendedSet) super.clone(); - } - catch (CloneNotSupportedException e) { - throw new InternalError(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public abstract double bitmapCompressionRatio(); - - /** - * {@inheritDoc} - */ - @Override - public abstract double collectionCompressionRatio(); - - /** - * {@inheritDoc} - */ - @Override - @SuppressWarnings("unchecked") - public ExtendedIterator descendingIterator() - { - // used to compare items - Comparator tmpComp = AbstractExtendedSet.this.comparator(); - if (tmpComp == null) { - tmpComp = new Comparator() - { - @Override - public int compare(T o1, T o2) - { - return ((Comparable) o1).compareTo(o2); - } - }; - } - final Comparator comp = tmpComp; - - return new ExtendedIterator() - { - // iterator from last element - private final ListIterator itr = new ArrayList(AbstractExtendedSet.this) - .listIterator(AbstractExtendedSet.this.size()); - - @Override - public boolean hasNext() - { - return itr.hasPrevious(); - } - - @Override - public T next() - { - return itr.previous(); - } - - @Override - public void skipAllBefore(T element) - { - // iterate until the element is found - while (itr.hasPrevious()) { - int res = comp.compare(itr.previous(), element); - - // the element has not been found, thus the next call to - // itr.previous() will provide the right value - if (res < 0) { - return; - } - - // the element has been found. 
Hence, we have to get back - // to make itr.previous() provide the right value - if (res == 0) { - itr.next(); - return; - } - } - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public Iterable descending() - { - return new Iterable() - { - @Override - public Iterator iterator() - { - return descendingIterator(); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public List> powerSet() - { - return powerSet(1, Integer.MAX_VALUE); - } - - /** - * {@inheritDoc} - */ - @Override - public List> powerSet(int min, int max) - { - if (min < 1 || max < min) { - throw new IllegalArgumentException(); - } - - // special cases - List> res = new ArrayList>(); - if (size() < min) { - return res; - } - if (size() == min) { - res.add(this.clone()); - return res; - } - if (size() == min + 1) { - for (T item : this.descending()) { - ExtendedSet set = this.clone(); - set.remove(item); - res.add(set); - } - if (max > min) { - res.add(this.clone()); - } - return res; - } - - // the first level contains only one prefix made up of all 1-subsets - List>> level = new ArrayList>>(); - level.add(new ArrayList>()); - for (T item : this) { - ExtendedSet single = this.empty(); - single.add(item); - level.get(0).add(single); - } - if (min == 1) { - res.addAll(level.get(0)); - } - - // all combinations - int l = 2; - while (!level.isEmpty() && l <= max) { - List>> newLevel = new ArrayList>>(); - for (List> prefix : level) { - for (int i = 0; i < prefix.size() - 1; i++) { - List> newPrefix = new ArrayList>(); - for (int j = i + 1; j < prefix.size(); j++) { - ExtendedSet x = prefix.get(i).clone(); - x.add(prefix.get(j).last()); - newPrefix.add(x); - if (l >= min) { - res.add(x); - } - } - if (newPrefix.size() > 1) { - newLevel.add(newPrefix); - } - } - } - level = newLevel; - l++; - } - - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public int powerSetSize() - { - return 
isEmpty() ? 0 : (int) Math.pow(2, size()) - 1; - } - - /** - * {@inheritDoc} - */ - @Override - public int powerSetSize(int min, int max) - { - if (min < 1 || max < min) { - throw new IllegalArgumentException(); - } - final int size = size(); - - // special cases - if (size < min) { - return 0; - } - if (size == min) { - return 1; - } - - /* - * Compute the sum of binomial coefficients ranging from (size choose - * max) to (size choose min) using dynamic programming - */ - - // trivial cases - max = Math.min(size, max); - if (max == min && (max == 0 || max == size)) { - return 1; - } - - // compute all binomial coefficients for "n" - int[] b = new int[size + 1]; - for (int i = 0; i <= size; i++) { - b[i] = 1; - } - for (int i = 1; i <= size; i++) { - for (int j = i - 1; j > 0; j--) { - b[j] += b[j - 1]; - } - } - - // sum binomial coefficients - int res = 0; - for (int i = min; i <= max; i++) { - res += b[i]; - } - return res; - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public int compareTo(ExtendedSet o) - { - Iterator thisIterator = this.descendingIterator(); - Iterator otherIterator = o.descendingIterator(); - while (thisIterator.hasNext() && otherIterator.hasNext()) { - T thisItem = thisIterator.next(); - T otherItem = otherIterator.next(); - int res = ((Comparable) thisItem).compareTo(otherItem); - if (res != 0) { - return res; - } - } - return thisIterator.hasNext() ? 1 : (otherIterator.hasNext() ? 
-1 : 0); - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(T from, T to) - { - ExtendedSet toAdd = empty(); - toAdd.add(to); - toAdd.complement(); - toAdd.add(to); - - ExtendedSet toRemove = empty(); - toRemove.add(from); - toRemove.complement(); - - toAdd.removeAll(toRemove); - - this.addAll(toAdd); - } - - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public void clear(T from, T to) - { - ExtendedIterator itr = iterator(); - itr.skipAllBefore(from); - while (itr.hasNext()) { - if (((Comparable) itr.next()).compareTo(to) < 0) { - itr.remove(); - } - } - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(T e) - { - if (!add(e)) { - remove(e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public T get(int i) - { - int size = size(); - if (i < 0 || i >= size) { - throw new IndexOutOfBoundsException(); - } - - Iterator itr; - if (i < (size / 2)) { - itr = iterator(); - for (int j = 0; j <= i - 1; j++) { - itr.next(); - } - } else { - itr = descendingIterator(); - for (int j = size - 1; j >= i + 1; j--) { - itr.next(); - } - } - return itr.next(); - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(T e) - { - Iterator itr = iterator(); - int i = 0; - while (itr.hasNext()) { - if (itr.next().equals(e)) { - return i; - } - i++; - } - return -1; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet unmodifiable() - { - return new UnmodifiableExtendedSet(); - } - - /** - * {@inheritDoc} - */ - @Override - public abstract ExtendedIterator iterator(); - - /** - * {@inheritDoc} - */ - @Override - public double jaccardSimilarity(ExtendedSet other) - { - if (isEmpty() && other.isEmpty()) { - return 1D; - } - int inters = intersectionSize(other); - return (double) inters / (size() + other.size() - inters); - } - - /** - * {@inheritDoc} - */ - @Override - public double jaccardDistance(ExtendedSet other) - { - return 1D - jaccardSimilarity(other); - } - - /** - * {@inheritDoc} - */ - 
@Override - public double weightedJaccardSimilarity(ExtendedSet other) - { - if (isEmpty() && other.isEmpty()) { - return 1D; - } - ExtendedSet inters = intersection(other); - double intersSum = 0D; - for (T t : inters) { - if (t instanceof Integer) { - intersSum += (Integer) t; - } else if (t instanceof Double) { - intersSum += (Double) t; - } else if (t instanceof Float) { - intersSum += (Float) t; - } else if (t instanceof Byte) { - intersSum += (Byte) t; - } else if (t instanceof Long) { - intersSum += (Long) t; - } else if (t instanceof Short) { - intersSum += (Short) t; - } else { - throw new IllegalArgumentException("A collection of numbers is required"); - } - } - - ExtendedSet symmetricDiff = symmetricDifference(other); - double symmetricDiffSum = 0D; - for (T t : symmetricDiff) { - if (t instanceof Integer) { - symmetricDiffSum += (Integer) t; - } else if (t instanceof Double) { - symmetricDiffSum += (Double) t; - } else if (t instanceof Float) { - symmetricDiffSum += (Float) t; - } else if (t instanceof Byte) { - symmetricDiffSum += (Byte) t; - } else if (t instanceof Long) { - symmetricDiffSum += (Long) t; - } else if (t instanceof Short) { - symmetricDiffSum += (Short) t; - } else { - throw new IllegalArgumentException("A collection of numbers is required"); - } - } - - return intersSum / (intersSum + symmetricDiffSum); - } - - /** - * {@inheritDoc} - */ - @Override - public double weightedJaccardDistance(ExtendedSet other) - { - return 1D - weightedJaccardSimilarity(other); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet convert(Object... 
e) - { - if (e == null) { - return empty(); - } - return convert(Arrays.asList(e)); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public ExtendedSet convert(Collection c) - { - ExtendedSet res = empty(); - res.addAll((Collection) c); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - return toString(); - } - - /** - * Base class for {@link ExtendedSubSet} and {@link UnmodifiableExtendedSet} - */ - protected abstract class FilteredSet implements ExtendedSet - { - /** - * @return the container instance, namely the "internal" representation - */ - protected abstract ExtendedSet raw(); - - /* - * Converter methods that allows for good performances with collection - * operations by directly working on internal representation - */ - @Override - public ExtendedSet convert(Collection c) - { - if (c instanceof AbstractExtendedSet.FilteredSet) { - convert(((AbstractExtendedSet.FilteredSet) c).raw()); - } - return raw().convert(c); - } - - @Override - public ExtendedSet convert(Object... 
e) - { - return raw().convert(e); - } - - /* - * Methods that directly apply to container instance - */ - @Override - public ExtendedSet clone() {return AbstractExtendedSet.this.clone();} - - @Override - public ExtendedSet empty() {return AbstractExtendedSet.this.empty();} - - @Override - public Comparator comparator() {return AbstractExtendedSet.this.comparator();} - - /* - * Read-only methods - */ - @Override - public ExtendedSet unmodifiable() {return raw().unmodifiable();} - - @Override - public ExtendedIterator iterator() {return raw().iterator();} - - @Override - public ExtendedIterator descendingIterator() {return raw().descendingIterator();} - - @Override - public boolean isEmpty() {return raw().isEmpty();} - - @Override - public boolean equals(Object o) {return raw().equals(o);} - - @Override - public int hashCode() {return raw().hashCode();} - - @Override - public int compareTo(ExtendedSet o) {return raw().compareTo(o);} - - @Override - public T first() {return raw().first();} - - @Override - public T last() {return raw().last();} - - @Override - public double bitmapCompressionRatio() {return raw().bitmapCompressionRatio();} - - @Override - public double collectionCompressionRatio() {return raw().collectionCompressionRatio();} - - @Override - public List> powerSet() {return raw().powerSet();} - - @Override - public List> powerSet(int mins, int maxs) {return raw().powerSet(mins, maxs);} - - @Override - public int powerSetSize() {return raw().powerSetSize();} - - @Override - public int powerSetSize(int mins, int maxs) {return raw().powerSetSize(mins, maxs);} - - @Override - public Object[] toArray() {return raw().toArray();} - - @Override - public X[] toArray(X[] a) {return raw().toArray(a);} - - @Override - public String toString() {return raw().toString();} - - @Override - public ExtendedSet complemented() {return raw().complemented();} - - @Override - public int complementSize() {return raw().complementSize();} - - @Override - public int size() {return 
raw().size();} - - @Override - public boolean contains(Object o) {return raw().contains(o);} - - @Override - public Iterable descending() {return raw().descending();} - - @Override - public String debugInfo() {return raw().debugInfo();} - - @Override - public T get(int i) {return raw().get(i);} - - @Override - public int indexOf(T e) {return raw().indexOf(e);} - - /* - * Methods that requires a call to convert() to assure good performances - */ - @Override - public double jaccardDistance(ExtendedSet other) {return raw().jaccardDistance(convert(other));} - - @Override - public double jaccardSimilarity(ExtendedSet other) {return raw().jaccardSimilarity(convert(other));} - - @Override - public double weightedJaccardDistance(ExtendedSet other) {return raw().weightedJaccardDistance(convert(other));} - - @Override - public double weightedJaccardSimilarity(ExtendedSet other) {return raw().weightedJaccardSimilarity(convert(other));} - - @Override - public ExtendedSet difference(Collection other) {return raw().difference(convert(other));} - - @Override - public ExtendedSet symmetricDifference(Collection other) - { - return raw().symmetricDifference(convert(other)); - } - - @Override - public ExtendedSet intersection(Collection other) {return raw().intersection(convert(other));} - - @Override - public ExtendedSet union(Collection other) {return raw().union(convert(other));} - - @Override - public int intersectionSize(Collection other) {return raw().intersectionSize(convert(other));} - - @Override - public int differenceSize(Collection other) {return raw().differenceSize(convert(other));} - - @Override - public int unionSize(Collection other) {return raw().unionSize(convert(other));} - - @Override - public int symmetricDifferenceSize(Collection other) - { - return raw().symmetricDifferenceSize(convert(other)); - } - - @Override - public boolean containsAll(Collection c) {return raw().containsAll(convert(c));} - - @Override - public boolean containsAny(Collection other) 
{return raw().containsAny(convert(other));} - - @Override - public boolean containsAtLeast( - Collection other, - int minElements - ) - {return raw().containsAtLeast(convert(other), minElements);} - } - - /** - * Read-only view of the set. - *

- * Note that it extends {@link AbstractExtendedSet} instead of implementing - * {@link ExtendedSet} because of the methods {@link #tailSet(Object)}, - * {@link #headSet(Object)}, and {@link #subSet(Object, Object)}. - */ - protected class UnmodifiableExtendedSet extends AbstractExtendedSet.FilteredSet - { - // exception message when writing operations are performed on {@link #unmodifiable()} - private final static String UNSUPPORTED_MSG = "The class is read-only!"; - - /* - * Unsupported writing methods - */ - @Override - public boolean add(T e) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public boolean addAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public boolean remove(Object o) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public boolean removeAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public boolean retainAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public void clear() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public void clear(T from, T to) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public void fill(T from, T to) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public void complement() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - @Override - public void flip(T e) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - - /* - * Special purpose methods - */ - - // create new iterators where the remove() operation is not permitted - @Override - public ExtendedIterator iterator() - { - final ExtendedIterator itr = AbstractExtendedSet.this.iterator(); - return new ExtendedIterator() - { - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public T next() {return itr.next();} - - @Override - public void 
skipAllBefore(T element) {itr.skipAllBefore(element);} - - @Override - public void remove() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - }; - } - - @Override - public ExtendedIterator descendingIterator() - { - final ExtendedIterator itr = AbstractExtendedSet.this.descendingIterator(); - return new ExtendedIterator() - { - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public T next() {return itr.next();} - - @Override - public void skipAllBefore(T element) {itr.skipAllBefore(element);} - - @Override - public void remove() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} - }; - } - - /** - * Returns a read-only subset - */ - // TODO: There is a known bug. Indeed, this implementation does not work - // since modifications to the read-write set are not reflected to the - // read-only set. - private ExtendedSet unmodifiableSubSet(T min, T max) - { - ExtendedSet res; - ExtendedSet range = AbstractExtendedSet.this.empty(); - if (min != null && max != null) { - range.fill(min, max); - range.remove(max); - res = AbstractExtendedSet.this.intersection(range).unmodifiable(); - } else if (max != null) { - range.add(max); - range.complement(); - res = AbstractExtendedSet.this.intersection(range).unmodifiable(); - } else { - range.add(min); - range.complement(); - res = AbstractExtendedSet.this.difference(range).unmodifiable(); - } - return res; - } - - // subset operations must be read-only - @Override - public ExtendedSet headSet(T toElement) {return unmodifiableSubSet(null, toElement);} - - @Override - public ExtendedSet subSet(T fromElement, T toElement) {return unmodifiableSubSet(fromElement, toElement);} - - @Override - public ExtendedSet tailSet(T fromElement) {return unmodifiableSubSet(fromElement, null);} - - @Override - public ExtendedSet unmodifiable() - { - // useless to create another instance - return this; - } - - @Override - protected ExtendedSet raw() - { - return AbstractExtendedSet.this; - } - } - - 
/** - * Used by {@link AbstractExtendedSet#headSet(T)} , {@link AbstractExtendedSet#tailSet(T)} and {@link AbstractExtendedSet#subSet(T, T)} to offer a restricted view of the entire set - */ - protected class ExtendedSubSet extends AbstractExtendedSet.FilteredSet - { - /** - * Minimun allowed element (included) and maximum allowed element - * (excluded) - */ - private final T min; - - /** - * Minimun allowed element (included) and maximum allowed element - * (excluded) - */ - private final T max; - - /** - * When max != null, it contains all elements from {@link #min} to {@link #max} - 1. Otherwise, it contains all the elements strictly below {@link #min} - * - * @uml.property name="range" - * @uml.associationEnd - */ - private final ExtendedSet range; - /** - * Comparator for elements of type T - */ - private final Comparator localComparator; - - - - /* - * PRIVATE UTILITY METHODS - */ - - // initialize the comparator - { - final Comparator c = AbstractExtendedSet.this.comparator(); - if (c != null) { - localComparator = c; - } else { - localComparator = new Comparator() - { - @SuppressWarnings("unchecked") - @Override - public int compare(T o1, T o2) - { - return ((Comparable) o1).compareTo(o2); - } - }; - } - } - - /** - * Creates the subset - * - * @param min minimun allowed element (included) - * @param max maximum allowed element (excluded) - */ - public ExtendedSubSet(T min, T max) - { - if (min == null && max == null) { - throw new IllegalArgumentException(); - } - - if (min != null && max != null - && localComparator.compare(min, max) > 0) { - throw new IllegalArgumentException("min > max"); - } - - this.min = min; - this.max = max; - - // add all elements that are strictly less than "max" - range = AbstractExtendedSet.this.empty(); - if (min != null && max != null) { - range.fill(min, max); - range.remove(max); - } else if (max != null) { - range.add(max); - range.complement(); - } else { - range.add(min); - range.complement(); - } - } - - /** - * Checks 
if a given set is completely contained within {@link #min} and - * {@link #max} - * - * @param other given set - * - * @return true if the given set is completely contained - * within {@link #min} and {@link #max} - */ - private boolean isInRange(ExtendedSet other) - { - return other.isEmpty() || - ((max == null || localComparator.compare(other.last(), max) < 0) - && (min == null || localComparator.compare(other.first(), min) >= 0)); - } - - /** - * Checks if a given element is completely contained within {@link #min} - * and {@link #max} - * - * @param e given element - * - * @return true if the given element is completely - * contained within {@link #min} and {@link #max} - */ - @SuppressWarnings("unchecked") - private boolean isInRange(Object e) - { - return (max == null || localComparator.compare((T) e, max) < 0) - && (min == null || localComparator.compare((T) e, min) >= 0); - } - - /** - * Generates a set that represent a subview of the given set, namely - * elements from {@link #min} (included) to {@link #max} (excluded) - * - * @param toFilter given set - * - * @return the subview - */ - private ExtendedSet filter(ExtendedSet toFilter) - { - if (isInRange(toFilter)) { - return toFilter; - } - if (max != null) { - return toFilter.intersection(range); - } - return toFilter.difference(range); - } - - - @Override - protected ExtendedSet raw() - { - return filter(AbstractExtendedSet.this); - } - - - - /* - * PUBLIC METHODS - */ - - @Override - public ExtendedSet headSet(T toElement) - { - if (localComparator.compare(toElement, max) > 0) { - throw new IllegalArgumentException(); - } - return AbstractExtendedSet.this.new ExtendedSubSet(min, toElement); - } - - @Override - public ExtendedSet subSet(T fromElement, T toElement) - { - if (localComparator.compare(fromElement, min) < 0 - || localComparator.compare(toElement, max) > 0) { - throw new IllegalArgumentException(); - } - return AbstractExtendedSet.this.new ExtendedSubSet(fromElement, toElement); - } - - 
@Override - public ExtendedSet tailSet(T fromElement) - { - if (localComparator.compare(fromElement, min) < 0) { - throw new IllegalArgumentException(); - } - return AbstractExtendedSet.this.new ExtendedSubSet(fromElement, max); - } - - @Override - public boolean addAll(Collection c) - { - if (c == null) { - return false; - } - ExtendedSet other = convert(c); - if (!isInRange(other)) { - throw new IllegalArgumentException(); - } - return AbstractExtendedSet.this.addAll(other); - } - - @Override - public boolean removeAll(Collection c) - { - if (c == null) { - return false; - } - return AbstractExtendedSet.this.removeAll(filter(convert(c))); - } - - @Override - public boolean retainAll(Collection c) - { - if (c == null) { - return false; - } - ExtendedSet other = convert(c); - - if (isInRange(AbstractExtendedSet.this)) { - return AbstractExtendedSet.this.retainAll(other); - } - - int sizeBefore = AbstractExtendedSet.this.size(); - ExtendedSet res = AbstractExtendedSet.this.intersection(other); - clear(); - AbstractExtendedSet.this.addAll(res); - return AbstractExtendedSet.this.size() != sizeBefore; - } - - @Override - public boolean containsAll(Collection c) - { - if (c == null) { - return false; - } - ExtendedSet other = convert(c); - return isInRange(other) && AbstractExtendedSet.this.containsAll(other); - } - - @Override - public boolean add(T e) - { - if (!isInRange(e)) { - throw new IllegalArgumentException(); - } - return AbstractExtendedSet.this.add(e); - } - - @Override - public void clear() - { - if (isInRange(AbstractExtendedSet.this)) { - AbstractExtendedSet.this.clear(); - } else if (max != null) { - AbstractExtendedSet.this.removeAll(range); - } else { - AbstractExtendedSet.this.retainAll(range); - } - } - - @Override - public boolean contains(Object o) - { - return o != null && isInRange(o) && AbstractExtendedSet.this.contains(o); - } - - @Override - public boolean remove(Object o) - { - return o != null && isInRange(o) && 
AbstractExtendedSet.this.remove(o); - } - - @Override - public int size() - { - if (isInRange(AbstractExtendedSet.this)) { - return AbstractExtendedSet.this.size(); - } - if (max != null) { - return AbstractExtendedSet.this.intersectionSize(range); - } - return AbstractExtendedSet.this.differenceSize(range); - } - - @Override - public void complement() - { - ExtendedSet c = complemented(); - clear(); - AbstractExtendedSet.this.addAll(c); - } - - @Override - public int complementSize() - { - return complemented().size(); - } - - @Override - public ExtendedSet complemented() - { - return filter(raw().complemented()); - } - - @Override - public String debugInfo() - { - return String.format("min = %s, max = %s\nmask = %s\nelements = %s", - min.toString(), max.toString(), range.debugInfo(), AbstractExtendedSet.this.toString() - ); - } - - @Override - public void clear(T from, T to) - { - ExtendedSet toRemove = empty(); - toRemove.fill(from, to); - removeAll(toRemove); - } - - @Override - public boolean containsAny(Collection other) - { - return AbstractExtendedSet.this.containsAny(filter(convert(other))); - } - - @Override - public boolean containsAtLeast(Collection other, int minElements) - { - return AbstractExtendedSet.this.containsAtLeast(filter(convert(other)), minElements); - } - - @Override - public Iterable descending() - { - return new Iterable() - { - @Override - public Iterator iterator() - { - return descendingIterator(); - } - }; - } - - @Override - public void fill(T from, T to) - { - if (!isInRange(from) || !isInRange(to)) { - throw new IllegalArgumentException(); - } - AbstractExtendedSet.this.fill(from, to); - } - - @Override - public void flip(T e) - { - if (!isInRange(e)) { - throw new IllegalArgumentException(); - } - AbstractExtendedSet.this.flip(e); - } - - @Override - public T get(int i) - { - int minIndex = 0; - if (min != null) { - minIndex = AbstractExtendedSet.this.indexOf(min); - } - T r = AbstractExtendedSet.this.get(minIndex + i); - if 
(!isInRange(r)) { - throw new IllegalArgumentException(); - } - return r; - } - - @Override - public int indexOf(T e) - { - if (!isInRange(e)) { - throw new IllegalArgumentException(); - } - int minIndex = 0; - if (min != null) { - minIndex = AbstractExtendedSet.this.indexOf(min); - } - return AbstractExtendedSet.this.indexOf(e) - minIndex; - } - - @Override - public ExtendedSet clone() - { - return raw(); - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java deleted file mode 100755 index beaa52368adc..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java +++ /dev/null @@ -1,592 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package io.druid.extendedset; - - -import io.druid.extendedset.intset.ArraySet; -import io.druid.extendedset.intset.IntSet; -import io.druid.extendedset.wrappers.GenericExtendedSet; -import io.druid.extendedset.wrappers.IndexedSet; -import io.druid.extendedset.wrappers.IntegerSet; -import io.druid.extendedset.wrappers.LongSet; -import io.druid.extendedset.wrappers.matrix.PairSet; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.SortedSet; - -/** - * An interface which extends {@link SortedSet} by adding - * intersection/union/difference and other set operations. - * - * @param the type of elements maintained by this set - * - * @author Alessandro Colantonio - * @version $Id: ExtendedSet.java 140 2011-02-07 21:30:29Z cocciasik $ - * @see AbstractExtendedSet - * @see IndexedSet - * @see GenericExtendedSet - * @see ArraySet - * @see IntegerSet - * @see LongSet - * @see PairSet - */ -public interface ExtendedSet extends SortedSet, Cloneable, Comparable> -{ - /** - * Generates the intersection set - * - * @param other {@link ExtendedSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #retainAll(java.util.Collection) - */ - public ExtendedSet intersection(Collection other); - - /** - * Generates the union set - * - * @param other {@link ExtendedSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #addAll(java.util.Collection) - */ - public ExtendedSet union(Collection other); - - /** - * Generates the difference set - * - * @param other {@link ExtendedSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #removeAll(java.util.Collection) - */ - public ExtendedSet difference(Collection other); - - /** - * Generates the symmetric difference set - * - * @param other {@link ExtendedSet} instance 
that represents the right - * operand - * - * @return the result of the operation - * - * @see #flip(Object) - */ - public ExtendedSet symmetricDifference(Collection other); - - /** - * Generates the complement set. The returned set is represented by all the - * elements strictly less than {@link #last()} that do not exist in the - * current set. - * - * @return the complement set - * - * @see ExtendedSet#complement() - */ - public ExtendedSet complemented(); - - /** - * Complements the current set. The modified set is represented by all the - * elements strictly less than {@link #last()} that do not exist in the - * current set. - * - * @see ExtendedSet#complemented() - */ - public void complement(); - - /** - * Returns true if the specified {@link Collection} instance - * contains any elements that are also contained within this - * {@link ExtendedSet} instance - * - * @param other {@link ExtendedSet} to intersect with - * - * @return a boolean indicating whether this {@link ExtendedSet} intersects - * the specified {@link ExtendedSet}. - */ - public boolean containsAny(Collection other); - - /** - * Returns true if the specified {@link Collection} instance - * contains at least minElements elements that are also - * contained within this {@link ExtendedSet} instance - * - * @param other {@link Collection} instance to intersect with - * @param minElements minimum number of elements to be contained within this - * {@link ExtendedSet} instance - * - * @return a boolean indicating whether this {@link ExtendedSet} intersects - * the specified {@link Collection}. - * - * @throws IllegalArgumentException if minElements < 1 - */ - public boolean containsAtLeast(Collection other, int minElements); - - /** - * Computes the intersection set size. - *

- * This is faster than calling {@link #intersection(Collection)} and - * then {@link #size()} - * - * @param other {@link Collection} instance that represents the right - * operand - * - * @return the size - */ - public int intersectionSize(Collection other); - - /** - * Computes the union set size. - *

- * This is faster than calling {@link #union(Collection)} and then - * {@link #size()} - * - * @param other {@link Collection} instance that represents the right - * operand - * - * @return the size - */ - public int unionSize(Collection other); - - /** - * Computes the symmetric difference set size. - *

- * This is faster than calling - * {@link #symmetricDifference(Collection)} and then {@link #size()} - * - * @param other {@link Collection} instance that represents the right - * operand - * - * @return the size - */ - public int symmetricDifferenceSize(Collection other); - - /** - * Computes the difference set size. - *

- * This is faster than calling {@link #difference(Collection)} and - * then {@link #size()} - * - * @param other {@link Collection} instance that represents the right - * operand - * - * @return the size - */ - public int differenceSize(Collection other); - - /** - * Computes the complement set size. - *

- * This is faster than calling {@link #complemented()} and then - * {@link #size()} - * - * @return the size - */ - public int complementSize(); - - /** - * Generates an empty set - * - * @return the empty set - */ - public ExtendedSet empty(); - - /** - * See the clone() of {@link Object} - * - * @return cloned object - */ - public ExtendedSet clone(); - - /** - * Computes the compression factor of the equivalent bitmap representation - * (1 means not compressed, namely a memory footprint similar to - * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) - * - * @return the compression factor - */ - public double bitmapCompressionRatio(); - - /** - * Computes the compression factor of the equivalent integer collection (1 - * means not compressed, namely a memory footprint similar to - * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) - * - * @return the compression factor - */ - public double collectionCompressionRatio(); - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator iterator(); - - /** - * Gets the descending order iterator over the elements of type - * T - * - * @return descending iterator - */ - public ExtendedIterator descendingIterator(); - - /** - * Allows to use the Java "for-each" statement in descending order - * - * @return {@link Iterable} instance to iterate items in descending - * order - */ - public Iterable descending(); - - /** - * Computes the power-set of the current set. - *

- * It is a particular implementation of the algorithm Apriori (see: - * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining - * Association Rules in Large Databases, in Proceedings of the - * 20th International Conference on Very Large Data Bases, - * p.487-499, 1994). The returned power-set does not contain the - * empty set. - *

- * The subsets composing the powerset are returned in a list that is sorted - * according to the lexicographical order provided by the sorted set. - * - * @return the power-set - * - * @see #powerSet(int, int) - * @see #powerSetSize() - */ - public List> powerSet(); - - /** - * Computes a subset of the power-set of the current set, composed by those - * subsets that have cardinality between min and - * max. - *

- * It is a particular implementation of the algorithm Apriori (see: - * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining - * Association Rules in Large Databases, in Proceedings of the - * 20th International Conference on Very Large Data Bases, - * p.487-499, 1994). The power-set does not contains the empty set. - *

- * The subsets composing the powerset are returned in a list that is sorted - * according to the lexicographical order provided by the sorted set. - * - * @param min minimum subset size (greater than zero) - * @param max maximum subset size - * - * @return the power-set - * - * @see #powerSet() - * @see #powerSetSize(int, int) - */ - public List> powerSet(int min, int max); - - /** - * Computes the power-set size of the current set. - *

- * The power-set does not contains the empty set. - * - * @return the power-set size - * - * @see #powerSet() - */ - public int powerSetSize(); - - /** - * Computes the power-set size of the current set, composed by those subsets - * that have cardinality between min and max. - *

- * The returned power-set does not contain the empty set. - * - * @param min minimum subset size (greater than zero) - * @param max maximum subset size - * - * @return the power-set size - * - * @see #powerSet(int, int) - */ - public int powerSetSize(int min, int max); - - /** - * Prints debug info about the given {@link ExtendedSet} implementation - * - * @return a string that describes the internal representation of the - * instance - */ - public String debugInfo(); - - /** - * Adds to the set all the elements between first and - * last, both included. It supposes that there is an ordering - * of the elements of type T and that the universe of all - * possible elements is known. - * - * @param from first element - * @param to last element - */ - public void fill(T from, T to); - - /** - * Removes from the set all the elements between first and - * last, both included. It supposes that there is an ordering - * of the elements of type T and that the universe of all - * possible elements is known. - * - * @param from first element - * @param to last element - */ - public void clear(T from, T to); - - /** - * Adds the element if it not existing, or removes it if existing - * - * @param e element to flip - * - * @see #symmetricDifference(Collection) - */ - public void flip(T e); - - /** - * Gets the read-only version of the current set - * - * @return the read-only version of the current set - */ - public ExtendedSet unmodifiable(); - - /** - * Gets the ith element of the set - * - * @param i position of the element in the sorted set - * - * @return the ith element of the set - * - * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to - * {@link #size()} - */ - public T get(int i); - - /** - * Provides position of element within the set. - *

- * It returns -1 if the element does not exist within the set. - * - * @param e element of the set - * - * @return the element position - */ - public int indexOf(T e); - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet tailSet(T fromElement); - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet headSet(T toElement); - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet subSet(T fromElement, T toElement); - - /** - * Converts a given {@link Collection} instance into an instance of the - * current class. NOTE: when the collection is already an instance of - * the current class, the method returns the collection itself. - * - * @param c collection to use to generate the new instance - * - * @return the converted collection - * - * @see #convert(Object...) - */ - public ExtendedSet convert(Collection c); - - /** - * Converts a given integer array into an instance of the current class - * - * @param e objects to use to generate the new instance - * - * @return the converted collection - * - * @see #convert(Collection) - */ - public ExtendedSet convert(Object... e); - - /** - * Computes the Jaccard similarity coefficient between this set and the - * given set. - *

- * The coefficient is defined as - * |A intersection B| / |A union B|. - * - * @param other the other set - * - * @return the Jaccard similarity coefficient - * - * @see #jaccardDistance(ExtendedSet) - */ - public double jaccardSimilarity(ExtendedSet other); - - /** - * Computes the Jaccard distance between this set and the given set. - *

- * The coefficient is defined as - * 1 - {@link #jaccardSimilarity(ExtendedSet)}. - * - * @param other the other set - * - * @return the Jaccard distance - * - * @see #jaccardSimilarity(ExtendedSet) - */ - public double jaccardDistance(ExtendedSet other); - - /** - * Computes the weighted version of the Jaccard similarity coefficient - * between this set and the given set. - *

- * The coefficient is defined as - * sum of min(A_i, B_i) / sum of max(A_i, B_i). - *

- * NOTE: T must be a number, namely one of - * {@link Integer}, {@link Double}, {@link Float}, {@link Byte}, - * {@link Long}, {@link Short}. - * - * @param other the other set - * - * @return the weighted Jaccard similarity coefficient - * - * @throws IllegalArgumentException if T is not a number - * @see #weightedJaccardDistance(ExtendedSet) - */ - public double weightedJaccardSimilarity(ExtendedSet other); - - /** - * Computes the weighted version of the Jaccard distance between this set - * and the given set. - *

- * The coefficient is defined as 1 - - * {@link #weightedJaccardSimilarity(ExtendedSet)}. - *

- * NOTE: T must be a number, namely one of - * {@link Integer}, {@link Double}, {@link Float}, {@link Byte}, - * {@link Long}, {@link Short}. - * - * @param other the other set - * - * @return the weighted Jaccard distance - * - * @throws IllegalArgumentException if T is not a number - * @see #weightedJaccardSimilarity(ExtendedSet) - */ - public double weightedJaccardDistance(ExtendedSet other); - - /** - * Compares this object with the specified object for order. Returns a - * negative integer, zero, or a positive integer as this object is less - * than, equal to, or greater than the specified object. An {@link IntSet} - * instance A is less than another {@link IntSet} instance - * B if B-A (that is, the elements in - * B that are not contained in A) contains at - * least one element that is greater than all the elements in - * A-B. - *

- *

- * The implementor must ensure sgn(x.compareTo(y)) == - * -sgn(y.compareTo(x)) for all x and y. (This - * implies that x.compareTo(y) must throw an exception iff - * y.compareTo(x) throws an exception.) - *

- *

- * The implementor must also ensure that the relation is transitive: - * (x.compareTo(y)>0 && y.compareTo(z)>0) implies - * x.compareTo(z)>0. - *

- *

- * Finally, the implementor must ensure that x.compareTo(y)==0 - * implies that sgn(x.compareTo(z)) == sgn(y.compareTo(z)), for all - * z. - *

- *

- * It is strongly recommended, but not strictly required that - * (x.compareTo(y)==0) == (x.equals(y)). Generally speaking, any - * class that implements the Comparable interface and violates this - * condition should clearly indicate this fact. The recommended language is - * "Note: this class has a natural ordering that is inconsistent with - * equals." - *

- *

- * In the foregoing description, the notation sgn(expression - * ) designates the mathematical signum function, which is - * defined to return one of -1, 0, or 1 according - * to whether the value of expression is negative, zero or positive. - * - * @param o the object to be compared. - * - * @return a negative integer, zero, or a positive integer as this object is - * less than, equal to, or greater than the specified object. - * - * @throws ClassCastException if the specified object's type prevents it from being - * compared to this object. - */ - @Override - public int compareTo(ExtendedSet o); - - /** - * Extended version of the {@link Iterator} interface that allows to "skip" - * some elements of the set - * - * @param the type of elements maintained by this set - */ - public interface ExtendedIterator extends Iterator - { - /** - * Skips all the elements before the the specified element, so that - * {@link Iterator#next()} gives the given element or, if it does not - * exist, the element immediately after according to the sorting - * provided by this {@link SortedSet} instance. - *

- * If element is less than the next element, it does - * nothing - * - * @param element first element to not skip - */ - public void skipAllBefore(X element); - } -} - - diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java deleted file mode 100755 index 48805215ee18..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java +++ /dev/null @@ -1,744 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.intset; - - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.NoSuchElementException; - -/** - * This class provides a skeletal implementation of the {@link IntSet} - * interface to minimize the effort required to implement this interface. 
- * - * @author Alessandro Colantonio - * @version $Id: AbstractIntSet.java 156 2011-09-01 00:13:57Z cocciasik $ - */ -public abstract class AbstractIntSet implements IntSet -{ - /** - * {@inheritDoc} - */ - @Override - public IntSet union(IntSet other) - { - IntSet res = clone(); - res.addAll(other); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet difference(IntSet other) - { - IntSet res = clone(); - res.removeAll(other); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet intersection(IntSet other) - { - IntSet res = clone(); - res.retainAll(other); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet symmetricDifference(IntSet c) - { - IntSet res = clone(); - IntIterator itr = c.iterator(); - while (itr.hasNext()) { - res.flip(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet complemented() - { - IntSet res = clone(); - res.complement(); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - if (isEmpty()) { - return; - } - for (int e = last(); e >= 0; e--) { - flip(e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(IntSet c) - { - IntIterator itr = c.iterator(); - boolean res = true; - while (res && itr.hasNext()) { - res &= contains(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(IntSet c) - { - IntIterator itr = c.iterator(); - boolean res = true; - while (res && itr.hasNext()) { - if (contains(itr.next())) { - return true; - } - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(IntSet c, int minElements) - { - IntIterator itr = c.iterator(); - while (minElements > 0 && itr.hasNext()) { - if (contains(itr.next())) { - minElements--; - } - } - return minElements == 0; - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(IntSet c) - { - int 
res = 0; - IntIterator itr = c.iterator(); - while (itr.hasNext()) { - if (contains(itr.next())) { - res++; - } - } - return res; - - } - - /** - * {@inheritDoc} - */ - @Override - public int unionSize(IntSet other) - { - return other == null ? size() : size() + other.size() - intersectionSize(other); - } - - /** - * {@inheritDoc} - */ - @Override - public int symmetricDifferenceSize(IntSet other) - { - return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); - } - - /** - * {@inheritDoc} - */ - @Override - public int differenceSize(IntSet other) - { - return other == null ? size() : size() - intersectionSize(other); - } - - /** - * {@inheritDoc} - */ - @Override - public int complementSize() - { - if (isEmpty()) { - return 0; - } - return last() - size() + 1; - } - - /** - * {@inheritDoc} - */ - @Override - public abstract IntSet empty(); - - /** - * {@inheritDoc} - */ - @Override - public abstract IntSet clone(); - - /** - * {@inheritDoc} - */ - @Override - public abstract double bitmapCompressionRatio(); - - /** - * {@inheritDoc} - */ - @Override - public abstract double collectionCompressionRatio(); - - /** - * {@inheritDoc} - */ - @Override - public abstract IntIterator iterator(); - - /** - * {@inheritDoc} - */ - @Override - public abstract IntIterator descendingIterator(); - - /** - * {@inheritDoc} - */ - @Override - public abstract String debugInfo(); - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - IntIterator itr = iterator(); - while (itr.hasNext()) { - itr.next(); - itr.remove(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void clear(int from, int to) - { - if (from > to) { - throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); - } - for (int e = from; e <= to; e++) { - remove(e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(int from, int to) - { - if (from > to) { - throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); - } - for 
(int e = from; e <= to; e++) { - add(e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(int e) - { - if (!add(e)) { - remove(e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public abstract int get(int i); - - /** - * {@inheritDoc} - */ - @Override - public abstract int indexOf(int e); - - /** - * {@inheritDoc} - */ - @Override - public abstract IntSet convert(int... a); - - /** - * {@inheritDoc} - */ - @Override - public abstract IntSet convert(Collection c); - - /** - * {@inheritDoc} - */ - @Override - public int first() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - return iterator().next(); - } - - /** - * {@inheritDoc} - */ - @Override - public abstract int last(); - - /** - * {@inheritDoc} - */ - @Override - public abstract int size(); - - /** - * {@inheritDoc} - */ - @Override - public abstract boolean isEmpty(); - - /** - * {@inheritDoc} - */ - @Override - public abstract boolean contains(int i); - - /** - * {@inheritDoc} - */ - @Override - public abstract boolean add(int i); - - /** - * {@inheritDoc} - */ - @Override - public abstract boolean remove(int i); - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(IntSet c) - { - if (c == null || c.isEmpty()) { - return false; - } - IntIterator itr = c.iterator(); - boolean res = false; - while (itr.hasNext()) { - res |= add(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(IntSet c) - { - if (c == null || c.isEmpty()) { - return false; - } - IntIterator itr = c.iterator(); - boolean res = false; - while (itr.hasNext()) { - res |= remove(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean retainAll(IntSet c) - { - if (c == null || c.isEmpty()) { - return false; - } - IntIterator itr = iterator(); - boolean res = false; - while (itr.hasNext()) { - int e = itr.next(); - if (!c.contains(e)) { - res = true; - itr.remove(); - } - } - return res; - } - - 
/** - * {@inheritDoc} - */ - @Override - public int[] toArray() - { - if (isEmpty()) { - return null; - } - return toArray(new int[size()]); - } - - /** - * {@inheritDoc} - */ - @Override - public int[] toArray(int[] a) - { - if (a.length < size()) { - a = new int[size()]; - } - IntIterator itr = iterator(); - int i = 0; - while (itr.hasNext()) { - a[i++] = itr.next(); - } - for (; i < a.length; i++) { - a[i] = 0; - } - return a; - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - IntIterator itr = iterator(); - if (!itr.hasNext()) { - return "[]"; - } - - StringBuilder sb = new StringBuilder(); - sb.append('['); - for (; ; ) { - int e = itr.next(); - sb.append(e); - if (!itr.hasNext()) { - return sb.append(']').toString(); - } - sb.append(", "); - } - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(IntSet o) - { - IntIterator thisIterator = this.descendingIterator(); - IntIterator otherIterator = o.descendingIterator(); - while (thisIterator.hasNext() && otherIterator.hasNext()) { - int thisItem = thisIterator.next(); - int otherItem = otherIterator.next(); - if (thisItem < otherItem) { - return -1; - } - if (thisItem > otherItem) { - return 1; - } - } - return thisIterator.hasNext() ? 1 : (otherIterator.hasNext() ? 
-1 : 0); - } - - /** - * {@inheritDoc} - */ - @Override - public List powerSet() - { - return powerSet(1, Integer.MAX_VALUE); - } - - /** - * {@inheritDoc} - */ - @Override - public List powerSet(int min, int max) - { - if (min < 1 || max < min) { - throw new IllegalArgumentException(); - } - - // special cases - List res = new ArrayList(); - if (size() < min) { - return res; - } - if (size() == min) { - res.add(clone()); - return res; - } - if (size() == min + 1) { - IntIterator itr = descendingIterator(); - while (itr.hasNext()) { - IntSet set = clone(); - set.remove(itr.next()); - res.add(set); - } - if (max > min) { - res.add(clone()); - } - return res; - } - - // the first level contains only one prefix made up of all 1-subsets - List> level = new ArrayList>(); - level.add(new ArrayList()); - IntIterator itr = iterator(); - while (itr.hasNext()) { - IntSet single = empty(); - single.add(itr.next()); - level.get(0).add(single); - } - if (min == 1) { - res.addAll(level.get(0)); - } - - // all combinations - int lvl = 2; - while (!level.isEmpty() && lvl <= max) { - List> newLevel = new ArrayList>(); - for (List prefix : level) { - for (int i = 0; i < prefix.size() - 1; i++) { - List newPrefix = new ArrayList(); - for (int j = i + 1; j < prefix.size(); j++) { - IntSet x = prefix.get(i).clone(); - x.add(prefix.get(j).last()); - newPrefix.add(x); - if (lvl >= min) { - res.add(x); - } - } - if (newPrefix.size() > 1) { - newLevel.add(newPrefix); - } - } - } - level = newLevel; - lvl++; - } - - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public int powerSetSize() - { - return isEmpty() ? 
0 : (int) Math.pow(2, size()) - 1; - } - - /** - * {@inheritDoc} - */ - @Override - public int powerSetSize(int min, int max) - { - if (min < 1 || max < min) { - throw new IllegalArgumentException(); - } - final int size = size(); - - // special cases - if (size < min) { - return 0; - } - if (size == min) { - return 1; - } - - /* - * Compute the sum of binomial coefficients ranging from (size choose - * max) to (size choose min) using dynamic programming - */ - - // trivial cases - max = Math.min(size, max); - if (max == min && (max == 0 || max == size)) { - return 1; - } - - // compute all binomial coefficients for "n" - int[] b = new int[size + 1]; - for (int i = 0; i <= size; i++) { - b[i] = 1; - } - for (int i = 1; i <= size; i++) { - for (int j = i - 1; j > 0; j--) { - b[j] += b[j - 1]; - } - } - - // sum binomial coefficients - int res = 0; - for (int i = min; i <= max; i++) { - res += b[i]; - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public double jaccardSimilarity(IntSet other) - { - if (isEmpty() && other.isEmpty()) { - return 1D; - } - int inters = intersectionSize(other); - return (double) inters / (size() + other.size() - inters); - } - - /** - * {@inheritDoc} - */ - @Override - public double jaccardDistance(IntSet other) - { - return 1D - jaccardSimilarity(other); - } - - /** - * {@inheritDoc} - */ - @Override - public double weightedJaccardSimilarity(IntSet other) - { - if (isEmpty() && other.isEmpty()) { - return 1D; - } - IntIterator itr = intersection(other).iterator(); - double intersectionSum = 0D; - while (itr.hasNext()) { - intersectionSum += itr.next(); - } - - itr = symmetricDifference(other).iterator(); - double symmetricDifferenceSum = 0D; - while (itr.hasNext()) { - symmetricDifferenceSum += itr.next(); - } - - return intersectionSum / (intersectionSum + symmetricDifferenceSum); - } - - /** - * {@inheritDoc} - */ - @Override - public double weightedJaccardDistance(IntSet other) - { - return 1D - 
weightedJaccardSimilarity(other); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - // special cases - if (this == obj) { - return true; - } - if (!(obj instanceof IntSet)) { - return false; - } - if (size() != ((IntSet) obj).size()) { - return false; - } - - // compare all the integrals, according to their natural order - IntIterator itr1 = iterator(); - IntIterator itr2 = ((IntSet) obj).iterator(); - while (itr1.hasNext()) { - if (itr1.next() != itr2.next()) { - return false; - } - } - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - if (isEmpty()) { - return 0; - } - int h = 1; - IntIterator itr = iterator(); - if (!itr.hasNext()) { - h = (h << 5) - h + itr.next(); - } - return h; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java deleted file mode 100755 index aee867240a9a..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java +++ /dev/null @@ -1,1157 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package io.druid.extendedset.intset; - - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.SortedSet; - -/** - * {@link IntSet}-based class internally managed by a sorted array of - * ints. - * - * @author Alessandro Colantonio - * @version $Id: ArraySet.java 156 2011-09-01 00:13:57Z cocciasik $ - */ -public class ArraySet extends AbstractIntSet -{ - /** - * elements of the set - */ - private int[] elements; - - /** - * set cardinality - */ - private int size; - - /** - * Empty-set constructor - */ - public ArraySet() - { - size = 0; - elements = null; - } - - /** - * Replace the content of the current instance with the content of another - * instance - * - * @param other - */ - private void replaceWith(ArraySet other) - { - size = other.size; - elements = other.elements; - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - if (isEmpty()) { - return 0D; - } - return size() / Math.ceil(elements[size - 1] / 32D); - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - return isEmpty() ? 
0D : 1D; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet empty() - { - return new ArraySet(); - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator iterator() - { - return new IntIterator() - { - int next = 0; - - @Override - public void skipAllBefore(int e) - { - if (e <= elements[next]) { - return; - } - next = Arrays.binarySearch(elements, next + 1, size, e); - if (next < 0) { - next = -(next + 1); - } - } - - @Override - public boolean hasNext() - { - return next < size; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return elements[next++]; - } - - @Override - public void remove() - { - next--; - size--; - System.arraycopy(elements, next + 1, elements, next, size - next); - compact(); - } - - @Override - public IntIterator clone() - { - throw new UnsupportedOperationException(); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator descendingIterator() - { - return new IntIterator() - { - int next = size - 1; - - @Override - public void skipAllBefore(int e) - { - if (e >= elements[next]) { - return; - } - next = Arrays.binarySearch(elements, 0, next, e); - if (next < 0) { - next = -(next + 1) - 1; - } - } - - @Override - public boolean hasNext() - { - return next >= 0; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return elements[next--]; - } - - @Override - public void remove() - { - next++; - size--; - System.arraycopy(elements, next + 1, elements, next, size - next); - compact(); - } - - @Override - public IntIterator clone() - { - throw new UnsupportedOperationException(); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet clone() - { - // NOTE: do not use super.clone() since it is 10 times slower! 
- ArraySet c = empty(); - if (!isEmpty()) { - c.elements = Arrays.copyOf(elements, elements.length); - c.size = size; - } - return c; - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - return toString(); - } - - /** - * Assures that the size of {@link #elements} is sufficient to contain - * {@link #size} elements. - */ - private void ensureCapacity() - { - int capacity = elements == null ? 0 : elements.length; - if (capacity >= size) { - return; - } - capacity = Math.max(capacity << 1, size); - - if (elements == null) { - // nothing to copy - elements = new int[capacity]; - return; - } - elements = Arrays.copyOf(elements, capacity); - } - - /** - * Removes unused allocated words at the end of {@link #words} only when they - * are more than twice of the needed space - */ - private void compact() - { - if (size == 0) { - elements = null; - return; - } - if (elements != null && (size << 1) < elements.length) { - elements = Arrays.copyOf(elements, size); - } - } - - /** - * {@inheritDoc} - */ - @Override - public boolean add(int element) - { - // append - if (isEmpty() || elements[size - 1] < element) { - size++; - ensureCapacity(); - elements[size - 1] = element; - return true; - } - - // insert - int pos = Arrays.binarySearch(elements, 0, size, element); - if (pos >= 0) { - return false; - } - - size++; - ensureCapacity(); - pos = -(pos + 1); - System.arraycopy(elements, pos, elements, pos + 1, size - pos - 1); - elements[pos] = element; - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean remove(int element) - { - if (element < 0) { - return false; - } - - int pos = Arrays.binarySearch(elements, 0, size, element); - if (pos < 0) { - return false; - } - - size--; - System.arraycopy(elements, pos + 1, elements, pos, size - pos); - compact(); - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(int element) - { - // first - if (isEmpty()) { - size++; - ensureCapacity(); - elements[size 
- 1] = element; - return; - } - - int pos = Arrays.binarySearch(elements, 0, size, element); - - // add - if (pos < 0) { - size++; - ensureCapacity(); - pos = -(pos + 1); - System.arraycopy(elements, pos, elements, pos + 1, size - pos - 1); - elements[pos] = element; - return; - } - - // remove - size--; - System.arraycopy(elements, pos + 1, elements, pos, size - pos); - compact(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean contains(int element) - { - if (isEmpty()) { - return false; - } - return Arrays.binarySearch(elements, 0, size, element) >= 0; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(IntSet c) - { - if (c == null || c.isEmpty() || c == this) { - return true; - } - if (isEmpty()) { - return false; - } - - final ArraySet o = convert(c); - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] < otherElements[otherIndex]) { - if (thisIndex == size - 1) { - return false; - } - thisIndex++; - } - if (thisElements[thisIndex] > otherElements[otherIndex]) { - return false; - } - } - return otherIndex == otherSize - 1; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(IntSet other) - { - if (other == null || other.isEmpty() || other == this) { - return true; - } - if (isEmpty()) { - return false; - } - - final ArraySet o = convert(other); - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] != otherElements[otherIndex]) { - while (thisElements[thisIndex] > otherElements[otherIndex]) { - if (otherIndex 
== otherSize - 1) { - return false; - } - otherIndex++; - } - if (thisElements[thisIndex] == otherElements[otherIndex]) { - break; - } - while (thisElements[thisIndex] < otherElements[otherIndex]) { - if (thisIndex == size - 1) { - return false; - } - thisIndex++; - } - } - return true; - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(IntSet other, int minElements) - { - if (minElements < 1) { - throw new IllegalArgumentException(); - } - if ((size >= 0 && size < minElements) || other == null || other.isEmpty() || isEmpty()) { - return false; - } - if (this == other) { - return size() >= minElements; - } - - final ArraySet o = convert(other); - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - int res = 0; - while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] != otherElements[otherIndex]) { - while (thisElements[thisIndex] > otherElements[otherIndex]) { - if (otherIndex == otherSize - 1) { - return false; - } - otherIndex++; - } - if (thisElements[thisIndex] == otherElements[otherIndex]) { - break; - } - while (thisElements[thisIndex] < otherElements[otherIndex]) { - if (thisIndex == size - 1) { - return false; - } - thisIndex++; - } - } - res++; - if (res >= minElements) { - return true; - } - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(IntSet c) - { - ArraySet res = union(c); - boolean r = !equals(res); - replaceWith(res); - return r; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean retainAll(IntSet c) - { - ArraySet res = intersection(c); - boolean r = !equals(res); - replaceWith(res); - return r; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(IntSet c) - { - ArraySet res = difference(c); - boolean r = !equals(res); - 
replaceWith(res); - return r; - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - if (isEmpty()) { - return 0; - } - final int[] thisElements = elements; // faster - int h = 1; - for (int i = 0; i < size; i++) { - h = (h << 5) - h + thisElements[i]; - } - return h; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (!(obj instanceof ArraySet)) { - return super.equals(obj); - } - final ArraySet other = (ArraySet) obj; - if (size != other.size) { - return false; - } - final int[] thisElements = elements; // faster - final int[] otherElements = other.elements; // faster - for (int i = 0; i < size; i++) { - if (thisElements[i] != otherElements[i]) { - return false; - } - } - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return size; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return size == 0; - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - elements = null; - size = 0; - } - - /** - * {@inheritDoc} - */ - @Override - public int first() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - return elements[0]; - } - - /** - * {@inheritDoc} - */ - @Override - public int last() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - return elements[size - 1]; - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(IntSet other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return 0; - } - if (this == other) { - return size(); - } - - final ArraySet o = convert(other); - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - int res = 0; - while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] != otherElements[otherIndex]) 
{ - while (thisElements[thisIndex] > otherElements[otherIndex]) { - if (otherIndex == otherSize - 1) { - return res; - } - otherIndex++; - } - if (thisElements[thisIndex] == otherElements[otherIndex]) { - break; - } - while (thisElements[thisIndex] < otherElements[otherIndex]) { - if (thisIndex == size - 1) { - return res; - } - thisIndex++; - } - } - res++; - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet intersection(IntSet other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return empty(); - } - if (this == other) { - return clone(); - } - - final ArraySet o = convert(other); - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - int resSize = 0; - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - final int[] resElements = new int[Math.min(size, otherSize)]; - while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] != otherElements[otherIndex]) { - while (thisElements[thisIndex] > otherElements[otherIndex]) { - if (otherIndex == otherSize - 1) { - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - res.compact(); - return res; - } - otherIndex++; - } - if (thisElements[thisIndex] == otherElements[otherIndex]) { - break; - } - while (thisElements[thisIndex] < otherElements[otherIndex]) { - if (thisIndex == size - 1) { - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - res.compact(); - return res; - } - thisIndex++; - } - } - resElements[resSize++] = thisElements[thisIndex]; - } - - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - res.compact(); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet union(IntSet other) - { - if (this == other || other == null || other.isEmpty()) { - return clone(); - } - if (isEmpty()) { - ArraySet cloned = convert(other); - if (cloned 
== other) { - cloned = cloned.clone(); - } - return cloned; - } - - final ArraySet o = convert(other); - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - int resSize = 0; - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - final int[] resElements = new int[size + otherSize]; -mainLoop: - while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] != otherElements[otherIndex]) { - while (thisElements[thisIndex] > otherElements[otherIndex]) { - resElements[resSize++] = otherElements[otherIndex]; - if (otherIndex == otherSize - 1) { - resElements[resSize++] = thisElements[thisIndex]; - break mainLoop; - } - otherIndex++; - } - if (thisElements[thisIndex] == otherElements[otherIndex]) { - break; - } - while (thisElements[thisIndex] < otherElements[otherIndex]) { - resElements[resSize++] = thisElements[thisIndex]; - if (thisIndex == size - 1) { - resElements[resSize++] = otherElements[otherIndex]; - break mainLoop; - } - thisIndex++; - } - } - resElements[resSize++] = thisElements[thisIndex]; - } - while (thisIndex < size - 1) { - resElements[resSize++] = thisElements[++thisIndex]; - } - while (otherIndex < otherSize - 1) { - resElements[resSize++] = otherElements[++otherIndex]; - } - - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - res.compact(); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet difference(IntSet other) - { - if (isEmpty() || this == other) { - return empty(); - } - if (other == null || other.isEmpty()) { - return clone(); - } - - final ArraySet o = convert(other); - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - int resSize = 0; - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - final int[] resElements = new int[size]; -mainLoop: - while (thisIndex < (size - 1) && otherIndex < 
(otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] != otherElements[otherIndex]) { - while (thisElements[thisIndex] > otherElements[otherIndex]) { - if (otherIndex == otherSize - 1) { - resElements[resSize++] = thisElements[thisIndex]; - break mainLoop; - } - otherIndex++; - } - if (thisElements[thisIndex] == otherElements[otherIndex]) { - break; - } - while (thisElements[thisIndex] < otherElements[otherIndex]) { - resElements[resSize++] = thisElements[thisIndex]; - if (thisIndex == size - 1) { - break mainLoop; - } - thisIndex++; - } - } - } - while (thisIndex < size - 1) { - resElements[resSize++] = thisElements[++thisIndex]; - } - - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - res.compact(); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet symmetricDifference(IntSet other) - { - if (this == other || other == null || other.isEmpty()) { - return clone(); - } - if (isEmpty()) { - return convert(other).clone(); - } - - final ArraySet o = convert(other); - int otherSize = o.size; - int thisIndex = -1; - int otherIndex = -1; - int resSize = 0; - final int[] thisElements = elements; // faster - final int[] otherElements = o.elements; // faster - final int[] resElements = new int[size + otherSize]; -mainLoop: - while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { - thisIndex++; - otherIndex++; - while (thisElements[thisIndex] != otherElements[otherIndex]) { - while (thisElements[thisIndex] > otherElements[otherIndex]) { - resElements[resSize++] = otherElements[otherIndex]; - if (otherIndex == otherSize - 1) { - resElements[resSize++] = thisElements[thisIndex]; - break mainLoop; - } - otherIndex++; - } - if (thisElements[thisIndex] == otherElements[otherIndex]) { - break; - } - while (thisElements[thisIndex] < otherElements[otherIndex]) { - resElements[resSize++] = thisElements[thisIndex]; - if (thisIndex == size - 1) { - resElements[resSize++] = 
otherElements[otherIndex]; - break mainLoop; - } - thisIndex++; - } - } - } - while (thisIndex < size - 1) { - resElements[resSize++] = thisElements[++thisIndex]; - } - while (otherIndex < otherSize - 1) { - resElements[resSize++] = otherElements[++otherIndex]; - } - - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - res.compact(); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - if (isEmpty()) { - return; - } - - IntIterator thisItr = clone().iterator(); // avoid concurrency - elements = new int[complementSize()]; - final int[] thisElements = elements; // faster - size = 0; - int u = -1; - while (thisItr.hasNext()) { - int c = thisItr.next(); - while (++u < c) { - thisElements[size++] = u; - } - } - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(int from, int to) - { - if (from > to) { - throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); - } - if (from == to) { - add(from); - return; - } - - int[] thisElements = elements; // faster - - if (isEmpty()) { - size = to - from + 1; - ensureCapacity(); - thisElements = elements; - for (int i = 0; i < size; i++) { - thisElements[i] = from++; - } - return; - } - - // increase capacity, if necessary - int posFrom = Arrays.binarySearch(thisElements, 0, size, from); - boolean fromMissing = posFrom < 0; - if (fromMissing) { - posFrom = -posFrom - 1; - } - - int posTo = Arrays.binarySearch(thisElements, posFrom, size, to); - boolean toMissing = posTo < 0; - if (toMissing) { - posTo = -posTo - 1; - } - - int delta = 0; - if (toMissing || (fromMissing && (posFrom == posTo + 1))) { - delta = 1; - } - - int gap = to - from; - delta += gap - (posTo - posFrom); - if (delta > 0) { - size += delta; - ensureCapacity(); - thisElements = elements; - System.arraycopy(thisElements, posTo, thisElements, posTo + delta, size - delta - posTo); - posTo = posFrom + gap; - - // set values - for (int i = posFrom; i <= posTo; i++) { - 
thisElements[i] = from++; - } - } - } - - /** - * {@inheritDoc} - */ - @Override - public void clear(int from, int to) - { - if (isEmpty()) { - return; - } - if (from > to) { - throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); - } - if (from == to) { - remove(from); - return; - } - - int posFrom = Arrays.binarySearch(elements, 0, size, from); - if (posFrom < 0) { - posFrom = -posFrom - 1; - } - if (posFrom >= size) { - return; - } - int posTo = Arrays.binarySearch(elements, posFrom, size, to); - if (posTo >= 0) { - posTo++; - } else { - posTo = -posTo - 1; - } - if (posFrom == posTo) { - return; - } - System.arraycopy(elements, posTo, elements, posFrom, size - posTo); - size -= posTo - posFrom; - } - - /** - * Convert a generic {@link IntSet} instance to an {@link ArraySet} instance - * - * @param c - * - * @return - */ - private ArraySet convert(IntSet c) - { - if (c instanceof ArraySet) { - return (ArraySet) c; - } - - int[] resElements = new int[c.size()]; - int resSize = 0; - IntIterator itr = c.iterator(); - while (itr.hasNext()) { - resElements[resSize++] = itr.next(); - } - - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet convert(int... 
a) - { - int[] resElements = null; - int resSize = 0; - int last = -1; - if (a != null) { - resElements = new int[a.length]; - a = Arrays.copyOf(a, a.length); - Arrays.sort(a); - if (a[0] < 0) { - throw new ArrayIndexOutOfBoundsException(Integer.toString(a[0])); - } - for (int i : a) { - if (last != i) { - resElements[resSize++] = last = i; - } - } - } - - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet convert(Collection c) - { - Collection sorted; - int[] resElements = null; - int resSize = 0; - int last = -1; - if (c != null) { - resElements = new int[c.size()]; - if (c instanceof SortedSet && ((SortedSet) c).comparator() == null) { - sorted = c; - } else { - sorted = new ArrayList(c); - Collections.sort((List) sorted); - int first = ((ArrayList) sorted).get(0).intValue(); - if (first < 0) { - throw new ArrayIndexOutOfBoundsException(Integer.toString(first)); - } - } - for (int i : sorted) { - if (last != i) { - resElements[resSize++] = last = i; - } - } - } - - ArraySet res = empty(); - res.elements = resElements; - res.size = resSize; - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ArraySet complemented() - { - ArraySet res = clone(); - res.complement(); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public int get(int i) - { - if (i < 0 || i >= size) { - throw new IndexOutOfBoundsException(Integer.toString(i)); - } - return elements[i]; - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(int e) - { - if (e < 0) { - throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); - } - int pos = Arrays.binarySearch(elements, 0, size, e); - if (pos < 0) { - return -1; - } - return pos; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java deleted file mode 100755 index 
b3f3d87bc0c4..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java +++ /dev/null @@ -1,3178 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package io.druid.extendedset.intset; - - -import io.druid.extendedset.utilities.BitCount; - -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.ConcurrentModificationException; -import java.util.Formatter; -import java.util.List; -import java.util.Locale; -import java.util.NoSuchElementException; -import java.util.SortedSet; - -/** - * This is CONCISE: COmpressed 'N' Composable Integer SEt. - *

- * This class is an instance of {@link IntSet} internally represented by - * compressed bitmaps though a RLE (Run-Length Encoding) compression algorithm. - * See http - * ://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf for more - * details. - *

- * Notice that the iterator by {@link #iterator()} is fail-fast, - * similar to most {@link Collection}-derived classes. If the set is - * structurally modified at any time after the iterator is created, the iterator - * will throw a {@link ConcurrentModificationException}. Thus, in the face of - * concurrent modification, the iterator fails quickly and cleanly, rather than - * risking arbitrary, non-deterministic behavior at an undetermined time in the - * future. The iterator throws a {@link ConcurrentModificationException} on a - * best-effort basis. Therefore, it would be wrong to write a program that - * depended on this exception for its correctness: the fail-fast behavior of - * iterators should be used only to detect bugs. - * - * @author Alessandro Colantonio - * @version $Id$ - */ -public class ConciseSet extends AbstractIntSet implements java.io.Serializable -{ - /** - * generated serial ID - */ - private static final long serialVersionUID = 560068054685367266L; - /** - * true if the class must simulate the behavior of WAH - */ - private final boolean simulateWAH; - /** - * User for fail-fast iterator. It counts the number of operations - * that do modify {@link #words} - */ - protected transient volatile int modCount = 0; - /** - * This is the compressed bitmap, that is a collection of words. For each - * word: - *

    - *
  • 1* (0x80000000) means that it is a 31-bit literal. - *
  • 00* (0x00000000) indicates a sequence made up of at - * most one set bit in the first 31 bits, and followed by blocks of 31 0's. - * The following 5 bits (00xxxxx*) indicates which is the set bit ( - * 00000 = no set bit, 00001 = LSB, 11111 = MSB), - * while the remaining 25 bits indicate the number of following 0's blocks. - *
  • 01* (0x40000000) indicates a sequence made up of at - * most one unset bit in the first 31 bits, and followed by blocks of - * 31 1's. (see the 00* case above). - *
- *

- * Note that literal words 0xFFFFFFFF and 0x80000000 are allowed, thus - * zero-length sequences (i.e., such that getSequenceCount() == 0) cannot - * exists. - */ - private int[] words; - /** - * Most significant set bit within the uncompressed bit string. - */ - private transient int last; - /** - * Cached cardinality of the bit-set. Defined for efficient {@link #size()} - * calls. When -1, the cache is invalid. - */ - private transient int size; - /** - * Index of the last word in {@link #words} - */ - private transient int lastWordIndex; - - /** - * Creates an empty integer set - */ - public ConciseSet() - { - this(false); - } - - /** - * Creates an empty integer set - * - * @param simulateWAH true if the class must simulate the behavior of - * WAH - */ - public ConciseSet(boolean simulateWAH) - { - this.simulateWAH = simulateWAH; - reset(); - } - - public ConciseSet(int[] words, boolean simulateWAH) - { - this.words = words; - this.lastWordIndex = isEmpty() ? -1 : words.length - 1; - this.size = -1; - updateLast(); - this.simulateWAH = simulateWAH; - } - - /** - * Calculates the modulus division by 31 in a faster way than using n % 31 - *

- * This method of finding modulus division by an integer that is one less - * than a power of 2 takes at most O(lg(32)) time. The number of operations - * is at most 12 + 9 * ceil(lg(32)). - *

- * See http://graphics.stanford.edu/~seander/bithacks.html - * - * @param n number to divide - * - * @return n % 31 - */ - private static int maxLiteralLengthModulus(int n) - { - int m = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F); - m = (m >>> 15) + (m & 0x00007FFF); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - return m == 31 ? 0 : m; - } - - /** - * Calculates the multiplication by 31 in a faster way than using n * 31 - * - * @param n number to multiply - * - * @return n * 31 - */ - private static int maxLiteralLengthMultiplication(int n) - { - return (n << 5) - n; - } - - /** - * Calculates the division by 31 - * - * @param n number to divide - * - * @return n / 31 - */ - private static int maxLiteralLengthDivision(int n) - { - return n / 31; - } - - /** - * Checks whether a word is a literal one - * - * @param word word to check - * - * @return true if the given word is a literal word - */ - private static boolean isLiteral(int word) - { - // "word" must be 1* - // NOTE: this is faster than "return (word & 0x80000000) == 0x80000000" - return (word & 0x80000000) != 0; - } - - /** - * Checks whether a word contains a sequence of 1's - * - * @param word word to check - * - * @return true if the given word is a sequence of 1's - */ - private static boolean isOneSequence(int word) - { - // "word" must be 01* - return (word & 0xC0000000) == ConciseSetUtils.SEQUENCE_BIT; - } - - /** - * Checks whether a word contains a sequence of 0's - * - * @param word word to check - * - * @return true if the given word is a sequence of 0's - */ - private static boolean isZeroSequence(int word) - { - // "word" must 
be 00* - return (word & 0xC0000000) == 0; - } - - /** - * Checks whether a word contains a sequence of 0's with no set bit, or 1's - * with no unset bit. - *

- * NOTE: when {@link #simulateWAH} is true, it is - * equivalent to (and as fast as) !{@link #isLiteral(int)} - * - * @param word word to check - * - * @return true if the given word is a sequence of 0's or 1's - * but with no (un)set bit - */ - private static boolean isSequenceWithNoBits(int word) - { - // "word" must be 0?00000* - return (word & 0xBE000000) == 0x00000000; - } - - /** - * Gets the number of blocks of 1's or 0's stored in a sequence word - * - * @param word word to check - * - * @return the number of blocks that follow the first block of 31 bits - */ - private static int getSequenceCount(int word) - { - // get the 25 LSB bits - return word & 0x01FFFFFF; - } - - /** - * Clears the (un)set bit in a sequence - * - * @param word word to check - * - * @return the sequence corresponding to the given sequence and with no - * (un)set bits - */ - private static int getSequenceWithNoBits(int word) - { - // clear 29 to 25 LSB bits - return (word & 0xC1FFFFFF); - } - - /** - * Gets the position of the flipped bit within a sequence word. If the - * sequence has no set/unset bit, returns -1. - *

- * Note that the parameter must a sequence word, otherwise the - * result is meaningless. - * - * @param word sequence word to check - * - * @return the position of the set bit, from 0 to 31. If the sequence has no - * set/unset bit, returns -1. - */ - private static int getFlippedBit(int word) - { - // get bits from 30 to 26 - // NOTE: "-1" is required since 00000 represents no bits and 00001 the LSB bit set - return ((word >>> 25) & 0x0000001F) - 1; - } - - /** - * Gets the number of set bits within the literal word - * - * @param word literal word - * - * @return the number of set bits within the literal word - */ - private static int getLiteralBitCount(int word) - { - return BitCount.count(getLiteralBits(word)); - } - - /** - * Gets the bits contained within the literal word - * - * @param word literal word - * - * @return the literal word with the most significant bit cleared - */ - private static int getLiteralBits(int word) - { - return ConciseSetUtils.ALL_ONES_WITHOUT_MSB & word; - } - - /** - * Returns true when the given 31-bit literal string (namely, - * with MSB set) contains only one set bit - * - * @param literal literal word (namely, with MSB unset) - * - * @return true when the given literal contains only one set - * bit - */ - private static boolean containsOnlyOneBit(int literal) - { - return (literal & (literal - 1)) == 0; - } - - /** - * Generates the 32-bit binary representation of a given word (debug only) - * - * @param word word to represent - * - * @return 32-character string that represents the given word - */ - private static String toBinaryString(int word) - { - String lsb = Integer.toBinaryString(word); - StringBuilder pad = new StringBuilder(); - for (int i = lsb.length(); i < 32; i++) { - pad.append('0'); - } - return pad.append(lsb).toString(); - } - - /** - * Resets to an empty set - * - * @see #ConciseSet() - * {@link #clear()} - */ - private void reset() - { - modCount++; - words = null; - last = -1; - size = 0; - lastWordIndex = 
-1; - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet clone() - { - if (isEmpty()) { - return empty(); - } - - // NOTE: do not use super.clone() since it is 10 times slower! - ConciseSet res = empty(); - res.last = last; - res.lastWordIndex = lastWordIndex; - res.modCount = 0; - res.size = size; - res.words = Arrays.copyOf(words, lastWordIndex + 1); - return res; - } - - /** - * Gets the literal word that represents the first 31 bits of the given the - * word (i.e. the first block of a sequence word, or the bits of a literal word). - *

- * If the word is a literal, it returns the unmodified word. In case of a - * sequence, it returns a literal that represents the first 31 bits of the - * given sequence word. - * - * @param word word to check - * - * @return the literal contained within the given word, with the most - * significant bit set to 1. - */ - private /*static*/ int getLiteral(int word) - { - if (isLiteral(word)) { - return word; - } - - if (simulateWAH) { - return isZeroSequence(word) ? ConciseSetUtils.ALL_ZEROS_LITERAL : ConciseSetUtils.ALL_ONES_LITERAL; - } - - // get bits from 30 to 26 and use them to set the corresponding bit - // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent - // NOTE: ">>> 1" is required since 00000 represents no bits and 00001 the LSB bit set - int literal = (1 << (word >>> 25)) >>> 1; - return isZeroSequence(word) - ? (ConciseSetUtils.ALL_ZEROS_LITERAL | literal) - : (ConciseSetUtils.ALL_ONES_LITERAL & ~literal); - } - - /** - * Clears bits from MSB (excluded, since it indicates the word type) to the - * specified bit (excluded). Last word is supposed to be a literal one. - * - * @param lastSetBit leftmost bit to preserve - */ - private void clearBitsAfterInLastWord(int lastSetBit) - { - words[lastWordIndex] &= ConciseSetUtils.ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)); - } - - /** - * Assures that the length of {@link #words} is sufficient to contain - * the given index. - */ - private void ensureCapacity(int index) - { - int capacity = words == null ? 
0 : words.length; - if (capacity > index) { - return; - } - capacity = Math.max(capacity << 1, index + 1); - - if (words == null) { - // nothing to copy - words = new int[capacity]; - return; - } - words = Arrays.copyOf(words, capacity); - } - - /** - * Removes unused allocated words at the end of {@link #words} only when they - * are more than twice of the needed space - */ - private void compact() - { - if (words != null && ((lastWordIndex + 1) << 1) < words.length) { - words = Arrays.copyOf(words, lastWordIndex + 1); - } - } - - /** - * Sets the bit at the given absolute position within the uncompressed bit - * string. The bit must be appendable, that is it must represent an - * integer that is strictly greater than the maximum integer in the set. - * Note that the parameter range check is performed by the public method - * {@link #add(Integer)} and not in this method. - *

- * NOTE: This method assumes that the last element of {@link #words} - * (i.e. getLastWord()) must be one of the - * following: - *

    - *
  • a literal word with at least one set bit; - *
  • a sequence of ones. - *
- * Hence, the last word in {@link #words} cannot be: - *
    - *
  • a literal word containing only zeros; - *
  • a sequence of zeros. - *
- * - * @param i the absolute position of the bit to set (i.e., the integer to add) - */ - private void append(int i) - { - // special case of empty set - if (isEmpty()) { - int zeroBlocks = maxLiteralLengthDivision(i); - if (zeroBlocks == 0) { - words = new int[1]; - lastWordIndex = 0; - } else if (zeroBlocks == 1) { - words = new int[2]; - lastWordIndex = 1; - words[0] = ConciseSetUtils.ALL_ZEROS_LITERAL; - } else { - words = new int[2]; - lastWordIndex = 1; - words[0] = zeroBlocks - 1; - } - last = i; - size = 1; - words[lastWordIndex] = ConciseSetUtils.ALL_ZEROS_LITERAL | (1 << maxLiteralLengthModulus(i)); - return; - } - - // position of the next bit to set within the current literal - int bit = maxLiteralLengthModulus(last) + i - last; - - // if we are outside the current literal, add zeros in - // between the current word and the new 1-bit literal word - if (bit >= ConciseSetUtils.MAX_LITERAL_LENGTH) { - int zeroBlocks = maxLiteralLengthDivision(bit) - 1; - bit = maxLiteralLengthModulus(bit); - if (zeroBlocks == 0) { - ensureCapacity(lastWordIndex + 1); - } else { - ensureCapacity(lastWordIndex + 2); - appendFill(zeroBlocks, 0); - } - appendLiteral(ConciseSetUtils.ALL_ZEROS_LITERAL | 1 << bit); - } else { - words[lastWordIndex] |= 1 << bit; - if (words[lastWordIndex] == ConciseSetUtils.ALL_ONES_LITERAL) { - lastWordIndex--; - appendLiteral(ConciseSetUtils.ALL_ONES_LITERAL); - } - } - - // update other info - last = i; - if (size >= 0) { - size++; - } - } - - /** - * Append a literal word after the last word - * - * @param word the new literal word. Note that the leftmost bit must - * be set to 1. - */ - private void appendLiteral(int word) - { - // when we have a zero sequence of the maximum lenght (that is, - // 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen - // that we try to append a zero literal because the result of the given operation must be an - // empty set. 
Whitout the following test, we would have increased the - // counter of the zero sequence, thus obtaining 0x02000000 that - // represents a sequence with the first bit set! - if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) { - return; - } - - // first addition - if (lastWordIndex < 0) { - words[lastWordIndex = 0] = word; - return; - } - - final int lastWord = words[lastWordIndex]; - if (word == ConciseSetUtils.ALL_ZEROS_LITERAL) { - if (lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) { - words[lastWordIndex] = 1; - } else if (isZeroSequence(lastWord)) { - words[lastWordIndex]++; - } else if (!simulateWAH && containsOnlyOneBit(getLiteralBits(lastWord))) { - words[lastWordIndex] = 1 | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25); - } else { - words[++lastWordIndex] = word; - } - } else if (word == ConciseSetUtils.ALL_ONES_LITERAL) { - if (lastWord == ConciseSetUtils.ALL_ONES_LITERAL) { - words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1; - } else if (isOneSequence(lastWord)) { - words[lastWordIndex]++; - } else if (!simulateWAH && containsOnlyOneBit(~lastWord)) { - words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1 | ((1 + Integer.numberOfTrailingZeros(~lastWord)) - << 25); - } else { - words[++lastWordIndex] = word; - } - } else { - words[++lastWordIndex] = word; - } - } - - /** - * Append a sequence word after the last word - * - * @param length sequence length - * @param fillType sequence word with a count that equals 0 - */ - private void appendFill(int length, int fillType) - { - assert length > 0; - assert lastWordIndex >= -1; - - fillType &= ConciseSetUtils.SEQUENCE_BIT; - - // it is actually a literal... - if (length == 1) { - appendLiteral(fillType == 0 ? 
ConciseSetUtils.ALL_ZEROS_LITERAL : ConciseSetUtils.ALL_ONES_LITERAL); - return; - } - - // empty set - if (lastWordIndex < 0) { - words[lastWordIndex = 0] = fillType | (length - 1); - return; - } - - final int lastWord = words[lastWordIndex]; - if (isLiteral(lastWord)) { - if (fillType == 0 && lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) { - words[lastWordIndex] = length; - } else if (fillType == ConciseSetUtils.SEQUENCE_BIT && lastWord == ConciseSetUtils.ALL_ONES_LITERAL) { - words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length; - } else if (!simulateWAH) { - if (fillType == 0 && containsOnlyOneBit(getLiteralBits(lastWord))) { - words[lastWordIndex] = length | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25); - } else if (fillType == ConciseSetUtils.SEQUENCE_BIT && containsOnlyOneBit(~lastWord)) { - words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length | ((1 + Integer.numberOfTrailingZeros(~lastWord)) - << 25); - } else { - words[++lastWordIndex] = fillType | (length - 1); - } - } else { - words[++lastWordIndex] = fillType | (length - 1); - } - } else { - if ((lastWord & 0xC0000000) == fillType) { - words[lastWordIndex] += length; - } else { - words[++lastWordIndex] = fillType | (length - 1); - } - } - } - - /** - * Recalculate a fresh value for {@link ConciseSet#last} - */ - private void updateLast() - { - if (isEmpty()) { - last = -1; - return; - } - - last = 0; - for (int i = 0; i <= lastWordIndex; i++) { - int w = words[i]; - if (isLiteral(w)) { - last += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - last += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); - } - } - - int w = words[lastWordIndex]; - if (isLiteral(w)) { - last -= Integer.numberOfLeadingZeros(getLiteralBits(w)); - } else { - last--; - } - } - - /** - * Performs the given operation over the bit-sets - * - * @param other {@link ConciseSet} instance that represents the right - * operand - * @param operator operator - * - * @return the result of the operation - */ - 
private ConciseSet performOperation(ConciseSet other, Operator operator) - { - // non-empty arguments - if (this.isEmpty() || other.isEmpty()) { - return operator.combineEmptySets(this, other); - } - - // if the two operands are disjoint, the operation is faster - ConciseSet res = operator.combineDisjointSets(this, other); - if (res != null) { - return res; - } - - // Allocate a sufficient number of words to contain all possible results. - // NOTE: since lastWordIndex is the index of the last used word in "words", - // we require "+2" to have the actual maximum required space. - // In any case, we do not allocate more than the maximum space required - // for the uncompressed representation. - // Another "+1" is required to allows for the addition of the last word - // before compacting. - res = empty(); - res.words = new int[1 + Math.min( - this.lastWordIndex + other.lastWordIndex + 2, - maxLiteralLengthDivision(Math.max(this.last, other.last)) << (simulateWAH ? 1 : 0) - )]; - - // scan "this" and "other" - WordIterator thisItr = new WordIterator(); - WordIterator otherItr = other.new WordIterator(); - while (true) { - if (!thisItr.isLiteral) { - if (!otherItr.isLiteral) { - int minCount = Math.min(thisItr.count, otherItr.count); - res.appendFill(minCount, operator.combineLiterals(thisItr.word, otherItr.word)); - if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || - { - break; - } - } else { - res.appendLiteral(operator.combineLiterals(thisItr.toLiteral(), otherItr.word)); - thisItr.word--; - if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" - { - break; - } - } - } else if (!otherItr.isLiteral) { - res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.toLiteral())); - otherItr.word--; - if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" - { - break; - } - } else { - res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.word)); - if (!thisItr.prepareNext() | 
!otherItr.prepareNext()) // do NOT use "||" - { - break; - } - } - } - - // invalidate the size - res.size = -1; - boolean invalidLast = true; - - // if one bit string is greater than the other one, we add the remaining - // bits depending on the given operation. - switch (operator) { - case AND: - break; - case OR: - res.last = Math.max(this.last, other.last); - invalidLast = false; - invalidLast |= thisItr.flush(res); - invalidLast |= otherItr.flush(res); - break; - case XOR: - if (this.last != other.last) { - res.last = Math.max(this.last, other.last); - invalidLast = false; - } - invalidLast |= thisItr.flush(res); - invalidLast |= otherItr.flush(res); - break; - case ANDNOT: - if (this.last > other.last) { - res.last = this.last; - invalidLast = false; - } - invalidLast |= thisItr.flush(res); - break; - } - - // remove trailing zeros - res.trimZeros(); - if (res.isEmpty()) { - return res; - } - - // compute the greatest element - if (invalidLast) { - res.updateLast(); - } - - // compact the memory - res.compact(); - - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(IntSet o) - { - // special cases - if (isEmpty() || o == null || o.isEmpty()) { - return 0; - } - if (this == o) { - return size(); - } - - final ConciseSet other = convert(o); - - // check whether the first operator starts with a sequence that - // completely "covers" the second operator - if (isSequenceWithNoBits(this.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { - if (isZeroSequence(this.words[0])) { - return 0; - } - return other.size(); - } - if (isSequenceWithNoBits(other.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { - if (isZeroSequence(other.words[0])) { - return 0; - } - return this.size(); - } - - int res = 0; - - // scan "this" and "other" - WordIterator thisItr = new WordIterator(); - WordIterator otherItr = other.new WordIterator(); - while 
(true) { - if (!thisItr.isLiteral) { - if (!otherItr.isLiteral) { - int minCount = Math.min(thisItr.count, otherItr.count); - if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { - res += maxLiteralLengthMultiplication(minCount); - } - if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || - { - break; - } - } else { - res += getLiteralBitCount(thisItr.toLiteral() & otherItr.word); - thisItr.word--; - if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" - { - break; - } - } - } else if (!otherItr.isLiteral) { - res += getLiteralBitCount(thisItr.word & otherItr.toLiteral()); - otherItr.word--; - if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" - { - break; - } - } else { - res += getLiteralBitCount(thisItr.word & otherItr.word); - if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" - { - break; - } - } - } - - return res; - } - - /** - * {@inheritDoc} - */ - public ByteBuffer toByteBuffer() - { - ByteBuffer buffer = ByteBuffer.allocate((lastWordIndex + 1) * 4); - buffer.asIntBuffer().put(Arrays.copyOf(words, lastWordIndex + 1)); - return buffer; - } - - /** - * {@inheritDoc} - */ - public int[] getWords() - { - if (words == null) { - return new int[]{}; - } - return Arrays.copyOf(words, lastWordIndex + 1); - } - - /** - * {@inheritDoc} - */ - @Override - public int get(int i) - { - if (i < 0) { - throw new IndexOutOfBoundsException(); - } - - // initialize data - int firstSetBitInWord = 0; - int position = i; - int setBitsInCurrentWord = 0; - for (int j = 0; j <= lastWordIndex; j++) { - int w = words[j]; - if (isLiteral(w)) { - // number of bits in the current word - setBitsInCurrentWord = getLiteralBitCount(w); - - // check if the desired bit is in the current word - if (position < setBitsInCurrentWord) { - int currSetBitInWord = -1; - for (; position >= 0; position--) { - currSetBitInWord = Integer.numberOfTrailingZeros(w & (0xFFFFFFFF << 
(currSetBitInWord + 1))); - } - return firstSetBitInWord + currSetBitInWord; - } - - // skip the 31-bit block - firstSetBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - // number of involved bits (31 * blocks) - int sequenceLength = maxLiteralLengthMultiplication(getSequenceCount(w) + 1); - - // check the sequence type - if (isOneSequence(w)) { - if (simulateWAH || isSequenceWithNoBits(w)) { - setBitsInCurrentWord = sequenceLength; - if (position < setBitsInCurrentWord) { - return firstSetBitInWord + position; - } - } else { - setBitsInCurrentWord = sequenceLength - 1; - if (position < setBitsInCurrentWord) - // check whether the desired set bit is after the - // flipped bit (or after the first block) - { - return firstSetBitInWord + position + (position < getFlippedBit(w) ? 0 : 1); - } - } - } else { - if (simulateWAH || isSequenceWithNoBits(w)) { - setBitsInCurrentWord = 0; - } else { - setBitsInCurrentWord = 1; - if (position == 0) { - return firstSetBitInWord + getFlippedBit(w); - } - } - } - - // skip the 31-bit blocks - firstSetBitInWord += sequenceLength; - } - - // update the number of found set bits - position -= setBitsInCurrentWord; - } - - throw new IndexOutOfBoundsException(Integer.toString(i)); - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(int e) - { - if (e < 0) { - throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); - } - if (isEmpty()) { - return -1; - } - - // returned value - int index = 0; - - int blockIndex = maxLiteralLengthDivision(e); - int bitPosition = maxLiteralLengthModulus(e); - for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { - int w = words[i]; - if (isLiteral(w)) { - // check if the current literal word is the "right" one - if (blockIndex == 0) { - if ((w & (1 << bitPosition)) == 0) { - return -1; - } - return index + BitCount.count(w & ~(0xFFFFFFFF << bitPosition)); - } - blockIndex--; - index += getLiteralBitCount(w); - } else { - if (simulateWAH) 
{ - if (isOneSequence(w) && blockIndex <= getSequenceCount(w)) { - return index + maxLiteralLengthMultiplication(blockIndex) + bitPosition; - } - } else { - // if we are at the beginning of a sequence, and it is - // a set bit, the bit already exists - if (blockIndex == 0) { - int l = getLiteral(w); - if ((l & (1 << bitPosition)) == 0) { - return -1; - } - return index + BitCount.count(l & ~(0xFFFFFFFF << bitPosition)); - } - - // if we are in the middle of a sequence of 1's, the bit already exist - if (blockIndex > 0 - && blockIndex <= getSequenceCount(w) - && isOneSequence(w)) { - return index + maxLiteralLengthMultiplication(blockIndex) + bitPosition - (isSequenceWithNoBits(w) ? 0 : 1); - } - } - - // next word - int blocks = getSequenceCount(w) + 1; - blockIndex -= blocks; - if (isZeroSequence(w)) { - if (!simulateWAH && !isSequenceWithNoBits(w)) { - index++; - } - } else { - index += maxLiteralLengthMultiplication(blocks); - if (!simulateWAH && !isSequenceWithNoBits(w)) { - index--; - } - } - } - } - - // not found - return -1; - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet intersection(IntSet other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return empty(); - } - if (other == this) { - return clone(); - } - return performOperation(convert(other), Operator.AND); - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet union(IntSet other) - { - if (other == null || other.isEmpty() || other == this) { - return clone(); - } - return performOperation(convert(other), Operator.OR); - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet difference(IntSet other) - { - if (other == this) { - return empty(); - } - if (other == null || other.isEmpty()) { - return clone(); - } - return performOperation(convert(other), Operator.ANDNOT); - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet symmetricDifference(IntSet other) - { - if (other == this) { - return empty(); - } - if (other == null || 
other.isEmpty()) { - return clone(); - } - return performOperation(convert(other), Operator.XOR); - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet complemented() - { - ConciseSet cloned = clone(); - cloned.complement(); - return cloned; - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - modCount++; - - if (isEmpty()) { - return; - } - - if (last == ConciseSetUtils.MIN_ALLOWED_SET_BIT) { - clear(); - return; - } - - // update size - if (size >= 0) { - size = last - size + 1; - } - - // complement each word - for (int i = 0; i <= lastWordIndex; i++) { - int w = words[i]; - if (isLiteral(w)) - // negate the bits and set the most significant bit to 1 - { - words[i] = ConciseSetUtils.ALL_ZEROS_LITERAL | ~w; - } else - // switch the sequence type - { - words[i] ^= ConciseSetUtils.SEQUENCE_BIT; - } - } - - // do not complement after the last element - if (isLiteral(words[lastWordIndex])) { - clearBitsAfterInLastWord(maxLiteralLengthModulus(last)); - } - - // remove trailing zeros - trimZeros(); - if (isEmpty()) { - return; - } - - // calculate the maximal element - last = 0; - int w = 0; - for (int i = 0; i <= lastWordIndex; i++) { - w = words[i]; - if (isLiteral(w)) { - last += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - last += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); - } - } - - // manage the last word (that must be a literal or a sequence of 1's) - if (isLiteral(w)) { - last -= Integer.numberOfLeadingZeros(getLiteralBits(w)); - } else { - last--; - } - } - - /** - * Removes trailing zeros - */ - private void trimZeros() - { - // loop over ALL_ZEROS_LITERAL words - int w; - do { - w = words[lastWordIndex]; - if (w == ConciseSetUtils.ALL_ZEROS_LITERAL) { - lastWordIndex--; - } else if (isZeroSequence(w)) { - if (simulateWAH || isSequenceWithNoBits(w)) { - lastWordIndex--; - } else { - // convert the sequence in a 1-bit literal word - words[lastWordIndex] = getLiteral(w); - return; - } - } else { - // 
one sequence or literal - return; - } - if (lastWordIndex < 0) { - reset(); - return; - } - } while (true); - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator iterator() - { - if (isEmpty()) { - return new IntIterator() - { - @Override - public void skipAllBefore(int element) {/*empty*/} - - @Override - public boolean hasNext() {return false;} - - @Override - public int next() {throw new NoSuchElementException();} - - @Override - public void remove() {throw new UnsupportedOperationException();} - - @Override - public IntIterator clone() {throw new UnsupportedOperationException();} - }; - } - return new BitIterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator descendingIterator() - { - if (isEmpty()) { - return new IntIterator() - { - @Override - public void skipAllBefore(int element) {/*empty*/} - - @Override - public boolean hasNext() {return false;} - - @Override - public int next() {throw new NoSuchElementException();} - - @Override - public void remove() {throw new UnsupportedOperationException();} - - @Override - public IntIterator clone() {throw new UnsupportedOperationException();} - }; - } - return new ReverseBitIterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - reset(); - } - - /** - * {@inheritDoc} - */ - @Override - public int last() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - return last; - } - - /** - * Convert a given collection to a {@link ConciseSet} instance - */ - private ConciseSet convert(IntSet c) - { - if (c instanceof ConciseSet && simulateWAH == ((ConciseSet) c).simulateWAH) { - return (ConciseSet) c; - } - if (c == null) { - return empty(); - } - - ConciseSet res = empty(); - IntIterator itr = c.iterator(); - while (itr.hasNext()) { - res.add(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet convert(int... 
a) - { - ConciseSet res = empty(); - if (a != null) { - a = Arrays.copyOf(a, a.length); - Arrays.sort(a); - for (int i : a) { - if (res.last != i) { - res.add(i); - } - } - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet convert(Collection c) - { - ConciseSet res = empty(); - Collection sorted; - if (c != null) { - if (c instanceof SortedSet && ((SortedSet) c).comparator() == null) { - sorted = c; - } else { - sorted = new ArrayList(c); - Collections.sort((List) sorted); - } - for (int i : sorted) { - if (res.last != i) { - res.add(i); - } - } - } - return res; - } - - /** - * Replace the current instance with another {@link ConciseSet} instance. It - * also returns true if the given set is actually different - * from the current one - * - * @param other {@link ConciseSet} instance to use to replace the current one - * - * @return true if the given set is different from the current - * set - */ - private boolean replaceWith(ConciseSet other) - { - if (this == other) { - return false; - } - - boolean isSimilar = (this.lastWordIndex == other.lastWordIndex) - && (this.last == other.last); - for (int i = 0; isSimilar && (i <= lastWordIndex); i++) { - isSimilar &= this.words[i] == other.words[i]; - } - - if (isSimilar) { - if (other.size >= 0) { - this.size = other.size; - } - return false; - } - - this.words = other.words; - this.size = other.size; - this.last = other.last; - this.lastWordIndex = other.lastWordIndex; - this.modCount++; - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean add(int e) - { - modCount++; - - // range check - if (e < ConciseSetUtils.MIN_ALLOWED_SET_BIT || e > ConciseSetUtils.MAX_ALLOWED_INTEGER) { - throw new IndexOutOfBoundsException(String.valueOf(e)); - } - - // the element can be simply appended - if (e > last) { - append(e); - return true; - } - - if (e == last) { - return false; - } - - // check if the element can be put in a literal word - int blockIndex = 
maxLiteralLengthDivision(e); - int bitPosition = maxLiteralLengthModulus(e); - for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { - int w = words[i]; - if (isLiteral(w)) { - // check if the current literal word is the "right" one - if (blockIndex == 0) { - // bit already set - if ((w & (1 << bitPosition)) != 0) { - return false; - } - - // By adding the bit we potentially create a sequence: - // -- If the literal is made up of all zeros, it definitely - // cannot be part of a sequence (otherwise it would not have - // been created). Thus, we can create a 1-bit literal word - // -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding - // the new one we potentially allow for a 1's sequence - // together with the successive word - // -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding - // the new one we potentially allow for a 1's sequence - // together with the successive and/or the preceding words - if (!simulateWAH) { - int bitCount = getLiteralBitCount(w); - if (bitCount >= ConciseSetUtils.MAX_LITERAL_LENGTH - 2) { - break; - } - } else { - if (containsOnlyOneBit(~w) || w == ConciseSetUtils.ALL_ONES_LITERAL) { - break; - } - } - - // set the bit - words[i] |= 1 << bitPosition; - if (size >= 0) { - size++; - } - return true; - } - - blockIndex--; - } else { - if (simulateWAH) { - if (isOneSequence(w) && blockIndex <= getSequenceCount(w)) { - return false; - } - } else { - // if we are at the beginning of a sequence, and it is - // a set bit, the bit already exists - if (blockIndex == 0 - && (getLiteral(w) & (1 << bitPosition)) != 0) { - return false; - } - - // if we are in the middle of a sequence of 1's, the bit already exist - if (blockIndex > 0 - && blockIndex <= getSequenceCount(w) - && isOneSequence(w)) { - return false; - } - } - - // next word - blockIndex -= getSequenceCount(w) + 1; - } - } - - // the bit is in the middle of a sequence or it may cause a literal to - // become a sequence, thus the "easiest" way to add it is by ORing - 
return replaceWith(performOperation(convert(e), Operator.OR)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean remove(int o) - { - modCount++; - - if (isEmpty()) { - return false; - } - - // the element cannot exist - if (o > last) { - return false; - } - - // check if the element can be removed from a literal word - int blockIndex = maxLiteralLengthDivision(o); - int bitPosition = maxLiteralLengthModulus(o); - for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { - final int w = words[i]; - if (isLiteral(w)) { - // check if the current literal word is the "right" one - if (blockIndex == 0) { - // the bit is already unset - if ((w & (1 << bitPosition)) == 0) { - return false; - } - - // By removing the bit we potentially create a sequence: - // -- If the literal is made up of all ones, it definitely - // cannot be part of a sequence (otherwise it would not have - // been created). Thus, we can create a 30-bit literal word - // -- If there are 2 set bits, by removing the specified - // one we potentially allow for a 1's sequence together with - // the successive word - // -- If there is 1 set bit, by removing the new one we - // potentially allow for a 0's sequence - // together with the successive and/or the preceding words - if (!simulateWAH) { - int bitCount = getLiteralBitCount(w); - if (bitCount <= 2) { - break; - } - } else { - final int l = getLiteralBits(w); - if (l == 0 || containsOnlyOneBit(l)) { - break; - } - } - - // unset the bit - words[i] &= ~(1 << bitPosition); - if (size >= 0) { - size--; - } - - // if the bit is the maximal element, update it - if (o == last) { - last -= maxLiteralLengthModulus(last) - (ConciseSetUtils.MAX_LITERAL_LENGTH - - Integer.numberOfLeadingZeros(getLiteralBits(words[i]))); - } - return true; - } - - blockIndex--; - } else { - if (simulateWAH) { - if (isZeroSequence(w) && blockIndex <= getSequenceCount(w)) { - return false; - } - } else { - // if we are at the beginning of a sequence, and it is - // 
an unset bit, the bit does not exist - if (blockIndex == 0 - && (getLiteral(w) & (1 << bitPosition)) == 0) { - return false; - } - - // if we are in the middle of a sequence of 0's, the bit does not exist - if (blockIndex > 0 - && blockIndex <= getSequenceCount(w) - && isZeroSequence(w)) { - return false; - } - } - - // next word - blockIndex -= getSequenceCount(w) + 1; - } - } - - // the bit is in the middle of a sequence or it may cause a literal to - // become a sequence, thus the "easiest" way to remove it by ANDNOTing - return replaceWith(performOperation(convert(o), Operator.ANDNOT)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean contains(int o) - { - if (isEmpty() || o > last || o < 0) { - return false; - } - - // check if the element is within a literal word - int block = maxLiteralLengthDivision(o); - int bit = maxLiteralLengthModulus(o); - for (int i = 0; i <= lastWordIndex; i++) { - final int w = words[i]; - final int t = w & 0xC0000000; // the first two bits... 
- switch (t) { - case 0x80000000: // LITERAL - case 0xC0000000: // LITERAL - // check if the current literal word is the "right" one - if (block == 0) { - return (w & (1 << bit)) != 0; - } - block--; - break; - case 0x00000000: // ZERO SEQUENCE - if (!simulateWAH) { - if (block == 0 && ((w >> 25) - 1) == bit) { - return true; - } - } - block -= getSequenceCount(w) + 1; - if (block < 0) { - return false; - } - break; - case 0x40000000: // ONE SEQUENCE - if (!simulateWAH) { - if (block == 0 && (0x0000001F & (w >> 25) - 1) == bit) { - return false; - } - } - block -= getSequenceCount(w) + 1; - if (block < 0) { - return true; - } - break; - } - } - - // no more words - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(IntSet c) - { - if (c == null || c.isEmpty() || c == this) { - return true; - } - if (isEmpty()) { - return false; - } - - final ConciseSet other = convert(c); - if (other.last > last) { - return false; - } - if (size >= 0 && other.size > size) { - return false; - } - if (other.size == 1) { - return contains(other.last); - } - - // check whether the first operator starts with a sequence that - // completely "covers" the second operator - if (isSequenceWithNoBits(this.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { - if (isZeroSequence(this.words[0])) { - return false; - } - return true; - } - if (isSequenceWithNoBits(other.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { - return false; - } - - // scan "this" and "other" - WordIterator thisItr = new WordIterator(); - WordIterator otherItr = other.new WordIterator(); - while (true) { - if (!thisItr.isLiteral) { - if (!otherItr.isLiteral) { - int minCount = Math.min(thisItr.count, otherItr.count); - if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word) == 0 - && (ConciseSetUtils.SEQUENCE_BIT & otherItr.word) != 0) { - return false; - } - if 
(!otherItr.prepareNext(minCount)) { - return true; - } - if (!thisItr.prepareNext(minCount)) { - return false; - } - } else { - if ((thisItr.toLiteral() & otherItr.word) != otherItr.word) { - return false; - } - thisItr.word--; - if (!otherItr.prepareNext()) { - return true; - } - if (!thisItr.prepareNext(1)) { - return false; - } - } - } else if (!otherItr.isLiteral) { - int o = otherItr.toLiteral(); - if ((thisItr.word & otherItr.toLiteral()) != o) { - return false; - } - otherItr.word--; - if (!otherItr.prepareNext(1)) { - return true; - } - if (!thisItr.prepareNext()) { - return false; - } - } else { - if ((thisItr.word & otherItr.word) != otherItr.word) { - return false; - } - if (!otherItr.prepareNext()) { - return true; - } - if (!thisItr.prepareNext()) { - return false; - } - } - } - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(IntSet c) - { - if (c == null || c.isEmpty() || c == this) { - return true; - } - if (isEmpty()) { - return false; - } - - final ConciseSet other = convert(c); - if (other.size == 1) { - return contains(other.last); - } - - // disjoint sets - if (isSequenceWithNoBits(this.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { - if (isZeroSequence(this.words[0])) { - return false; - } - return true; - } - if (isSequenceWithNoBits(other.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { - if (isZeroSequence(other.words[0])) { - return false; - } - return true; - } - - // scan "this" and "other" - WordIterator thisItr = new WordIterator(); - WordIterator otherItr = other.new WordIterator(); - while (true) { - if (!thisItr.isLiteral) { - if (!otherItr.isLiteral) { - int minCount = Math.min(thisItr.count, otherItr.count); - if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { - return true; - } - if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || - { - return false; - } 
- } else { - if ((thisItr.toLiteral() & otherItr.word) != ConciseSetUtils.ALL_ZEROS_LITERAL) { - return true; - } - thisItr.word--; - if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" - { - return false; - } - } - } else if (!otherItr.isLiteral) { - if ((thisItr.word & otherItr.toLiteral()) != ConciseSetUtils.ALL_ZEROS_LITERAL) { - return true; - } - otherItr.word--; - if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" - { - return false; - } - } else { - if ((thisItr.word & otherItr.word) != ConciseSetUtils.ALL_ZEROS_LITERAL) { - return true; - } - if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" - { - return false; - } - } - } - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(IntSet c, int minElements) - { - if (minElements < 1) { - throw new IllegalArgumentException(); - } - if ((size >= 0 && size < minElements) || c == null || c.isEmpty() || isEmpty()) { - return false; - } - if (this == c) { - return size() >= minElements; - } - - // convert the other set in order to perform a more complex intersection - ConciseSet other = convert(c); - if (other.size >= 0 && other.size < minElements) { - return false; - } - if (minElements == 1 && other.size == 1) { - return contains(other.last); - } - if (minElements == 1 && size == 1) { - return other.contains(last); - } - - // disjoint sets - if (isSequenceWithNoBits(this.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { - if (isZeroSequence(this.words[0])) { - return false; - } - return true; - } - if (isSequenceWithNoBits(other.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { - if (isZeroSequence(other.words[0])) { - return false; - } - return true; - } - - // resulting size - int res = 0; - - // scan "this" and "other" - WordIterator thisItr = new WordIterator(); - WordIterator otherItr = other.new WordIterator(); - while 
(true) { - if (!thisItr.isLiteral) { - if (!otherItr.isLiteral) { - int minCount = Math.min(thisItr.count, otherItr.count); - if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { - res += maxLiteralLengthMultiplication(minCount); - if (res >= minElements) { - return true; - } - } - if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || - { - return false; - } - } else { - res += getLiteralBitCount(thisItr.toLiteral() & otherItr.word); - if (res >= minElements) { - return true; - } - thisItr.word--; - if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" - { - return false; - } - } - } else if (!otherItr.isLiteral) { - res += getLiteralBitCount(thisItr.word & otherItr.toLiteral()); - if (res >= minElements) { - return true; - } - otherItr.word--; - if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" - { - return false; - } - } else { - res += getLiteralBitCount(thisItr.word & otherItr.word); - if (res >= minElements) { - return true; - } - if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" - { - return false; - } - } - } - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return words == null; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean retainAll(IntSet c) - { - modCount++; - - if (isEmpty() || c == this) { - return false; - } - if (c == null || c.isEmpty()) { - clear(); - return true; - } - - ConciseSet other = convert(c); - if (other.size == 1) { - if (contains(other.last)) { - if (size == 1) { - return false; - } - return replaceWith(convert(other.last)); - } - clear(); - return true; - } - - return replaceWith(performOperation(other, Operator.AND)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(IntSet c) - { - modCount++; - if (c == null || c.isEmpty() || this == c) { - return false; - } - - ConciseSet other = convert(c); - if (other.size == 1) { - return add(other.last); - } - - 
return replaceWith(performOperation(convert(c), Operator.OR)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(IntSet c) - { - modCount++; - - if (c == null || c.isEmpty() || isEmpty()) { - return false; - } - if (c == this) { - clear(); - return true; - } - - ConciseSet other = convert(c); - if (other.size == 1) { - return remove(other.last); - } - - return replaceWith(performOperation(convert(c), Operator.ANDNOT)); - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - if (size < 0) { - size = 0; - for (int i = 0; i <= lastWordIndex; i++) { - int w = words[i]; - if (isLiteral(w)) { - size += getLiteralBitCount(w); - } else { - if (isZeroSequence(w)) { - if (!isSequenceWithNoBits(w)) { - size++; - } - } else { - size += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); - if (!isSequenceWithNoBits(w)) { - size--; - } - } - } - } - } - return size; - } - - /** - * {@inheritDoc} - */ - @Override - public ConciseSet empty() - { - return new ConciseSet(simulateWAH); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - int h = 1; - for (int i = 0; i <= lastWordIndex; i++) { - h = (h << 5) - h + words[i]; - } - return h; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (!(obj instanceof ConciseSet)) { - return super.equals(obj); - } - - final ConciseSet other = (ConciseSet) obj; - if (simulateWAH != other.simulateWAH) { - return super.equals(obj); - } - - if (size() != other.size()) { - return false; - } - if (isEmpty()) { - return true; - } - if (last != other.last) { - return false; - } - for (int i = 0; i <= lastWordIndex; i++) { - if (words[i] != other.words[i]) { - return false; - } - } - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(IntSet o) - { - // empty set cases - if (this.isEmpty() && o.isEmpty()) { - return 0; - } - if (this.isEmpty()) { - return -1; - } - if 
(o.isEmpty()) { - return 1; - } - - final ConciseSet other = convert(o); - - // the word at the end must be the same - int res = this.last - other.last; - if (res != 0) { - return res < 0 ? -1 : 1; - } - - // scan words from MSB to LSB - int thisIndex = this.lastWordIndex; - int otherIndex = other.lastWordIndex; - int thisWord = this.words[thisIndex]; - int otherWord = other.words[otherIndex]; - while (thisIndex >= 0 && otherIndex >= 0) { - if (!isLiteral(thisWord)) { - if (!isLiteral(otherWord)) { - // compare two sequences - // note that they are made up of at least two blocks, and we - // start comparing from the end, that is at blocks with no - // (un)set bits - if (isZeroSequence(thisWord)) { - if (isOneSequence(otherWord)) - // zeros < ones - { - return -1; - } - // compare two sequences of zeros - res = getSequenceCount(otherWord) - getSequenceCount(thisWord); - if (res != 0) { - return res < 0 ? -1 : 1; - } - } else { - if (isZeroSequence(otherWord)) - // ones > zeros - { - return 1; - } - // compare two sequences of ones - res = getSequenceCount(thisWord) - getSequenceCount(otherWord); - if (res != 0) { - return res < 0 ? 
-1 : 1; - } - } - // if the sequences are the same (both zeros or both ones) - // and have the same length, compare the first blocks in the - // next loop since such blocks might contain (un)set bits - thisWord = getLiteral(thisWord); - otherWord = getLiteral(otherWord); - } else { - // zeros < literal --> -1 - // ones > literal --> +1 - // note that the sequence is made up of at least two blocks, - // and we start comparing from the end, that is at a block - // with no (un)set bits - if (isZeroSequence(thisWord)) { - if (otherWord != ConciseSetUtils.ALL_ZEROS_LITERAL) { - return -1; - } - } else { - if (otherWord != ConciseSetUtils.ALL_ONES_LITERAL) { - return 1; - } - } - if (getSequenceCount(thisWord) == 1) { - thisWord = getLiteral(thisWord); - } else { - thisWord--; - } - if (--otherIndex >= 0) { - otherWord = other.words[otherIndex]; - } - } - } else if (!isLiteral(otherWord)) { - // literal > zeros --> +1 - // literal < ones --> -1 - // note that the sequence is made up of at least two blocks, - // and we start comparing from the end, that is at a block - // with no (un)set bits - if (isZeroSequence(otherWord)) { - if (thisWord != ConciseSetUtils.ALL_ZEROS_LITERAL) { - return 1; - } - } else { - if (thisWord != ConciseSetUtils.ALL_ONES_LITERAL) { - return -1; - } - } - if (--thisIndex >= 0) { - thisWord = this.words[thisIndex]; - } - if (getSequenceCount(otherWord) == 1) { - otherWord = getLiteral(otherWord); - } else { - otherWord--; - } - } else { - res = thisWord - otherWord; // equals getLiteralBits(thisWord) - getLiteralBits(otherWord) - if (res != 0) { - return res < 0 ? -1 : 1; - } - if (--thisIndex >= 0) { - thisWord = this.words[thisIndex]; - } - if (--otherIndex >= 0) { - otherWord = other.words[otherIndex]; - } - } - } - return thisIndex >= 0 ? 1 : (otherIndex >= 0 ? 
-1 : 0); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear(int from, int to) - { - ConciseSet toRemove = empty(); - toRemove.fill(from, to); - this.removeAll(toRemove); - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(int from, int to) - { - ConciseSet toAdd = empty(); - toAdd.add(to); - toAdd.complement(); - toAdd.add(to); - - ConciseSet toRemove = empty(); - toRemove.add(from); - toRemove.complement(); - - toAdd.removeAll(toRemove); - - this.addAll(toAdd); - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(int e) - { - if (!add(e)) { - remove(e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - if (isEmpty()) { - return 0D; - } - return (lastWordIndex + 1) / Math.ceil((1 + last) / 32D); - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - if (isEmpty()) { - return 0D; - } - return (double) (lastWordIndex + 1) / size(); - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - final StringBuilder s = new StringBuilder("INTERNAL REPRESENTATION:\n"); - final Formatter f = new Formatter(s, Locale.ENGLISH); - - if (isEmpty()) { - return s.append("null\n").toString(); - } - - f.format("Elements: %s\n", toString()); - - // elements - int firstBitInWord = 0; - for (int i = 0; i <= lastWordIndex; i++) { - // raw representation of words[i] - f.format("words[%d] = ", i); - String ws = toBinaryString(words[i]); - if (isLiteral(words[i])) { - s.append(ws.substring(0, 1)); - s.append("--"); - s.append(ws.substring(1)); - } else { - s.append(ws.substring(0, 2)); - s.append('-'); - if (simulateWAH) { - s.append("xxxxx"); - } else { - s.append(ws.substring(2, 7)); - } - s.append('-'); - s.append(ws.substring(7)); - } - s.append(" --> "); - - // decode words[i] - if (isLiteral(words[i])) { - // literal - s.append("literal: "); - s.append(toBinaryString(words[i]).substring(1)); - f.format(" ---> [from %d to %d] ", 
firstBitInWord, firstBitInWord + ConciseSetUtils.MAX_LITERAL_LENGTH - 1); - firstBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - // sequence - if (isOneSequence(words[i])) { - s.append('1'); - } else { - s.append('0'); - } - s.append(" block: "); - s.append(toBinaryString(getLiteralBits(getLiteral(words[i]))).substring(1)); - if (!simulateWAH) { - s.append(" (bit="); - int bit = (words[i] & 0x3E000000) >>> 25; - if (bit == 0) { - s.append("none"); - } else { - s.append(String.format("%4d", bit - 1)); - } - s.append(')'); - } - int count = getSequenceCount(words[i]); - f.format( - " followed by %d blocks (%d bits)", - getSequenceCount(words[i]), - maxLiteralLengthMultiplication(count) - ); - f.format( - " ---> [from %d to %d] ", - firstBitInWord, - firstBitInWord + (count + 1) * ConciseSetUtils.MAX_LITERAL_LENGTH - 1 - ); - firstBitInWord += (count + 1) * ConciseSetUtils.MAX_LITERAL_LENGTH; - } - s.append('\n'); - } - - // object attributes - f.format("simulateWAH: %b\n", simulateWAH); - f.format("last: %d\n", last); - f.format("size: %s\n", (size == -1 ? 
"invalid" : Integer.toString(size))); - f.format("words.length: %d\n", words.length); - f.format("lastWordIndex: %d\n", lastWordIndex); - - // compression - f.format("bitmap compression: %.2f%%\n", 100D * bitmapCompressionRatio()); - f.format("collection compression: %.2f%%\n", 100D * collectionCompressionRatio()); - - return s.toString(); - } - - /** - * Save the state of the instance to a stream - */ - private void writeObject(ObjectOutputStream s) throws IOException - { - if (words != null && lastWordIndex < words.length - 1) - // compact before serializing - { - words = Arrays.copyOf(words, lastWordIndex + 1); - } - s.defaultWriteObject(); - } - - /** - * Reconstruct the instance from a stream - */ - private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException - { - s.defaultReadObject(); - if (words == null) { - reset(); - return; - } - lastWordIndex = words.length - 1; - updateLast(); - size = -1; - } - - /** - * Possible operations - */ - private enum Operator - { - /** - * @uml.property name="aND" - * @uml.associationEnd - */ - AND { - @Override - public int combineLiterals(int literal1, int literal2) - { - return literal1 & literal2; - } - - @Override - public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) - { - return op1.empty(); - } - - /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ - private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) - { - // check whether the first operator starts with a sequence that - // completely "covers" the second operator - if (isSequenceWithNoBits(op1.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { - // op2 is completely hidden by op1 - if (isZeroSequence(op1.words[0])) { - return op1.empty(); - } - // op2 is left unchanged, but the rest of op1 is hidden - return op2.clone(); - } - return null; - } - - @Override - public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) - 
{ - ConciseSet res = oneWayCombineDisjointSets(op1, op2); - if (res == null) { - res = oneWayCombineDisjointSets(op2, op1); - } - return res; - } - }, - - /** - * @uml.property name="oR" - * @uml.associationEnd - */ - OR { - @Override - public int combineLiterals(int literal1, int literal2) - { - return literal1 | literal2; - } - - @Override - public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) - { - if (!op1.isEmpty()) { - return op1.clone(); - } - if (!op2.isEmpty()) { - return op2.clone(); - } - return op1.empty(); - } - - /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ - private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) - { - // check whether the first operator starts with a sequence that - // completely "covers" the second operator - if (isSequenceWithNoBits(op1.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { - // op2 is completely hidden by op1 - if (isOneSequence(op1.words[0])) { - return op1.clone(); - } - // op2 is left unchanged, but the rest of op1 must be appended... - - // ... first, allocate sufficient space for the result - ConciseSet res = op1.empty(); - res.words = new int[op1.lastWordIndex + op2.lastWordIndex + 3]; - res.lastWordIndex = op2.lastWordIndex; - - // ... then, copy op2 - System.arraycopy(op2.words, 0, res.words, 0, op2.lastWordIndex + 1); - - // ... 
finally, append op1 - WordIterator wordIterator = op1.new WordIterator(); - wordIterator.prepareNext(maxLiteralLengthDivision(op2.last) + 1); - wordIterator.flush(res); - if (op1.size < 0 || op2.size < 0) { - res.size = -1; - } else { - res.size = op1.size + op2.size; - } - res.last = op1.last; - res.compact(); - return res; - } - return null; - } - - @Override - public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) - { - ConciseSet res = oneWayCombineDisjointSets(op1, op2); - if (res == null) { - res = oneWayCombineDisjointSets(op2, op1); - } - return res; - } - }, - - /** - * @uml.property name="xOR" - * @uml.associationEnd - */ - XOR { - @Override - public int combineLiterals(int literal1, int literal2) - { - return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 ^ literal2); - } - - @Override - public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) - { - if (!op1.isEmpty()) { - return op1.clone(); - } - if (!op2.isEmpty()) { - return op2.clone(); - } - return op1.empty(); - } - - /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ - private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) - { - // check whether the first operator starts with a sequence that - // completely "covers" the second operator - if (isSequenceWithNoBits(op1.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { - // op2 is left unchanged by op1 - if (isZeroSequence(op1.words[0])) { - return OR.combineDisjointSets(op1, op2); - } - // op2 must be complemented, then op1 must be appended - // it is better to perform it normally... 
- return null; - } - return null; - } - - @Override - public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) - { - ConciseSet res = oneWayCombineDisjointSets(op1, op2); - if (res == null) { - res = oneWayCombineDisjointSets(op2, op1); - } - return res; - } - }, - - /** - * @uml.property name="aNDNOT" - * @uml.associationEnd - */ - ANDNOT { - @Override - public int combineLiterals(int literal1, int literal2) - { - return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 & (~literal2)); - } - - @Override - public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) - { - if (!op1.isEmpty()) { - return op1.clone(); - } - return op1.empty(); - } - - @Override - public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) - { - // check whether the first operator starts with a sequence that - // completely "covers" the second operator - if (isSequenceWithNoBits(op1.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { - // op1 is left unchanged by op2 - if (isZeroSequence(op1.words[0])) { - return op1.clone(); - } - // op2 must be complemented, then op1 must be appended - // it is better to perform it normally... - return null; - } - // check whether the second operator starts with a sequence that - // completely "covers" the first operator - if (isSequenceWithNoBits(op2.words[0]) - && maxLiteralLengthMultiplication(getSequenceCount(op2.words[0]) + 1) > op1.last) { - // op1 is left unchanged by op2 - if (isZeroSequence(op2.words[0])) { - return op1.clone(); - } - // op1 is cleared by op2 - return op1.empty(); - } - return null; - } - },; - - /** - * Performs the operation on the given literals - * - * @param literal1 left operand - * @param literal2 right operand - * - * @return literal representing the result of the specified operation - */ - public abstract int combineLiterals(int literal1, int literal2); - - /** - * Performs the operation when one or both operands are empty set - *

- * NOTE: the caller MUST assure that one or both the operands - * are empty!!! - * - * @param op1 left operand - * @param op2 right operand - * - * @return null if both operands are non-empty - */ - public abstract ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2); - - /** - * Performs the operation in the special case of "disjoint" sets, namely - * when the first (or the second) operand starts with a sequence (it - * does not matter if 0's or 1's) that completely covers all the bits of - * the second (or the first) operand. - * - * @param op1 left operand - * @param op2 right operand - * - * @return null if operands are non-disjoint - */ - public abstract ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2); - } - - /** - * Iterator over the bits of a single literal/fill word - */ - private interface WordExpander - { - public boolean hasNext(); - - public boolean hasPrevious(); - - public int next(); - - public int previous(); - - public void skipAllAfter(int i); - - public void skipAllBefore(int i); - - public void reset(int offset, int word, boolean fromBeginning); - } - - /** - * Iterates over words, from the rightmost (LSB) to the leftmost (MSB). - *

- * When {@link ConciseSet#simulateWAH} is false, mixed - * sequences are "broken" into a literal (i.e., the first block is coded - * with a literal in {@link #word}) and a "pure" sequence (i.e., the - * remaining blocks are coded with a sequence with no bits in {@link #word}) - */ - private class WordIterator - { - /** - * copy of the current word - */ - int word; - - /** - * current word index - */ - int index; - - /** - * true if {@link #word} is a literal - */ - boolean isLiteral; - - /** - * number of blocks in the current word (1 for literals, > 1 for sequences) - */ - int count; - - /** - * Initialize data - */ - WordIterator() - { - isLiteral = false; - index = -1; - prepareNext(); - } - - /** - * @return true if there is no current word - */ - boolean exhausted() - { - return index > lastWordIndex; - } - - /** - * Prepare the next value for {@link #word} after skipping a given - * number of 31-bit blocks in the current sequence. - *

- * NOTE: it works only when the current word is within a - * sequence, namely a literal cannot be skipped. Moreover, the number of - * blocks to skip must be less than the remaining blocks in the current - * sequence. - * - * @param c number of 31-bit "blocks" to skip - * - * @return false if the next word does not exists - */ - boolean prepareNext(int c) - { - assert c <= count; - count -= c; - if (count == 0) { - return prepareNext(); - } - return true; - } - - /** - * Prepare the next value for {@link #word} - * - * @return false if the next word does not exists - */ - boolean prepareNext() - { - if (!simulateWAH && isLiteral && count > 1) { - count--; - isLiteral = false; - word = getSequenceWithNoBits(words[index]) - 1; - return true; - } - - index++; - if (index > lastWordIndex) { - return false; - } - word = words[index]; - isLiteral = isLiteral(word); - if (!isLiteral) { - count = getSequenceCount(word) + 1; - if (!simulateWAH && !isSequenceWithNoBits(word)) { - isLiteral = true; - int bit = (1 << (word >>> 25)) >>> 1; - word = isZeroSequence(word) - ? (ConciseSetUtils.ALL_ZEROS_LITERAL | bit) - : (ConciseSetUtils.ALL_ONES_LITERAL & ~bit); - } - } else { - count = 1; - } - return true; - } - - /** - * @return the literal word corresponding to each block contained in the - * current sequence word. Not to be used with literal words! 
- */ - int toLiteral() - { - assert !isLiteral; - return ConciseSetUtils.ALL_ZEROS_LITERAL | ((word << 1) >> ConciseSetUtils.MAX_LITERAL_LENGTH); - } - - /** - * Copies all the remaining words in the given set - * - * @param s set where the words must be copied - * - * @return false if there are no words to copy - */ - private boolean flush(ConciseSet s) - { - // nothing to flush - if (exhausted()) { - return false; - } - - // try to "compress" the first few words - do { - if (isLiteral) { - s.appendLiteral(word); - } else { - s.appendFill(count, word); - } - } while (prepareNext() && s.words[s.lastWordIndex] != word); - - // copy remaining words "as-is" - int delta = lastWordIndex - index + 1; - System.arraycopy(words, index, s.words, s.lastWordIndex + 1, delta); - s.lastWordIndex += delta; - s.last = last; - return true; - } - } - - /* - * DEBUGGING METHODS - */ - - /** - * Iterator over the bits of literal and zero-fill words - */ - private class LiteralAndZeroFillExpander implements WordExpander - { - final int[] buffer = new int[ConciseSetUtils.MAX_LITERAL_LENGTH]; - int len = 0; - int current = 0; - - @Override - public boolean hasNext() - { - return current < len; - } - - @Override - public boolean hasPrevious() - { - return current > 0; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return buffer[current++]; - } - - @Override - public int previous() - { - if (!hasPrevious()) { - throw new NoSuchElementException(); - } - return buffer[--current]; - } - - @Override - public void skipAllAfter(int i) - { - while (hasPrevious() && buffer[current - 1] > i) { - current--; - } - } - - @Override - public void skipAllBefore(int i) - { - while (hasNext() && buffer[current] < i) { - current++; - } - } - - @Override - public void reset(int offset, int word, boolean fromBeginning) - { - if (isLiteral(word)) { - len = 0; - for (int i = 0; i < ConciseSetUtils.MAX_LITERAL_LENGTH; i++) { - if ((word & (1 << i)) != 0) 
{ - buffer[len++] = offset + i; - } - } - current = fromBeginning ? 0 : len; - } else { - if (isZeroSequence(word)) { - if (simulateWAH || isSequenceWithNoBits(word)) { - len = 0; - current = 0; - } else { - len = 1; - buffer[0] = offset + ((0x3FFFFFFF & word) >>> 25) - 1; - current = fromBeginning ? 0 : 1; - } - } else { - throw new RuntimeException("sequence of ones!"); - } - } - } - } - - /** - * Iterator over the bits of one-fill words - */ - private class OneFillExpander implements WordExpander - { - int firstInt = 1; - int lastInt = -1; - int current = 0; - int exception = -1; - - @Override - public boolean hasNext() - { - return current < lastInt; - } - - @Override - public boolean hasPrevious() - { - return current > firstInt; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - current++; - if (!simulateWAH && current == exception) { - current++; - } - return current; - } - - @Override - public int previous() - { - if (!hasPrevious()) { - throw new NoSuchElementException(); - } - current--; - if (!simulateWAH && current == exception) { - current--; - } - return current; - } - - @Override - public void skipAllAfter(int i) - { - if (i >= current) { - return; - } - current = i + 1; - } - - @Override - public void skipAllBefore(int i) - { - if (i <= current) { - return; - } - current = i - 1; - } - - @Override - public void reset(int offset, int word, boolean fromBeginning) - { - if (!isOneSequence(word)) { - throw new RuntimeException("NOT a sequence of ones!"); - } - firstInt = offset; - lastInt = offset + maxLiteralLengthMultiplication(getSequenceCount(word) + 1) - 1; - if (!simulateWAH) { - exception = offset + ((0x3FFFFFFF & word) >>> 25) - 1; - if (exception == firstInt) { - firstInt++; - } - if (exception == lastInt) { - lastInt--; - } - } - current = fromBeginning ? 
(firstInt - 1) : (lastInt + 1); - } - } - - /** - * Iterator for all the integers of a {@link ConciseSet} instance - */ - private class BitIterator implements IntIterator - { - /** - * @uml.property name="litExp" - * @uml.associationEnd - */ - final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander(); - /** - * @uml.property name="oneExp" - * @uml.associationEnd - */ - final OneFillExpander oneExp = new OneFillExpander(); - /** - * @uml.property name="exp" - * @uml.associationEnd - */ - WordExpander exp; - int nextIndex = 0; - int nextOffset = 0; - - private BitIterator() - { - nextWord(); - } - - private void nextWord() - { - final int word = words[nextIndex++]; - exp = isOneSequence(word) ? oneExp : litExp; - exp.reset(nextOffset, word, true); - - // prepare next offset - if (isLiteral(word)) { - nextOffset += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - nextOffset += maxLiteralLengthMultiplication(getSequenceCount(word) + 1); - } - } - - @Override - public boolean hasNext() - { - return nextIndex <= lastWordIndex || exp.hasNext(); - } - - @Override - public int next() - { - while (!exp.hasNext()) { - if (nextIndex > lastWordIndex) { - throw new NoSuchElementException(); - } - nextWord(); - } - return exp.next(); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - - @Override - public void skipAllBefore(int element) - { - while (true) { - exp.skipAllBefore(element); - if (exp.hasNext() || nextIndex > lastWordIndex) { - return; - } - nextWord(); - } - } - - @Override - public IntIterator clone() - { - BitIterator retVal = new BitIterator(); - retVal.exp = exp; - retVal.nextIndex = nextIndex; - retVal.nextOffset = nextOffset; - return retVal; - } - } - - /** - * @author alessandrocolantonio - */ - private class ReverseBitIterator implements IntIterator - { - /** - * @uml.property name="litExp" - * @uml.associationEnd - */ - final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander(); - 
/** - * @uml.property name="oneExp" - * @uml.associationEnd - */ - final OneFillExpander oneExp = new OneFillExpander(); - /** - * @uml.property name="exp" - * @uml.associationEnd - */ - WordExpander exp; - int nextIndex = lastWordIndex; - int nextOffset = maxLiteralLengthMultiplication(maxLiteralLengthDivision(last) + 1); - int firstIndex; // first non-zero block - - ReverseBitIterator() - { - // identify the first non-zero block - if ((isSequenceWithNoBits(words[0]) && isZeroSequence(words[0])) || (isLiteral(words[0]) - && words[0] - == ConciseSetUtils.ALL_ZEROS_LITERAL)) { - firstIndex = 1; - } else { - firstIndex = 0; - } - previousWord(); - } - - void previousWord() - { - final int word = words[nextIndex--]; - exp = isOneSequence(word) ? oneExp : litExp; - if (isLiteral(word)) { - nextOffset -= ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - nextOffset -= maxLiteralLengthMultiplication(getSequenceCount(word) + 1); - } - exp.reset(nextOffset, word, false); - } - - @Override - public boolean hasNext() - { - return nextIndex >= firstIndex || exp.hasPrevious(); - } - - @Override - public int next() - { - while (!exp.hasPrevious()) { - if (nextIndex < firstIndex) { - throw new NoSuchElementException(); - } - previousWord(); - } - return exp.previous(); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - - @Override - public void skipAllBefore(int element) - { - while (true) { - exp.skipAllAfter(element); - if (exp.hasPrevious() || nextIndex < firstIndex) { - return; - } - previousWord(); - } - } - - @Override - public IntIterator clone() - { - ReverseBitIterator retVal = new ReverseBitIterator(); - retVal.exp = exp; - retVal.nextIndex = nextIndex; - retVal.nextOffset = nextOffset; - retVal.firstIndex = firstIndex; - return retVal; - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java deleted file mode 
100755 index b9465d9dc3de..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java +++ /dev/null @@ -1,563 +0,0 @@ -package io.druid.extendedset.intset; - -import io.druid.extendedset.utilities.BitCount; - -import java.util.NoSuchElementException; - -/** - */ -public class ConciseSetUtils -{ - /** - * The highest representable integer. - *

- * Its value is computed as follows. The number of bits required to - * represent the longest sequence of 0's or 1's is - * ceil(log2(({@link Integer#MAX_VALUE} - 31) / 31)) = 27. - * Indeed, at least one literal exists, and the other bits may all be 0's or - * 1's, that is {@link Integer#MAX_VALUE} - 31. If we use: - *

    - *
  • 2 bits for the sequence type; - *
  • 5 bits to indicate which bit is set; - *
- * then 32 - 5 - 2 = 25 is the number of available bits to - * represent the maximum sequence of 0's and 1's. Thus, the maximal bit that - * can be set is represented by a number of 0's equals to - * 31 * (1 << 25), followed by a literal with 30 0's and the - * MSB (31st bit) equal to 1 - */ - public final static int MAX_ALLOWED_INTEGER = 31 * (1 << 25) + 30; // 1040187422 - - /** - * The lowest representable integer. - */ - public final static int MIN_ALLOWED_SET_BIT = 0; - - /** - * Maximum number of representable bits within a literal - */ - public final static int MAX_LITERAL_LENGTH = 31; - - /** - * Literal that represents all bits set to 1 (and MSB = 1) - */ - public final static int ALL_ONES_LITERAL = 0xFFFFFFFF; - - /** - * Literal that represents all bits set to 0 (and MSB = 1) - */ - public final static int ALL_ZEROS_LITERAL = 0x80000000; - - /** - * All bits set to 1 and MSB = 0 - */ - public final static int ALL_ONES_WITHOUT_MSB = 0x7FFFFFFF; - - /** - * Sequence bit - */ - public final static int SEQUENCE_BIT = 0x40000000; - - /** - * Calculates the modulus division by 31 in a faster way than using n % 31 - *

- * This method of finding modulus division by an integer that is one less - * than a power of 2 takes at most O(lg(32)) time. The number of operations - * is at most 12 + 9 * ceil(lg(32)). - *

- * See http://graphics.stanford.edu/~seander/bithacks.html - * - * @param n number to divide - * - * @return n % 31 - */ - public static int maxLiteralLengthModulus(int n) - { - int m = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F); - m = (m >>> 15) + (m & 0x00007FFF); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - if (m <= 31) { - return m == 31 ? 0 : m; - } - m = (m >>> 5) + (m & 0x0000001F); - return m == 31 ? 0 : m; - } - - /** - * Calculates the multiplication by 31 in a faster way than using n * 31 - * - * @param n number to multiply - * - * @return n * 31 - */ - public static int maxLiteralLengthMultiplication(int n) - { - return (n << 5) - n; - } - - /** - * Calculates the division by 31 - * - * @param n number to divide - * - * @return n / 31 - */ - public static int maxLiteralLengthDivision(int n) - { - return n / 31; - } - - /** - * Checks whether a word is a literal one - * - * @param word word to check - * - * @return true if the given word is a literal word - */ - public static boolean isLiteral(int word) - { - // "word" must be 1* - // NOTE: this is faster than "return (word & 0x80000000) == 0x80000000" - return (word & 0x80000000) != 0; - } - - /** - * Checks whether a word contains a sequence of 1's - * - * @param word word to check - * - * @return true if the given word is a sequence of 1's - */ - public static boolean isOneSequence(int word) - { - // "word" must be 01* - return (word & 0xC0000000) == SEQUENCE_BIT; - } - - /** - * Checks whether a word contains a sequence of 0's - * - * @param word word to check - * - * @return true if the given word is a sequence of 0's - */ - public static boolean isZeroSequence(int word) - { - // "word" must be 00* - return (word 
& 0xC0000000) == 0; - } - - /** - * Checks whether a word contains a sequence of 0's with no set bit, or 1's - * with no unset bit. - *

- * NOTE: when {@link #simulateWAH} is true, it is - * equivalent to (and as fast as) !{@link #isLiteral(int)} - * - * @param word word to check - * - * @return true if the given word is a sequence of 0's or 1's - * but with no (un)set bit - */ - public static boolean isSequenceWithNoBits(int word) - { - // "word" must be 0?00000* - return (word & 0xBE000000) == 0x00000000; - } - - /** - * Gets the number of blocks of 1's or 0's stored in a sequence word - * - * @param word word to check - * - * @return the number of blocks that follow the first block of 31 bits - */ - public static int getSequenceCount(int word) - { - // get the 25 LSB bits - return word & 0x01FFFFFF; - } - - public static int getSequenceNumWords(int word) - { - return getSequenceCount(word) + 1; - } - - /** - * Clears the (un)set bit in a sequence - * - * @param word word to check - * - * @return the sequence corresponding to the given sequence and with no - * (un)set bits - */ - public static int getSequenceWithNoBits(int word) - { - // clear 29 to 25 LSB bits - return (word & 0xC1FFFFFF); - } - - /** - * Gets the literal word that represents the first 31 bits of the given the - * word (i.e. the first block of a sequence word, or the bits of a literal word). - *

- * If the word is a literal, it returns the unmodified word. In case of a - * sequence, it returns a literal that represents the first 31 bits of the - * given sequence word. - * - * @param word word to check - * - * @return the literal contained within the given word, with the most - * significant bit set to 1. - */ - public static int getLiteral(int word, boolean simulateWAH) - { - if (isLiteral(word)) { - return word; - } - - if (simulateWAH) { - return isZeroSequence(word) ? ALL_ZEROS_LITERAL : ALL_ONES_LITERAL; - } - - // get bits from 30 to 26 and use them to set the corresponding bit - // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent - // NOTE: ">>> 1" is required since 00000 represents no bits and 00001 the LSB bit set - int literal = (1 << (word >>> 25)) >>> 1; - return isZeroSequence(word) - ? (ALL_ZEROS_LITERAL | literal) - : (ALL_ONES_LITERAL & ~literal); - } - - public static int getLiteralFromZeroSeqFlipBit(int word) - { - int flipBit = getFlippedBit(word); - if (flipBit > -1) { - return ALL_ZEROS_LITERAL | flipBitAsBinaryString(flipBit); - } - return ALL_ZEROS_LITERAL; - } - - public static int getLiteralFromOneSeqFlipBit(int word) - { - int flipBit = getFlippedBit(word); - if (flipBit > -1) { - return ALL_ONES_LITERAL ^ flipBitAsBinaryString(flipBit); - } - return ALL_ONES_LITERAL; - } - - /** - * Gets the position of the flipped bit within a sequence word. If the - * sequence has no set/unset bit, returns -1. - *

- * Note that the parameter must a sequence word, otherwise the - * result is meaningless. - * - * @param word sequence word to check - * - * @return the position of the set bit, from 0 to 31. If the sequence has no - * set/unset bit, returns -1. - */ - public static int getFlippedBit(int word) - { - // get bits from 30 to 26 - // NOTE: "-1" is required since 00000 represents no bits and 00001 the LSB bit set - return ((word >>> 25) & 0x0000001F) - 1; - } - - public static int flipBitAsBinaryString(int flipBit) - { - return ((Number) Math.pow(2, flipBit)).intValue(); - } - - /** - * Gets the number of set bits within the literal word - * - * @param word literal word - * - * @return the number of set bits within the literal word - */ - public static int getLiteralBitCount(int word) - { - return BitCount.count(getLiteralBits(word)); - } - - /** - * Gets the bits contained within the literal word - * - * @param word literal word - * - * @return the literal word with the most significant bit cleared - */ - public static int getLiteralBits(int word) - { - return ALL_ONES_WITHOUT_MSB & word; - } - - public static boolean isAllOnesLiteral(int word) - { - return (word & -1) == -1; - } - - public static boolean isAllZerosLiteral(int word) - { - return (word | 0x80000000) == 0x80000000; - } - - public static boolean isLiteralWithSingleZeroBit(int word) - { - return isLiteral(word) && (Integer.bitCount(~word) == 1); - } - - public static boolean isLiteralWithSingleOneBit(int word) - { - return isLiteral(word) && (Integer.bitCount(word) == 2); - } - - public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit) - { - return lastWord &= ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)); - } - - public static int onesUntil(int bit) - { - return 0x80000000 | ((1 << bit) - 1); - } - - public static LiteralAndZeroFillExpander newLiteralAndZeroFillExpander() - { - return new LiteralAndZeroFillExpander(); - } - - public static OneFillExpander newOneFillExpander() - { 
- return new OneFillExpander(); - } - - public interface WordExpander - { - public boolean hasNext(); - - public boolean hasPrevious(); - - public int next(); - - public int previous(); - - public void skipAllAfter(int i); - - public void skipAllBefore(int i); - - public void reset(int offset, int word, boolean fromBeginning); - - public WordExpander clone(); - } - - /** - * Iterator over the bits of literal and zero-fill words - */ - public static class LiteralAndZeroFillExpander implements WordExpander - { - final int[] buffer = new int[MAX_LITERAL_LENGTH]; - int len = 0; - int current = 0; - - @Override - public boolean hasNext() - { - return current < len; - } - - @Override - public boolean hasPrevious() - { - return current > 0; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return buffer[current++]; - } - - @Override - public int previous() - { - if (!hasPrevious()) { - throw new NoSuchElementException(); - } - return buffer[--current]; - } - - @Override - public void skipAllAfter(int i) - { - while (hasPrevious() && buffer[current - 1] > i) { - current--; - } - } - - @Override - public void skipAllBefore(int i) - { - while (hasNext() && buffer[current] < i) { - current++; - } - } - - @Override - public void reset(int offset, int word, boolean fromBeginning) - { - if (isLiteral(word)) { - len = 0; - for (int i = 0; i < MAX_LITERAL_LENGTH; i++) { - if ((word & (1 << i)) != 0) { - buffer[len++] = offset + i; - } - } - current = fromBeginning ? 0 : len; - } else { - if (isZeroSequence(word)) { - if (isSequenceWithNoBits(word)) { - len = 0; - current = 0; - } else { - len = 1; - buffer[0] = offset + ((0x3FFFFFFF & word) >>> 25) - 1; - current = fromBeginning ? 
0 : 1; - } - } else { - throw new RuntimeException("sequence of ones!"); - } - } - } - - @Override - public WordExpander clone() - { - LiteralAndZeroFillExpander retVal = new LiteralAndZeroFillExpander(); - System.arraycopy(buffer, 0, retVal.buffer, 0, buffer.length); - retVal.len = len; - retVal.current = current; - return retVal; - } - } - - /** - * Iterator over the bits of one-fill words - */ - public static class OneFillExpander implements WordExpander - { - int firstInt = 1; - int lastInt = -1; - int current = 0; - int exception = -1; - - @Override - public boolean hasNext() - { - return current < lastInt; - } - - @Override - public boolean hasPrevious() - { - return current > firstInt; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - current++; - if (current == exception) { - current++; - } - return current; - } - - @Override - public int previous() - { - if (!hasPrevious()) { - throw new NoSuchElementException(); - } - current--; - if (current == exception) { - current--; - } - return current; - } - - @Override - public void skipAllAfter(int i) - { - if (i >= current) { - return; - } - current = i + 1; - } - - @Override - public void skipAllBefore(int i) - { - if (i <= current) { - return; - } - current = i - 1; - } - - @Override - public void reset(int offset, int word, boolean fromBeginning) - { - if (!isOneSequence(word)) { - throw new RuntimeException("NOT a sequence of ones!"); - } - firstInt = offset; - lastInt = offset + maxLiteralLengthMultiplication(getSequenceCount(word) + 1) - 1; - - exception = offset + ((0x3FFFFFFF & word) >>> 25) - 1; - if (exception == firstInt) { - firstInt++; - } - if (exception == lastInt) { - lastInt--; - } - - current = fromBeginning ? 
(firstInt - 1) : (lastInt + 1); - } - - @Override - public WordExpander clone() - { - OneFillExpander retVal = new OneFillExpander(); - retVal.firstInt = firstInt; - retVal.lastInt = lastInt; - retVal.current = current; - retVal.exception = exception; - return retVal; - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/FastSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/FastSet.java deleted file mode 100755 index d0f6fe4bd169..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/FastSet.java +++ /dev/null @@ -1,1403 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package io.druid.extendedset.intset; - - -import io.druid.extendedset.utilities.BitCount; - -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.util.Arrays; -import java.util.BitSet; -import java.util.Collection; -import java.util.Formatter; -import java.util.Locale; -import java.util.NoSuchElementException; - -/** - * An {@link IntSet} implementation, representing a set of integers, based on an - * uncompressed bitmap. - *

- * It actually is an extension of {@link BitSet}. More specifically, union and - * intersection operations are mainly derived from the code of {@link BitSet} to - * provide bitwise "or" and "and". - *

- * The iterator implemented for this class allows for modifications during the - * iteration, that is it is possible to add/remove elements through - * {@link #add(int)}, {@link #remove(int)}, {@link #addAll(IntSet)}, - * {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, - * {@link IntIterator#next()} returns the first integral greater than the last - * visited one. - * - * @author Alessandro Colantonio - * @version $Id$ - */ -public class FastSet extends AbstractIntSet implements java.io.Serializable -{ - /** - * generated serial ID - */ - private static final long serialVersionUID = 6519808981110513440L; - - /** - * number of bits within each word - */ - private final static int WORD_SIZE = 32; - - /** - * 32-bit string of all 1's - */ - private static final int ALL_ONES_WORD = 0xFFFFFFFF; - - /** - * all bits, grouped in blocks of length 32 - */ - private int[] words; - - /** - * index of the first empty word, that is the number of words in the logical - * size of this {@link FastSet} - */ - private transient int firstEmptyWord; - - /** - * cached set size (only for fast size() call). When -1, the cache is invalid - */ - private transient int size; - - /** - * Creates a new, empty set. - */ - public FastSet() - { - clear(); - } - - /** - * Creates a new, empty set. It preallocates the space for - * maxWordsInUse words. 
- */ - private FastSet(int wordsToAllocate) - { - firstEmptyWord = 0; - size = 0; - words = new int[wordsToAllocate]; - } - - /** - * Given a number, it returns the multiplication by the number of bits for each block - */ - private static int multiplyByWordSize(int i) - { - return i << 5; // i * WORD_SIZE; - } - - /** - * Given a bit index, it returns the index of the word containing it - */ - private static int wordIndex(int bitIndex) - { - if (bitIndex < 0) { - throw new IndexOutOfBoundsException("index < 0: " + bitIndex); - } - return bitIndex >> 5; - } - - /** - * Given a bit index, it returns the index of the word containing it - */ - private static int wordIndexNoCheck(int bitIndex) - { - return bitIndex >> 5; - } - - /** - * Generates the 32-bit binary representation of a given word (debug only) - * - * @param word word to represent - * - * @return 32-character string that represents the given word - */ - private static String toBinaryString(int word) - { - String lsb = Integer.toBinaryString(word); - StringBuilder pad = new StringBuilder(); - for (int i = lsb.length(); i < 32; i++) { - pad.append('0'); - } - return pad.append(lsb).toString(); - } - - /** - * Sets the field {@link #firstEmptyWord} with the logical size in words of the - * bit set. - */ - private void fixFirstEmptyWord() - { - int i = firstEmptyWord - 1; - final int[] localWords = words; // faster - while (i >= 0 && localWords[i] == 0) { - i--; - } - firstEmptyWord = i + 1; - } - - /** - * Ensures that the {@link FastSet} can hold enough words. - * - * @param wordsRequired the minimum acceptable number of words. - */ - private void ensureCapacity(int wordsRequired) - { - if (words.length >= wordsRequired) { - return; - } - int newLength = Math.max(words.length << 1, wordsRequired); - words = Arrays.copyOf(words, newLength); - } - - /** - * Ensures that the {@link FastSet} can accommodate a given word index - * - * @param wordIndex the index to be accommodated. 
- */ - private void expandTo(int wordIndex) - { - int wordsRequired = wordIndex + 1; - if (firstEmptyWord < wordsRequired) { - ensureCapacity(wordsRequired); - firstEmptyWord = wordsRequired; - } - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet clone() - { - // NOTE: do not use super.clone() since it is 10 times slower! - FastSet res = new FastSet(); - res.firstEmptyWord = firstEmptyWord; - res.size = size; - res.words = Arrays.copyOf(words, firstEmptyWord); - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - int h = 1; - final int[] localWords = words; // faster - for (int i = 0; i < firstEmptyWord; i++) { - h = (h << 5) - h + localWords[i]; - } - return h; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (!(obj instanceof FastSet)) { - return super.equals(obj); - } - - final FastSet other = (FastSet) obj; - if (firstEmptyWord != other.firstEmptyWord) { - return false; - } - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - for (int i = 0; i < firstEmptyWord; i++) { - if (localWords[i] != localOtherWords[i]) { - return false; - } - } - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return firstEmptyWord == 0; - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - // check if the cached size is invalid - if (size < 0) { - size = BitCount.count(words, firstEmptyWord); - } - return size; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean add(int i) - { - int wordIndex = wordIndex(i); - expandTo(wordIndex); - int before = words[wordIndex]; - words[wordIndex] |= (1 << i); - if (before != words[wordIndex]) { - if (size >= 0) { - size++; - } - return true; - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean remove(int i) - { - if (i < 0) { - return false; - } - - int wordIndex = 
wordIndex(i); - if (wordIndex >= firstEmptyWord) { - return false; - } - int before = words[wordIndex]; - words[wordIndex] &= ~(1 << i); - if (before != words[wordIndex]) { - if (size >= 0) { - size--; - } - fixFirstEmptyWord(); - return true; - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(IntSet c) - { - if (c == null || c.isEmpty() || this == c) { - return false; - } - - final FastSet other = convert(c); - - int wordsInCommon = Math.min(firstEmptyWord, other.firstEmptyWord); - - boolean modified = false; - if (firstEmptyWord < other.firstEmptyWord) { - modified = true; - ensureCapacity(other.firstEmptyWord); - firstEmptyWord = other.firstEmptyWord; - } - - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - - // Perform logical OR on words in common - for (int i = 0; i < wordsInCommon; i++) { - int before = localWords[i]; - localWords[i] |= localOtherWords[i]; - modified = modified || before != localWords[i]; - } - - // Copy any remaining words - if (wordsInCommon < other.firstEmptyWord) { - modified = true; - System.arraycopy( - other.words, wordsInCommon, words, - wordsInCommon, firstEmptyWord - wordsInCommon - ); - } - if (modified) { - size = -1; - } - return modified; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(IntSet c) - { - if (c == null || c.isEmpty() || isEmpty()) { - return false; - } - if (c == this) { - clear(); - return true; - } - - final FastSet other = convert(c); - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - - // Perform logical (a & !b) on words in common - boolean modified = false; - for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { - int before = localWords[i]; - localWords[i] &= ~localOtherWords[i]; - modified = modified || before != localWords[i]; - } - if (modified) { - fixFirstEmptyWord(); - size = -1; - } - return modified; - } - - /** 
- * {@inheritDoc} - */ - @Override - public boolean retainAll(IntSet c) - { - if (isEmpty() || c == this) { - return false; - } - if (c == null || c.isEmpty()) { - clear(); - return true; - } - - final FastSet other = convert(c); - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - - boolean modified = false; - if (firstEmptyWord > other.firstEmptyWord) { - modified = true; - while (firstEmptyWord > other.firstEmptyWord) { - localWords[--firstEmptyWord] = 0; - } - } - - // Perform logical AND on words in common - for (int i = 0; i < firstEmptyWord; i++) { - int before = localWords[i]; - localWords[i] &= localOtherWords[i]; - modified = modified || before != localWords[i]; - } - if (modified) { - fixFirstEmptyWord(); - size = -1; - } - return modified; - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - words = new int[10]; - firstEmptyWord = 0; - size = 0; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean contains(int i) - { - if (isEmpty() || i < 0) { - return false; - } - int wordIndex = wordIndexNoCheck(i); - return (wordIndex < firstEmptyWord) - && ((words[wordIndex] & (1 << i)) != 0); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(IntSet c) - { - if (c == null || c.isEmpty() || c == this) { - return true; - } - if (isEmpty()) { - return false; - } - - final FastSet other = convert(c); - - if (other.firstEmptyWord > firstEmptyWord) { - return false; - } - - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - for (int i = 0; i < other.firstEmptyWord; i++) { - int o = localOtherWords[i]; - if ((localWords[i] & o) != o) { - return false; - } - } - return true; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(IntSet c, int minElements) - { - if (minElements < 1) { - throw new IllegalArgumentException(); - } - if ((size >= 0 && size < minElements) || c == null || c.isEmpty() || 
isEmpty()) { - return false; - } - if (this == c) { - return size() >= minElements; - } - - final FastSet other = convert(c); - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - - int count = 0; - for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { - count += BitCount.count(localWords[i] & localOtherWords[i]); - if (count >= minElements) { - return true; - } - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(IntSet c) - { - if (c == null || c.isEmpty() || c == this) { - return true; - } - if (isEmpty()) { - return false; - } - - final FastSet other = convert(c); - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - - for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { - if ((localWords[i] & localOtherWords[i]) != 0) { - return true; - } - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(IntSet c) - { - if (c == null || c.isEmpty()) { - return 0; - } - if (c == this) { - return size(); - } - if (isEmpty()) { - return 0; - } - - final FastSet other = convert(c); - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - - int count = 0; - for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { - count += BitCount.count(localWords[i] & localOtherWords[i]); - } - return count; - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator iterator() - { - return new BitIterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator descendingIterator() - { - return new ReverseBitIterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public int last() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - return multiplyByWordSize(firstEmptyWord - 1) - + (WORD_SIZE - Integer.numberOfLeadingZeros(words[firstEmptyWord - 1])) - 1; 
- } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - if (isEmpty()) { - return; - } - if (size > 0) { - size = last() - size + 1; - } - int lastWordMask = ALL_ONES_WORD >>> Integer.numberOfLeadingZeros(words[firstEmptyWord - 1]); - final int[] localWords = words; // faster - for (int i = 0; i < firstEmptyWord - 1; i++) { - localWords[i] ^= ALL_ONES_WORD; - } - localWords[firstEmptyWord - 1] ^= lastWordMask; - fixFirstEmptyWord(); - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet complemented() - { - FastSet clone = clone(); - clone.complement(); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet empty() - { - return new FastSet(); - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - if (isEmpty()) { - return 0D; - } - return 1D; - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - if (isEmpty()) { - return 0D; - } - return (double) firstEmptyWord / size(); - } - - /** - * Convert a given collection to a {@link FastSet} instance - */ - private FastSet convert(IntSet c) - { - if (c instanceof FastSet) { - return (FastSet) c; - } - if (c == null) { - return new FastSet(); - } - - FastSet res = new FastSet(); - IntIterator itr = c.iterator(); - while (itr.hasNext()) { - res.add(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet convert(Collection c) - { - FastSet res = empty(); - if (c != null) { - for (int i : c) { - res.add(i); - } - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet convert(int... 
a) - { - FastSet res = new FastSet(); - if (a != null) { - for (int i : a) { - res.add(i); - } - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(int fromIndex, int toIndex) - { - if (fromIndex > toIndex) { - throw new IndexOutOfBoundsException( - "fromIndex: " + fromIndex - + " > toIndex: " + toIndex - ); - } - if (fromIndex == toIndex) { - add(fromIndex); - return; - } - - // Increase capacity if necessary - int startWordIndex = wordIndex(fromIndex); - int endWordIndex = wordIndex(toIndex); - expandTo(endWordIndex); - - final int[] localWords = words; // faster - - boolean modified = false; - int firstWordMask = ALL_ONES_WORD << fromIndex; - int lastWordMask = ALL_ONES_WORD >>> -(toIndex + 1); - if (startWordIndex == endWordIndex) { - // Case 1: One word - int before = localWords[startWordIndex]; - localWords[startWordIndex] |= (firstWordMask & lastWordMask); - modified = localWords[startWordIndex] != before; - } else { - // Case 2: Multiple words - // Handle first word - int before = localWords[startWordIndex]; - localWords[startWordIndex] |= firstWordMask; - modified = localWords[startWordIndex] != before; - - // Handle intermediate words, if any - for (int i = startWordIndex + 1; i < endWordIndex; i++) { - modified = modified || localWords[i] != ALL_ONES_WORD; - localWords[i] = ALL_ONES_WORD; - } - - // Handle last word - before = localWords[endWordIndex]; - localWords[endWordIndex] |= lastWordMask; - modified = modified || localWords[endWordIndex] != before; - } - if (modified) { - size = -1; - } - } - - /** - * {@inheritDoc} - */ - @Override - public void clear(int fromIndex, int toIndex) - { - if (fromIndex > toIndex) { - throw new IndexOutOfBoundsException( - "fromIndex: " + fromIndex - + " > toIndex: " + toIndex - ); - } - if (fromIndex == toIndex) { - remove(fromIndex); - return; - } - - int startWordIndex = wordIndex(fromIndex); - if (startWordIndex >= firstEmptyWord) { - return; - } - - int endWordIndex = 
wordIndex(toIndex); - if (endWordIndex >= firstEmptyWord) { - toIndex = last(); - endWordIndex = firstEmptyWord - 1; - } - - final int[] localWords = words; // faster - - boolean modified = false; - int firstWordMask = ALL_ONES_WORD << fromIndex; - int lastWordMask = ALL_ONES_WORD >>> -(toIndex + 1); - if (startWordIndex == endWordIndex) { - // Case 1: One word - int before = localWords[startWordIndex]; - localWords[startWordIndex] &= ~(firstWordMask & lastWordMask); - modified = localWords[startWordIndex] != before; - } else { - // Case 2: Multiple words - // Handle first word - int before = localWords[startWordIndex]; - localWords[startWordIndex] &= ~firstWordMask; - modified = localWords[startWordIndex] != before; - - // Handle intermediate words, if any - for (int i = startWordIndex + 1; i < endWordIndex; i++) { - modified = modified || localWords[i] != 0; - localWords[i] = 0; - } - - // Handle last word - before = localWords[endWordIndex]; - localWords[endWordIndex] &= ~lastWordMask; - modified = modified || localWords[endWordIndex] != before; - } - if (modified) { - fixFirstEmptyWord(); - size = -1; - } - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(int e) - { - int wordIndex = wordIndex(e); - expandTo(wordIndex); - int mask = (1 << e); - words[wordIndex] ^= mask; - fixFirstEmptyWord(); - if (size >= 0) { - if ((words[wordIndex] & mask) == 0) { - size--; - } else { - size++; - } - } - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(IntSet o) - { - // empty set cases - if (this.isEmpty() && o.isEmpty()) { - return 0; - } - if (this.isEmpty()) { - return -1; - } - if (o.isEmpty()) { - return 1; - } - - final FastSet other = convert(o); - final int[] localWords = words; // faster - final int[] localOtherWords = other.words; // faster - - if (firstEmptyWord > other.firstEmptyWord) { - return 1; - } - if (firstEmptyWord < other.firstEmptyWord) { - return -1; - } - for (int i = firstEmptyWord - 1; i >= 0; i--) { - long w1 = 
localWords[i] & 0xFFFFFFFFL; - long w2 = localOtherWords[i] & 0xFFFFFFFFL; - int res = w1 < w2 ? -1 : (w1 > w2 ? 1 : 0); - if (res != 0) { - return res; - } - } - return 0; - } - - /** - * {@inheritDoc} - */ - @Override - public int get(int index) - { - if (index < 0) { - throw new IndexOutOfBoundsException(); - } - - int count = 0; - final int[] localWords = words; // faster - for (int j = 0; j < firstEmptyWord; j++) { - int w = localWords[j]; - int current = BitCount.count(w); - if (index < count + current) { - int bit = -1; - for (int skip = index - count; skip >= 0; skip--) { - bit = Integer.numberOfTrailingZeros(w & (ALL_ONES_WORD << (bit + 1))); - } - return multiplyByWordSize(j) + bit; - } - count += current; - } - throw new NoSuchElementException(); - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(int e) - { - if (e < 0) { - throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); - } - if (isEmpty()) { - return -1; - } - - int index = wordIndex(e); - if (index >= firstEmptyWord || (words[index] & (1 << e)) == 0) { - return -1; - } - int count = BitCount.count(words, index); - count += BitCount.count(words[index] & ~(ALL_ONES_WORD << e)); - return count; - - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet intersection(IntSet other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return empty(); - } - if (other == this) { - return clone(); - } - - final FastSet o = convert(other); - FastSet res = new FastSet(Math.min(firstEmptyWord, o.firstEmptyWord)); - res.firstEmptyWord = res.words.length; - - final int[] localWords = words; // faster - final int[] localOtherWords = o.words; // faster - final int[] localResWords = res.words; // faster - - for (int i = 0; i < res.firstEmptyWord; i++) { - localResWords[i] = localWords[i] & localOtherWords[i]; - } - res.fixFirstEmptyWord(); - res.size = -1; - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet union(IntSet 
other) - { - if (other == null || other.isEmpty() || this == other) { - return clone(); - } - - final FastSet o = convert(other); - if (isEmpty()) { - return o.clone(); - } - - FastSet res = new FastSet(Math.max(firstEmptyWord, o.firstEmptyWord)); - res.firstEmptyWord = res.words.length; - final int wordsInCommon = Math.min(firstEmptyWord, o.firstEmptyWord); - - final int[] localWords = words; // faster - final int[] localOtherWords = o.words; // faster - final int[] localResWords = res.words; // faster - - for (int i = 0; i < wordsInCommon; i++) { - localResWords[i] = localWords[i] | localOtherWords[i]; - } - - if (wordsInCommon < firstEmptyWord) { - System.arraycopy( - localWords, wordsInCommon, localResWords, wordsInCommon, - res.firstEmptyWord - wordsInCommon - ); - } - if (wordsInCommon < o.firstEmptyWord) { - System.arraycopy( - localOtherWords, wordsInCommon, localResWords, wordsInCommon, - res.firstEmptyWord - wordsInCommon - ); - } - res.size = -1; - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet difference(IntSet other) - { - if (other == null || other.isEmpty()) { - return clone(); - } - if (other == this || isEmpty()) { - return empty(); - } - - final FastSet o = convert(other); - FastSet res = new FastSet(firstEmptyWord); - res.firstEmptyWord = firstEmptyWord; - - final int[] localWords = words; // faster - final int[] localOtherWords = o.words; // faster - final int[] localResWords = res.words; // faster - - int i = 0; - final int m = Math.min(firstEmptyWord, o.firstEmptyWord); - for (; i < m; i++) { - localResWords[i] = localWords[i] & ~localOtherWords[i]; - } - if (i < firstEmptyWord) { - System.arraycopy(localWords, i, localResWords, i, firstEmptyWord - i); - } else { - res.fixFirstEmptyWord(); - } - res.size = -1; - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public FastSet symmetricDifference(IntSet other) - { - if (other == null || other.isEmpty()) { - return clone(); - } - if (other == this) { - 
return empty(); - } - - final FastSet o = convert(other); - if (isEmpty()) { - return o.clone(); - } - - FastSet res = new FastSet(Math.max(firstEmptyWord, o.firstEmptyWord)); - res.firstEmptyWord = res.words.length; - final int wordsInCommon = Math.min(firstEmptyWord, o.firstEmptyWord); - - final int[] localWords = words; // faster - final int[] localOtherWords = o.words; // faster - final int[] localResWords = res.words; // faster - - for (int i = 0; i < wordsInCommon; i++) { - localResWords[i] = localWords[i] ^ localOtherWords[i]; - } - - if (wordsInCommon < firstEmptyWord) { - System.arraycopy( - localWords, wordsInCommon, localResWords, wordsInCommon, - res.firstEmptyWord - wordsInCommon - ); - } else if (wordsInCommon < o.firstEmptyWord) { - System.arraycopy( - localOtherWords, wordsInCommon, localResWords, wordsInCommon, - res.firstEmptyWord - wordsInCommon - ); - } else { - res.fixFirstEmptyWord(); - } - res.size = -1; - return res; - } - - /** - * Save the state of the {@link ConciseSet}instance to a stream - */ - private void writeObject(ObjectOutputStream s) throws IOException - { - assert words != null; - if (firstEmptyWord < words.length) { - words = Arrays.copyOf(words, firstEmptyWord); - } - s.defaultWriteObject(); - } - - /** - * Reconstruct the {@link ConciseSet} instance from a stream - */ - private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException - { - s.defaultReadObject(); - firstEmptyWord = words.length; - size = -1; - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - final StringBuilder s = new StringBuilder("INTERNAL REPRESENTATION:\n"); - final Formatter f = new Formatter(s, Locale.ENGLISH); - - if (isEmpty()) { - return s.append("null\n").toString(); - } - - // elements - f.format("Elements: %s\n", toString()); - - // raw representation of words - for (int i = 0; i < firstEmptyWord; i++) { - f.format( - "words[%d] = %s (from %d to %d)\n", - Integer.valueOf(i), - 
toBinaryString(words[i]), - Integer.valueOf(multiplyByWordSize(i)), - Integer.valueOf(multiplyByWordSize(i + 1) - 1) - ); - } - - // object attributes - f.format("wordsInUse: %d\n", firstEmptyWord); - f.format("size: %s\n", (size == -1 ? "invalid" : Integer.toString(size))); - f.format("words.length: %d\n", words.length); - - // compression - f.format("bitmap compression: %.2f%%\n", 100D * bitmapCompressionRatio()); - f.format("collection compression: %.2f%%\n", 100D * collectionCompressionRatio()); - - return s.toString(); - } - - /** - * Iterates over bits - *

- * This iterator allows for modifications during the iteration, that is it - * is possible to add/remove elements through {@link #add(int)}, - * {@link #remove(int)}, {@link #addAll(IntSet)}, {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, - * {@link IntIterator#next()} returns the first integral greater than the - * last visited one. - */ - private class BitIterator implements IntIterator - { - private int nextIndex; - private int nextBit; - private int last; - - /** - * identify the first bit - */ - private BitIterator() - { - nextIndex = 0; - if (isEmpty()) { - return; - } - - last = -1; // unused! - - // find the first non-empty word - while (words[nextIndex] == 0) { - nextIndex++; - } - - // find the first set bit - nextBit = Integer.numberOfTrailingZeros(words[nextIndex]); - } - - /** - * find the first set bit after nextIndex + nextBit - */ - void prepareNext() - { - // find the next set bit within the current word - int w = words[nextIndex]; - while ((++nextBit < WORD_SIZE)) { - if ((w & (1 << nextBit)) != 0) { - return; - } - } - - // find the first non-empty word - do { - if (++nextIndex == firstEmptyWord) { - return; - } - } while ((w = words[nextIndex]) == 0); - nextBit = Integer.numberOfTrailingZeros(w); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean hasNext() - { - return nextIndex < firstEmptyWord; - } - - /** - * {@inheritDoc} - */ - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - last = multiplyByWordSize(nextIndex) + nextBit; - prepareNext(); - return last; - } - - /** - * {@inheritDoc} - */ - @Override - public void skipAllBefore(int element) - { - if (element <= 0 || element <= last) { - return; - } - - // identify where the element is - int newNextIndex = wordIndexNoCheck(element); - int newNextBit = element & (WORD_SIZE - 1); - if (newNextIndex < nextIndex || (newNextIndex == nextIndex && newNextBit <= nextBit)) { - return; - } - - // "element" is 
the next item to return, unless it does not exist - nextIndex = newNextIndex; - if (nextIndex >= firstEmptyWord) { - return; - } - nextBit = newNextBit; - if ((words[nextIndex] & (1 << nextBit)) == 0) { - prepareNext(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void remove() - { - FastSet.this.remove(last); - } - - @Override - public IntIterator clone() - { - BitIterator retVal = new BitIterator(); - retVal.nextIndex = nextIndex; - retVal.nextBit = nextBit; - retVal.last = last; - return retVal; - } - } - - /** - * Iterates over bits in reverse order - *

- * This iterator allows for modifications during the iteration, that is it - * is possible to add/remove elements through {@link #add(int)}, - * {@link #remove(int)}, {@link #addAll(IntSet)}, {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, - * {@link IntIterator#next()} returns the first integral greater than the - * last visited one. - */ - private class ReverseBitIterator implements IntIterator - { - private int nextIndex; - private int nextBit; - private int last; - - /** - * identify the first bit - */ - private ReverseBitIterator() - { - nextIndex = firstEmptyWord - 1; - if (isEmpty()) { - return; - } - - last = Integer.MAX_VALUE; // unused! - nextBit = WORD_SIZE - Integer.numberOfLeadingZeros(words[nextIndex]) - 1; - } - - /** - * find the first set bit after nextIndex + nextBit - */ - void prepareNext() - { - // find the next set bit within the current word - int w = words[nextIndex]; - while ((--nextBit >= 0)) { - if ((w & (1 << nextBit)) != 0) { - return; - } - } - - // find the first non-empty word - do { - if (--nextIndex == -1) { - return; - } - } while ((w = words[nextIndex]) == 0); - nextBit = WORD_SIZE - Integer.numberOfLeadingZeros(w) - 1; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean hasNext() - { - return nextIndex >= 0; - } - - /** - * {@inheritDoc} - */ - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - last = multiplyByWordSize(nextIndex) + nextBit; - prepareNext(); - return last; - } - - /** - * {@inheritDoc} - */ - @Override - public void skipAllBefore(int element) - { - if (element < 0) { - nextIndex = -1; - return; - } - if (element >= last) { - return; - } - - // identify where the element is - int newNextIndex = wordIndexNoCheck(element); - int newNextBit = element & (WORD_SIZE - 1); - if (newNextIndex > nextIndex || (newNextIndex == nextIndex && newNextBit >= nextBit)) { - return; - } - - // "element" is the next item to return, unless it does 
not exist - nextIndex = newNextIndex; - nextBit = newNextBit; - if ((words[nextIndex] & (1 << nextBit)) == 0) { - prepareNext(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void remove() - { - FastSet.this.remove(last); - } - - @Override - public IntIterator clone() - { - BitIterator retVal = new BitIterator(); - retVal.nextIndex = nextIndex; - retVal.nextBit = nextBit; - retVal.last = last; - return retVal; - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java deleted file mode 100755 index e81434a890bb..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java +++ /dev/null @@ -1,1012 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.intset; - -// update CompactIdentityHashSet.java, UniqueSet.java and -// SoftHashMapIndex.java accordingly. - -import io.druid.extendedset.utilities.IntHashCode; - -import java.util.Arrays; -import java.util.Collection; -import java.util.ConcurrentModificationException; -import java.util.NoSuchElementException; - -/** - * Implements a fast hash-set. - *

- * Inspired by http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ - * ontopia/utils/CompactHashSet.java - * - * @author Alessandro Colantonio - * @version $Id: HashIntSet.java 156 2011-09-01 00:13:57Z cocciasik $ - */ -public class HashIntSet extends AbstractIntSet -{ - protected final static int INITIAL_SIZE = 3; - protected final static double LOAD_FACTOR = 0.75D; - - /** - * empty cell - */ - protected final static int EMPTY = -1; - - /** - * When an object is deleted this object is put into the hashtable in its - * place, so that other objects with the same key (collisions) further down - * the hashtable are not lost after we delete an object in the collision - * chain. - */ - protected final static int REMOVED = -2; - - /** - * number of elements - */ - protected int size; - - /** - * This is the number of empty cells. It's not necessarily the same as - * objects.length - elements, because some cells may contain REMOVED. - */ - protected int freecells; - - /** - * cells - */ - protected int[] cells; - - /** - * concurrent modification during iteration - */ - protected int modCount; - - /** - * Constructs a new, empty set. - */ - public HashIntSet() - { - this(INITIAL_SIZE); - } - - /** - * Constructs a new, empty set. - * - * @param initialSize - */ - public HashIntSet(int initialSize) - { - if (initialSize <= 0) { - throw new IllegalArgumentException(); - } - cells = new int[initialSize]; - modCount = 0; - clear(); - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator iterator() - { - return new SortedIterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public IntIterator descendingIterator() - { - return new DescendingSortedIterator(); - } - - /** - * Similar to {@link #iterator()}, but with no particular order - * - * @return iterator with no sorting - */ - public IntIterator unsortedIterator() - { - return new UnsortedIterator(); - } - - /** - * Returns the number of elements in this set (its cardinality). 
- */ - @Override - public int size() - { - return size; - } - - /** - * Returns true if this set contains no elements. - */ - @Override - public boolean isEmpty() - { - return size == 0; - } - - /** - * Compute the index of the element - * - * @param o element to search - * - * @return index of the element in {@link #cells} - */ - private final int toIndex(int o) - { - return (o & 0x7FFFFFFF) % cells.length; - } - - /** - * Find position of the integer in {@link #cells}. If not found, returns the - * first empty cell. - * - * @param element element to search - * - * @return if returned value >=0, it returns the index of the - * element; if returned value <0, the index of the - * first empty cell is -(returned value - 1) - */ - private int findElementOrEmpty(int element) - { - assert element >= 0; - int index = toIndex(IntHashCode.hashCode(element)); - int offset = 1; - - while (cells[index] != EMPTY) { - // element found! - if (cells[index] == element) { - return index; - } - - // compute the next index to check - index = toIndex(index + offset); - offset <<= 1; - offset++; - if (offset < 0) { - offset = 2; - } - } - - // element not found! - return -(index + 1); - } - - /** - * Find position of the integer in {@link #cells}. If not found, returns the - * first removed cell. - * - * @param element element to search - * - * @return if returned value >=0, it returns the index of the - * element; if returned value <0, the index of the - * first empty cell is -(returned value - 1) - */ - private int findElementOrRemoved(int element) - { - assert element >= 0; - int index = toIndex(IntHashCode.hashCode(element)); - int offset = 1; - int removed = -1; - - while (cells[index] != EMPTY) { - // element found! 
- if (cells[index] == element) { - return index; - } - - // remember the last removed cell if we don't find the element - if (cells[index] == REMOVED) { - removed = index; - } - - index = toIndex(index + offset); - offset <<= 1; - offset++; - if (offset < 0) { - offset = 2; - } - } - if (removed >= 0) { - return -(removed + 1); - } - return index; - } - - /** - * Returns true if this set contains the specified element. - * - * @param element element whose presence in this set is to be tested. - * - * @return true if this set contains the specified element. - */ - @Override - public boolean contains(int element) - { - if (element < 0) { - throw new IndexOutOfBoundsException("element < 0: " + element); - } - if (isEmpty()) { - return false; - } - return findElementOrEmpty(element) >= 0; - } - - /** - * Adds the specified element to this set if it is not already present. - * - * @param element element to be added to this set. - * - * @return true if the set did not already contain the specified - * element. - */ - @Override - public boolean add(int element) - { - if (element < 0) { - throw new IndexOutOfBoundsException("element < 0: " + element); - } - int index = findElementOrRemoved(element); - if (index >= 0) { - if (cells[index] == element) { - return false; - } - freecells--; - } else { - index = -(index + 1); - } - - modCount++; - size++; - - // set the integer - cells[index] = element; - - // do we need to rehash? - if (1 - ((double) freecells / cells.length) > LOAD_FACTOR) { - rehash(); - } - return true; - } - - /** - * Removes the specified element from the set. - */ - @Override - public boolean remove(int element) - { - if (element < 0) { - throw new IndexOutOfBoundsException("element < 0: " + element); - } - int index = findElementOrEmpty(element); - if (index < 0) { - return false; - } - - cells[index] = REMOVED; - modCount++; - size--; - return true; - } - - /** - * Removes all of the elements from this set. 
- */ - @Override - public void clear() - { - size = 0; - Arrays.fill(cells, EMPTY); - freecells = cells.length; - modCount++; - } - - /** - * Figures out correct size for rehashed set, then does the rehash. - */ - protected void rehash() - { - // do we need to increase capacity, or are there so many - // deleted objects hanging around that rehashing to the same - // size is sufficient? if 5% (arbitrarily chosen number) of - // cells can be freed up by a rehash, we do it. - - int gargagecells = cells.length - (size + freecells); - if ((double) gargagecells / cells.length > 0.05D) - // rehash with same size - { - rehash(cells.length); - } else - // rehash with increased capacity - { - rehash((cells.length << 1) + 1); - } - } - - /** - * Rehashes to a bigger size. - */ - protected void rehash(int newCapacity) - { - HashIntSet rehashed = new HashIntSet(newCapacity); - @SuppressWarnings("hiding") - int[] cells = rehashed.cells; - for (int element : this.cells) { - if (element < 0) - // removed or empty - { - continue; - } - - // add the element - cells[-(rehashed.findElementOrEmpty(element) + 1)] = element; - } - this.cells = cells; - freecells = newCapacity - size; - modCount++; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(IntSet c) - { - if (c == null || c.isEmpty()) { - return false; - } - IntIterator itr; - if (c instanceof HashIntSet) { - itr = ((HashIntSet) c).unsortedIterator(); - } else { - itr = c.iterator(); - } - boolean res = false; - while (itr.hasNext()) { - res |= add(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(IntSet c) - { - if (c == null || c.isEmpty()) { - return false; - } - IntIterator itr; - if (c instanceof HashIntSet) { - itr = ((HashIntSet) c).unsortedIterator(); - } else { - itr = c.iterator(); - } - boolean res = false; - while (itr.hasNext()) { - res |= remove(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean 
retainAll(IntSet c) - { - if (c == null || c.isEmpty()) { - return false; - } - boolean res = false; - for (int i = 0; i < cells.length; i++) { - if (cells[i] >= 0 && !c.contains(cells[i])) { - cells[i] = REMOVED; - res = true; - size--; - } - } - if (res) { - modCount++; - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet clone() - { - HashIntSet cloned = new HashIntSet(cells.length); - System.arraycopy(cells, 0, cloned.cells, 0, cells.length); - cloned.freecells = freecells; - cloned.size = size; - cloned.modCount = 0; - return cloned; - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - if (isEmpty()) { - return 0D; - } - return cells.length / Math.ceil(last() / 32D); - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - return isEmpty() ? 0D : (double) cells.length / size(); - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet complemented() - { - return (HashIntSet) super.complemented(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(IntSet c) - { - IntIterator itr; - if (c instanceof HashIntSet) { - itr = ((HashIntSet) c).unsortedIterator(); - } else { - itr = c.iterator(); - } - boolean res = true; - while (res && itr.hasNext()) { - res &= contains(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(IntSet c) - { - IntIterator itr; - if (c instanceof HashIntSet) { - itr = ((HashIntSet) c).unsortedIterator(); - } else { - itr = c.iterator(); - } - boolean res = true; - while (res && itr.hasNext()) { - if (contains(itr.next())) { - return true; - } - } - return false; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(IntSet c, int minElements) - { - IntIterator itr; - if (c instanceof HashIntSet) { - itr = ((HashIntSet) c).unsortedIterator(); - } else { - itr = c.iterator(); - } - while (minElements > 0 && 
itr.hasNext()) { - if (contains(itr.next())) { - minElements--; - } - } - return minElements == 0; - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet convert(int... a) - { - HashIntSet res = new HashIntSet((int) (a.length / LOAD_FACTOR) + 1); - for (int e : a) { - res.add(e); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet convert(Collection c) - { - HashIntSet res = new HashIntSet((int) (c.size() / LOAD_FACTOR) + 1); - for (int e : c) { - res.add(e); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - return "size: " + size + ", freecells: " + freecells + ", " - + Arrays.toString(cells); - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet symmetricDifference(IntSet c) - { - HashIntSet res = clone(); - IntIterator itr; - if (c instanceof HashIntSet) { - itr = ((HashIntSet) c).unsortedIterator(); - } else { - itr = c.iterator(); - } - while (itr.hasNext()) { - res.flip(itr.next()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet union(IntSet other) - { - return (HashIntSet) super.union(other); - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet difference(IntSet other) - { - return (HashIntSet) super.difference(other); - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet intersection(IntSet other) - { - return (HashIntSet) super.intersection(other); - } - - /** - * {@inheritDoc} - */ - @Override - public HashIntSet empty() - { - return new HashIntSet(); - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(int element) - { - if (element < 0) { - throw new IndexOutOfBoundsException("element < 0: " + element); - } - modCount++; - int index = findElementOrRemoved(element); - if (index >= 0) { - // REMOVE - if (cells[index] == element) { - cells[index] = REMOVED; - size--; - return; - } - freecells--; - } else { - index = -(index + 1); - } - - // ADD - cells[index] = element; - 
size++; - - // do we need to rehash? - if (1 - ((double) freecells / cells.length) > LOAD_FACTOR) { - rehash(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public int get(int i) - { - return toArray()[i]; - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(int e) - { - if (e < 0) { - throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); - } - return Arrays.binarySearch(toArray(), e); - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(IntSet c) - { - int res = 0; - IntIterator itr; - if (c instanceof HashIntSet) { - itr = ((HashIntSet) c).unsortedIterator(); - } else { - itr = c.iterator(); - } - while (itr.hasNext()) { - if (contains(itr.next())) { - res++; - } - } - return res; - - } - - /** - * {@inheritDoc} - */ - @Override - public int last() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - int max = 0; - for (int element : cells) { - if (max < element) { - max = element; - } - } - return max; - } - - /** - * {@inheritDoc} - */ - @Override - public int first() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - int min = Integer.MAX_VALUE; - for (int element : cells) { - if (element >= 0 && min > element) { - min = element; - } - } - return min; - } - - /** - * {@inheritDoc} - */ - @Override - public int[] toArray(int[] a) - { - if (a.length < size) { - throw new IllegalArgumentException(); - } - if (isEmpty()) { - return a; - } - int i = 0; - for (int element : this.cells) { - if (element < 0) - // removed or empty - { - continue; - } - - // copy the element - a[i++] = element; - } - Arrays.sort(a, 0, size); - return a; - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return Arrays.toString(toArray()); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - if (isEmpty()) { - return 0; - } - int h = 1; - for (int e : cells) { - if (e >= 0) { - h ^= IntHashCode.hashCode(e); - } - } - return h; 
- } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (!(obj instanceof HashIntSet)) { - return super.equals(obj); - } - final HashIntSet other = (HashIntSet) obj; - if (size != other.size) { - return false; - } - for (int e : other.cells) { - if (e >= 0 && !contains(e)) { - return false; - } - } - return true; - } - - /** - * Iterates over the hashset, with no sorting - */ - private class UnsortedIterator implements IntIterator - { - private int nextIndex = 0; - private int current = -1; - private int expectedModCount = modCount; - - public UnsortedIterator() - { - nextIndex = 0; - skipEmpty(); - expectedModCount = modCount; - } - - void skipEmpty() - { - while (nextIndex < cells.length - && (cells[nextIndex] == EMPTY || cells[nextIndex] == REMOVED)) { - nextIndex++; - } - } - - @Override - public boolean hasNext() - { - return nextIndex < cells.length; - } - - @Override - public int next() - { - if (modCount != expectedModCount) { - throw new ConcurrentModificationException(); - } - if (nextIndex >= cells.length) { - throw new NoSuchElementException(); - } - - current = nextIndex; - nextIndex++; - skipEmpty(); - return cells[current]; - } - - @Override - public void remove() - { - if (modCount != expectedModCount) { - throw new ConcurrentModificationException(); - } - if (current < 0) { - throw new IllegalStateException(); - } - // delete object - cells[current] = REMOVED; - size--; - modCount++; - expectedModCount = modCount; // this is expected! 
- current = -1; - } - - @Override - public void skipAllBefore(int element) - { - throw new UnsupportedOperationException(); - } - - @Override - public IntIterator clone() - { - UnsortedIterator retVal = new UnsortedIterator(); - retVal.nextIndex = nextIndex; - retVal.current = current; - retVal.expectedModCount = expectedModCount; - return retVal; - } - } - - /** - * Iterates over the hashset, with no sorting - */ - private class SortedIterator implements IntIterator - { - int[] elements = toArray(); - int next = 0; - - @Override - public boolean hasNext() - { - return next < size; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return elements[next++]; - } - - @Override - public void remove() - { - if (elements[next - 1] == REMOVED) { - throw new IllegalStateException(); - } - HashIntSet.this.remove(elements[next - 1]); - elements[next - 1] = REMOVED; - } - - @Override - public void skipAllBefore(int element) - { - if (element <= elements[next]) { - return; - } - next = Arrays.binarySearch(elements, next + 1, size, element); - if (next < 0) { - next = -(next + 1); - } - } - - @Override - public IntIterator clone() - { - SortedIterator retVal = new SortedIterator(); - retVal.next = next; - retVal.elements = elements.clone(); - return retVal; - } - } - - /** - * Iterates over the hashset, with no sorting - */ - private class DescendingSortedIterator implements IntIterator - { - int[] elements = toArray(); - int next = size - 1; - - @Override - public boolean hasNext() - { - return next >= 0; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return elements[next--]; - } - - @Override - public void remove() - { - if (elements[next + 1] == REMOVED) { - throw new IllegalStateException(); - } - HashIntSet.this.remove(elements[next + 1]); - elements[next + 1] = REMOVED; - } - - @Override - public void skipAllBefore(int element) - { - if (element >= 
elements[next]) { - return; - } - next = Arrays.binarySearch(elements, 0, next, element); - if (next < 0) { - next = -(next + 1) - 1; - } - } - - @Override - public IntIterator clone() - { - DescendingSortedIterator retVal = new DescendingSortedIterator(); - retVal.elements = elements.clone(); - retVal.next = next; - return retVal; - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java deleted file mode 100755 index 6dbf8bb1d0d8..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java +++ /dev/null @@ -1,1157 +0,0 @@ -/* -* Copyright 2012 Metamarkets Group Inc. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package io.druid.extendedset.intset; - - -import com.google.common.collect.Iterators; -import com.google.common.collect.Lists; -import com.google.common.collect.MinMaxPriorityQueue; -import com.google.common.collect.UnmodifiableIterator; -import com.google.common.primitives.Ints; -import io.druid.extendedset.utilities.IntList; - -import java.nio.ByteBuffer; -import java.nio.IntBuffer; -import java.util.Arrays; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; - -public class ImmutableConciseSet -{ - private final static int CHUNK_SIZE = 10000; - private final IntBuffer words; - private final int lastWordIndex; - private final int size; - - public ImmutableConciseSet() - { - this.words = null; - this.lastWordIndex = -1; - this.size = 0; - } - - public ImmutableConciseSet(ByteBuffer byteBuffer) - { - this.words = byteBuffer.asIntBuffer(); - this.lastWordIndex = words.capacity() - 1; - this.size = calcSize(); - } - - public ImmutableConciseSet(IntBuffer buffer) - { - this.words = buffer; - this.lastWordIndex = (words == null || buffer.capacity() == 0) ? -1 : words.capacity() - 1; - this.size = calcSize(); - } - - public static ImmutableConciseSet newImmutableFromMutable(ConciseSet conciseSet) - { - if (conciseSet == null || conciseSet.isEmpty()) { - return new ImmutableConciseSet(); - } - return new ImmutableConciseSet(IntBuffer.wrap(conciseSet.getWords())); - } - - public static int compareInts(int x, int y) - { - return (x < y) ? -1 : ((x == y) ? 0 : 1); - } - - public static ImmutableConciseSet union(ImmutableConciseSet... 
sets) - { - return union(Arrays.asList(sets)); - } - - public static ImmutableConciseSet union(Iterable sets) - { - return union(sets.iterator()); - } - - public static ImmutableConciseSet union(Iterator sets) - { - ImmutableConciseSet partialResults = doUnion(Iterators.limit(sets, CHUNK_SIZE)); - while (sets.hasNext()) { - final UnmodifiableIterator partialIter = Iterators.singletonIterator(partialResults); - partialResults = doUnion(Iterators.concat(partialIter, Iterators.limit(sets, CHUNK_SIZE))); - } - return partialResults; - } - - public static ImmutableConciseSet intersection(ImmutableConciseSet... sets) - { - return intersection(Arrays.asList(sets)); - } - - public static ImmutableConciseSet intersection(Iterable sets) - { - return intersection(sets.iterator()); - } - - public static ImmutableConciseSet intersection(Iterator sets) - { - ImmutableConciseSet partialResults = doIntersection(Iterators.limit(sets, CHUNK_SIZE)); - while (sets.hasNext()) { - final UnmodifiableIterator partialIter = Iterators.singletonIterator(partialResults); - partialResults = doIntersection( - Iterators.concat(Iterators.limit(sets, CHUNK_SIZE), partialIter) - ); - } - return partialResults; - } - - public static ImmutableConciseSet complement(ImmutableConciseSet set) - { - return doComplement(set); - } - - public static ImmutableConciseSet complement(ImmutableConciseSet set, int length) - { - if (length <= 0) { - return new ImmutableConciseSet(); - } - - // special case when the set is empty and we need a concise set of ones - if (set == null || set.isEmpty()) { - final int leftoverBits = length % 31; - final int onesBlocks = length / 31; - final int[] words; - if (onesBlocks > 0) { - if (leftoverBits > 0) { - words = new int[]{ - ConciseSetUtils.SEQUENCE_BIT | (onesBlocks - 1), - ConciseSetUtils.onesUntil(leftoverBits) - }; - } else { - words = new int[]{ - ConciseSetUtils.SEQUENCE_BIT | (onesBlocks - 1) - }; - } - } else { - if (leftoverBits > 0) { - words = new 
int[]{ConciseSetUtils.onesUntil(leftoverBits)}; - } else { - words = new int[]{}; - } - } - ConciseSet newSet = new ConciseSet(words, false); - return ImmutableConciseSet.newImmutableFromMutable(newSet); - } - - IntList retVal = new IntList(); - int endIndex = length - 1; - - int wordsWalked = 0; - int last = 0; - - WordIterator iter = set.newWordIterator(); - - while (iter.hasNext()) { - int word = iter.next(); - wordsWalked = iter.wordsWalked; - if (ConciseSetUtils.isLiteral(word)) { - retVal.add(ConciseSetUtils.ALL_ZEROS_LITERAL | ~word); - } else { - retVal.add(ConciseSetUtils.SEQUENCE_BIT ^ word); - } - } - - last = set.getLast(); - - int distFromLastWordBoundary = ConciseSetUtils.maxLiteralLengthModulus(last); - int distToNextWordBoundary = ConciseSetUtils.MAX_LITERAL_LENGTH - distFromLastWordBoundary - 1; - last = (last < 0) ? 0 : last + distToNextWordBoundary; - - int diff = endIndex - last; - // only append a new literal when the end index is beyond the current word - if (diff > 0) { - // first check if the difference can be represented in 31 bits - if (diff <= ConciseSetUtils.MAX_LITERAL_LENGTH) { - retVal.add(ConciseSetUtils.ALL_ONES_LITERAL); - } else { - // create a fill from last set bit to endIndex for number of 31 bit blocks minus one - int endIndexWordCount = ConciseSetUtils.maxLiteralLengthDivision(endIndex); - retVal.add(ConciseSetUtils.SEQUENCE_BIT | (endIndexWordCount - wordsWalked - 1)); - retVal.add(ConciseSetUtils.ALL_ONES_LITERAL); - } - } - - // clear bits after last set value - int lastWord = retVal.get(retVal.length() - 1); - if (ConciseSetUtils.isLiteral(lastWord)) { - lastWord = ConciseSetUtils.clearBitsAfterInLastWord( - lastWord, - ConciseSetUtils.maxLiteralLengthModulus(endIndex) - ); - } - - retVal.set(retVal.length() - 1, lastWord); - trimZeros(retVal); - - if (retVal.isEmpty()) { - return new ImmutableConciseSet(); - } - return compact(new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()))); - } - - public static 
ImmutableConciseSet compact(ImmutableConciseSet set) - { - IntList retVal = new IntList(); - WordIterator itr = set.newWordIterator(); - while (itr.hasNext()) { - addAndCompact(retVal, itr.next()); - } - return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); - } - - private static void addAndCompact(IntList set, int wordToAdd) - { - int length = set.length(); - if (set.isEmpty()) { - set.add(wordToAdd); - return; - } - - int last = set.get(length - 1); - - int newWord = 0; - if (ConciseSetUtils.isAllOnesLiteral(last)) { - if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) { - newWord = 0x40000001; - } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { - newWord = wordToAdd + 1; - } - } else if (ConciseSetUtils.isOneSequence(last)) { - if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) { - newWord = last + 1; - } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { - newWord = last + ConciseSetUtils.getSequenceNumWords(wordToAdd); - } - } else if (ConciseSetUtils.isAllZerosLiteral(last)) { - if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) { - newWord = 0x00000001; - } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { - newWord = wordToAdd + 1; - } - } else if (ConciseSetUtils.isZeroSequence(last)) { - if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) { - newWord = last + 1; - } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { - newWord = last + ConciseSetUtils.getSequenceNumWords(wordToAdd); - } - } else if (ConciseSetUtils.isLiteralWithSingleOneBit(last)) { - int position = Integer.numberOfTrailingZeros(last) + 1; - if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) { - newWord = 0x00000001 | (position << 25); - } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { - newWord = (wordToAdd + 1) | 
(position << 25); - } - } else if (ConciseSetUtils.isLiteralWithSingleZeroBit(last)) { - int position = Integer.numberOfTrailingZeros(~last) + 1; - if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) { - newWord = 0x40000001 | (position << 25); - } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) { - newWord = (wordToAdd + 1) | (position << 25); - } - } - - if (newWord != 0) { - set.set(length - 1, newWord); - } else { - set.add(wordToAdd); - } - } - - private static ImmutableConciseSet doUnion(Iterator sets) - { - IntList retVal = new IntList(); - - // lhs = current word position, rhs = the iterator - // Comparison is first by index, then one fills > literals > zero fills - // one fills are sorted by length (longer one fills have priority) - // similarily, shorter zero fills have priority - MinMaxPriorityQueue theQ = MinMaxPriorityQueue.orderedBy( - new Comparator() - { - @Override - public int compare(WordHolder h1, WordHolder h2) - { - int w1 = h1.getWord(); - int w2 = h2.getWord(); - int s1 = h1.getIterator().startIndex; - int s2 = h2.getIterator().startIndex; - - if (s1 != s2) { - return compareInts(s1, s2); - } - - if (ConciseSetUtils.isOneSequence(w1)) { - if (ConciseSetUtils.isOneSequence(w2)) { - return -compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); - } - return -1; - } else if (ConciseSetUtils.isLiteral(w1)) { - if (ConciseSetUtils.isOneSequence(w2)) { - return 1; - } else if (ConciseSetUtils.isLiteral(w2)) { - return 0; - } - return -1; - } else { - if (!ConciseSetUtils.isZeroSequence(w2)) { - return 1; - } - return compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); - } - } - } - ).create(); - - // populate priority queue - while (sets.hasNext()) { - ImmutableConciseSet set = sets.next(); - - if (set != null && !set.isEmpty()) { - WordIterator itr = set.newWordIterator(); - theQ.add(new WordHolder(itr.next(), 
itr)); - } - } - - int currIndex = 0; - - while (!theQ.isEmpty()) { - // create a temp list to hold everything that will get pushed back into the priority queue after each run - List wordsToAdd = Lists.newArrayList(); - - // grab the top element from the priority queue - WordHolder curr = theQ.poll(); - int word = curr.getWord(); - WordIterator itr = curr.getIterator(); - - // if the next word in the queue starts at a different point than where we ended off we need to create a zero gap - // to fill the space - if (currIndex < itr.startIndex) { - addAndCompact(retVal, itr.startIndex - currIndex - 1); - currIndex = itr.startIndex; - } - - if (ConciseSetUtils.isOneSequence(word)) { - // extract a literal from the flip bits of the one sequence - int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word); - - // advance everything past the longest ones sequence - WordHolder nextVal = theQ.peek(); - while (nextVal != null && - nextVal.getIterator().startIndex < itr.wordsWalked) { - WordHolder entry = theQ.poll(); - int w = entry.getWord(); - WordIterator i = entry.getIterator(); - - if (i.startIndex == itr.startIndex) { - // if a literal was created from a flip bit, OR it with other literals or literals from flip bits in the same - // position - if (ConciseSetUtils.isOneSequence(w)) { - flipBitLiteral |= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); - } else if (ConciseSetUtils.isLiteral(w)) { - flipBitLiteral |= w; - } else { - flipBitLiteral |= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); - } - } - - i.advanceTo(itr.wordsWalked); - if (i.hasNext()) { - wordsToAdd.add(new WordHolder(i.next(), i)); - } - nextVal = theQ.peek(); - } - - // advance longest one literal forward and push result back to priority queue - // if a flip bit is still needed, put it in the correct position - int newWord = word & 0xC1FFFFFF; - if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { - flipBitLiteral ^= ConciseSetUtils.ALL_ONES_LITERAL; - int position = 
Integer.numberOfTrailingZeros(flipBitLiteral) + 1; - newWord |= (position << 25); - } - addAndCompact(retVal, newWord); - currIndex = itr.wordsWalked; - - if (itr.hasNext()) { - wordsToAdd.add(new WordHolder(itr.next(), itr)); - } - } else if (ConciseSetUtils.isLiteral(word)) { - // advance all other literals - WordHolder nextVal = theQ.peek(); - while (nextVal != null && - nextVal.getIterator().startIndex == itr.startIndex) { - - WordHolder entry = theQ.poll(); - int w = entry.getWord(); - WordIterator i = entry.getIterator(); - - // if we still have zero fills with flipped bits, OR them here - if (ConciseSetUtils.isLiteral(w)) { - word |= w; - } else { - int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); - if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) { - word |= flipBitLiteral; - i.advanceTo(itr.wordsWalked); - } - } - - if (i.hasNext()) { - wordsToAdd.add(new WordHolder(i.next(), i)); - } - - nextVal = theQ.peek(); - } - - // advance the set with the current literal forward and push result back to priority queue - addAndCompact(retVal, word); - currIndex++; - - if (itr.hasNext()) { - wordsToAdd.add(new WordHolder(itr.next(), itr)); - } - } else { // zero fills - int flipBitLiteral; - WordHolder nextVal = theQ.peek(); - - while (nextVal != null && - nextVal.getIterator().startIndex == itr.startIndex) { - // check if literal can be created flip bits of other zero sequences - WordHolder entry = theQ.poll(); - int w = entry.getWord(); - WordIterator i = entry.getIterator(); - - flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); - if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) { - wordsToAdd.add(new WordHolder(flipBitLiteral, i)); - } else if (i.hasNext()) { - wordsToAdd.add(new WordHolder(i.next(), i)); - } - nextVal = theQ.peek(); - } - - // check if a literal needs to be created from the flipped bits of this sequence - flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word); - if (flipBitLiteral 
!= ConciseSetUtils.ALL_ZEROS_LITERAL) { - wordsToAdd.add(new WordHolder(flipBitLiteral, itr)); - } else if (itr.hasNext()) { - wordsToAdd.add(new WordHolder(itr.next(), itr)); - } - } - - theQ.addAll(wordsToAdd); - } - - if (retVal.isEmpty()) { - return new ImmutableConciseSet(); - } - return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); - } - - public static ImmutableConciseSet doIntersection(Iterator sets) - { - IntList retVal = new IntList(); - - // lhs = current word position, rhs = the iterator - // Comparison is first by index, then zero fills > literals > one fills - // zero fills are sorted by length (longer zero fills have priority) - // similarily, shorter one fills have priority - MinMaxPriorityQueue theQ = MinMaxPriorityQueue.orderedBy( - new Comparator() - { - @Override - public int compare(WordHolder h1, WordHolder h2) - { - int w1 = h1.getWord(); - int w2 = h2.getWord(); - int s1 = h1.getIterator().startIndex; - int s2 = h2.getIterator().startIndex; - - if (s1 != s2) { - return compareInts(s1, s2); - } - - if (ConciseSetUtils.isZeroSequence(w1)) { - if (ConciseSetUtils.isZeroSequence(w2)) { - return -compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); - } - return -1; - } else if (ConciseSetUtils.isLiteral(w1)) { - if (ConciseSetUtils.isZeroSequence(w2)) { - return 1; - } else if (ConciseSetUtils.isLiteral(w2)) { - return 0; - } - return -1; - } else { - if (!ConciseSetUtils.isOneSequence(w2)) { - return 1; - } - return compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2)); - } - } - } - ).create(); - - // populate priority queue - while (sets.hasNext()) { - ImmutableConciseSet set = sets.next(); - - if (set == null || set.isEmpty()) { - return new ImmutableConciseSet(); - } - - WordIterator itr = set.newWordIterator(); - theQ.add(new WordHolder(itr.next(), itr)); - } - - int currIndex = 0; - int wordsWalkedAtSequenceEnd = Integer.MAX_VALUE; - - while 
(!theQ.isEmpty()) { - // create a temp list to hold everything that will get pushed back into the priority queue after each run - List wordsToAdd = Lists.newArrayList(); - - // grab the top element from the priority queue - WordHolder curr = theQ.poll(); - int word = curr.getWord(); - WordIterator itr = curr.getIterator(); - - // if a sequence has ended, we can break out because of Boolean logic - if (itr.startIndex >= wordsWalkedAtSequenceEnd) { - break; - } - - // if the next word in the queue starts at a different point than where we ended off we need to create a one gap - // to fill the space - if (currIndex < itr.startIndex) { - // number of 31 bit blocks that compromise the fill minus one - addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (itr.startIndex - currIndex - 1))); - currIndex = itr.startIndex; - } - - if (ConciseSetUtils.isZeroSequence(word)) { - // extract a literal from the flip bits of the zero sequence - int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word); - - // advance everything past the longest zero sequence - WordHolder nextVal = theQ.peek(); - while (nextVal != null && - nextVal.getIterator().startIndex < itr.wordsWalked) { - WordHolder entry = theQ.poll(); - int w = entry.getWord(); - WordIterator i = entry.getIterator(); - - if (i.startIndex == itr.startIndex) { - // if a literal was created from a flip bit, AND it with other literals or literals from flip bits in the same - // position - if (ConciseSetUtils.isZeroSequence(w)) { - flipBitLiteral &= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w); - } else if (ConciseSetUtils.isLiteral(w)) { - flipBitLiteral &= w; - } else { - flipBitLiteral &= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); - } - } - - i.advanceTo(itr.wordsWalked); - if (i.hasNext()) { - wordsToAdd.add(new WordHolder(i.next(), i)); - } else { - wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd); - } - nextVal = theQ.peek(); - } - - // advance longest zero literal forward 
and push result back to priority queue - // if a flip bit is still needed, put it in the correct position - int newWord = word & 0xC1FFFFFF; - if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) { - int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1; - newWord = (word & 0xC1FFFFFF) | (position << 25); - } - addAndCompact(retVal, newWord); - currIndex = itr.wordsWalked; - - if (itr.hasNext()) { - wordsToAdd.add(new WordHolder(itr.next(), itr)); - } else { - wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd); - } - } else if (ConciseSetUtils.isLiteral(word)) { - // advance all other literals - WordHolder nextVal = theQ.peek(); - while (nextVal != null && - nextVal.getIterator().startIndex == itr.startIndex) { - - WordHolder entry = theQ.poll(); - int w = entry.getWord(); - WordIterator i = entry.getIterator(); - - // if we still have one fills with flipped bits, AND them here - if (ConciseSetUtils.isLiteral(w)) { - word &= w; - } else { - int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); - if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { - word &= flipBitLiteral; - i.advanceTo(itr.wordsWalked); - } - } - - if (i.hasNext()) { - wordsToAdd.add(new WordHolder(i.next(), i)); - } else { - wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd); - } - - nextVal = theQ.peek(); - } - - // advance the set with the current literal forward and push result back to priority queue - addAndCompact(retVal, word); - currIndex++; - - if (itr.hasNext()) { - wordsToAdd.add(new WordHolder(itr.next(), itr)); - } else { - wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd); - } - } else { // one fills - int flipBitLiteral; - WordHolder nextVal = theQ.peek(); - - while (nextVal != null && - nextVal.getIterator().startIndex == itr.startIndex) { - // check if literal can be created flip bits of other one sequences - WordHolder entry = theQ.poll(); - int w = 
entry.getWord(); - WordIterator i = entry.getIterator(); - - flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w); - if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { - wordsToAdd.add(new WordHolder(flipBitLiteral, i)); - } else if (i.hasNext()) { - wordsToAdd.add(new WordHolder(i.next(), i)); - } else { - wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd); - } - - nextVal = theQ.peek(); - } - - // check if a literal needs to be created from the flipped bits of this sequence - flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word); - if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) { - wordsToAdd.add(new WordHolder(flipBitLiteral, itr)); - } else if (itr.hasNext()) { - wordsToAdd.add(new WordHolder(itr.next(), itr)); - } else { - wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd); - } - } - - theQ.addAll(wordsToAdd); - } - - // fill in any missing one sequences - if (currIndex < wordsWalkedAtSequenceEnd) { - addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (wordsWalkedAtSequenceEnd - currIndex - 1))); - } - - if (retVal.isEmpty()) { - return new ImmutableConciseSet(); - } - return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); - } - - public static ImmutableConciseSet doComplement(ImmutableConciseSet set) - { - if (set == null || set.isEmpty()) { - return new ImmutableConciseSet(); - } - - IntList retVal = new IntList(); - WordIterator iter = set.newWordIterator(); - while (iter.hasNext()) { - int word = iter.next(); - if (ConciseSetUtils.isLiteral(word)) { - retVal.add(ConciseSetUtils.ALL_ZEROS_LITERAL | ~word); - } else { - retVal.add(ConciseSetUtils.SEQUENCE_BIT ^ word); - } - } - // do not complement after the last element - int lastWord = retVal.get(retVal.length() - 1); - if (ConciseSetUtils.isLiteral(lastWord)) { - lastWord = ConciseSetUtils.clearBitsAfterInLastWord( - lastWord, - ConciseSetUtils.maxLiteralLengthModulus(set.getLast()) - ); - } - - 
retVal.set(retVal.length() - 1, lastWord); - - trimZeros(retVal); - - if (retVal.isEmpty()) { - return new ImmutableConciseSet(); - } - return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())); - } - - // Based on the ConciseSet implementation by Alessandro Colantonio - private static void trimZeros(IntList set) - { - // loop over ALL_ZEROS_LITERAL words - int w; - int last = set.length() - 1; - do { - w = set.get(last); - if (w == ConciseSetUtils.ALL_ZEROS_LITERAL) { - set.set(last, 0); - last--; - } else if (ConciseSetUtils.isZeroSequence(w)) { - if (ConciseSetUtils.isSequenceWithNoBits(w)) { - set.set(last, 0); - last--; - } else { - // convert the sequence in a 1-bit literal word - set.set(last, ConciseSetUtils.getLiteral(w, false)); - return; - } - } else { - // one sequence or literal - return; - } - if (set.isEmpty() || last == -1) { - return; - } - } while (true); - } - - public byte[] toBytes() - { - if (words == null) { - return new byte[]{}; - } - ByteBuffer buf = ByteBuffer.allocate(words.capacity() * Ints.BYTES); - buf.asIntBuffer().put(words.asReadOnlyBuffer()); - return buf.array(); - } - - public int getLastWordIndex() - { - return lastWordIndex; - } - - // Based on the ConciseSet implementation by Alessandro Colantonio - private int calcSize() - { - int retVal = 0; - for (int i = 0; i <= lastWordIndex; i++) { - int w = words.get(i); - if (ConciseSetUtils.isLiteral(w)) { - retVal += ConciseSetUtils.getLiteralBitCount(w); - } else { - if (ConciseSetUtils.isZeroSequence(w)) { - if (!ConciseSetUtils.isSequenceWithNoBits(w)) { - retVal++; - } - } else { - retVal += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1); - if (!ConciseSetUtils.isSequenceWithNoBits(w)) { - retVal--; - } - } - } - } - - return retVal; - } - - public int size() - { - return size; - } - - // Based on the ConciseSet implementation by Alessandro Colantonio - public int getLast() - { - if (isEmpty()) { - return -1; - } - - int last = 0; 
- for (int i = 0; i <= lastWordIndex; i++) { - int w = words.get(i); - if (ConciseSetUtils.isLiteral(w)) { - last += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - last += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1); - } - } - - int w = words.get(lastWordIndex); - if (ConciseSetUtils.isLiteral(w)) { - last -= Integer.numberOfLeadingZeros(ConciseSetUtils.getLiteralBits(w)); - } else { - last--; - } - return last; - } - - public boolean contains(final int integer) - { - if (isEmpty()) { - return false; - } - final IntSet.IntIterator intIterator = iterator(); - intIterator.skipAllBefore(integer); - return intIterator.hasNext() && intIterator.next() == integer; - } - - // Based on the ConciseSet implementation by Alessandro Colantonio - public int get(int i) - { - if (i < 0) { - throw new IndexOutOfBoundsException(); - } - - // initialize data - int firstSetBitInWord = 0; - int position = i; - int setBitsInCurrentWord = 0; - for (int j = 0; j <= lastWordIndex; j++) { - int w = words.get(j); - if (ConciseSetUtils.isLiteral(w)) { - // number of bits in the current word - setBitsInCurrentWord = ConciseSetUtils.getLiteralBitCount(w); - - // check if the desired bit is in the current word - if (position < setBitsInCurrentWord) { - int currSetBitInWord = -1; - for (; position >= 0; position--) { - currSetBitInWord = Integer.numberOfTrailingZeros(w & (0xFFFFFFFF << (currSetBitInWord + 1))); - } - return firstSetBitInWord + currSetBitInWord; - } - - // skip the 31-bit block - firstSetBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - // number of involved bits (31 * blocks) - int sequenceLength = ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1); - - // check the sequence type - if (ConciseSetUtils.isOneSequence(w)) { - if (ConciseSetUtils.isSequenceWithNoBits(w)) { - setBitsInCurrentWord = sequenceLength; - if (position < setBitsInCurrentWord) { - return firstSetBitInWord + 
position; - } - } else { - setBitsInCurrentWord = sequenceLength - 1; - if (position < setBitsInCurrentWord) - // check whether the desired set bit is after the - // flipped bit (or after the first block) - { - return firstSetBitInWord + position + (position < ConciseSetUtils.getFlippedBit(w) ? 0 : 1); - } - } - } else { - if (ConciseSetUtils.isSequenceWithNoBits(w)) { - setBitsInCurrentWord = 0; - } else { - setBitsInCurrentWord = 1; - if (position == 0) { - return firstSetBitInWord + ConciseSetUtils.getFlippedBit(w); - } - } - } - - // skip the 31-bit blocks - firstSetBitInWord += sequenceLength; - } - - // update the number of found set bits - position -= setBitsInCurrentWord; - } - - throw new IndexOutOfBoundsException(Integer.toString(i)); - } - - public int compareTo(ImmutableConciseSet other) - { - return words.asReadOnlyBuffer().compareTo(other.words.asReadOnlyBuffer()); - } - - private boolean isEmpty() - { - return words == null || words.limit() == 0; - } - - @Override - // Based on the AbstractIntSet implementation by Alessandro Colantonio - public String toString() - { - IntSet.IntIterator itr = iterator(); - if (!itr.hasNext()) { - return "[]"; - } - - StringBuilder sb = new StringBuilder(); - sb.append('['); - for (; ; ) { - sb.append(itr.next()); - if (!itr.hasNext()) { - return sb.append(']').toString(); - } - sb.append(", "); - } - } - - // Based on the ConciseSet implementation by Alessandro Colantonio - public IntSet.IntIterator iterator() - { - if (isEmpty()) { - return new IntSet.IntIterator() - { - @Override - public void skipAllBefore(int element) {/*empty*/} - - @Override - public boolean hasNext() {return false;} - - @Override - public int next() {throw new NoSuchElementException();} - - @Override - public void remove() {throw new UnsupportedOperationException();} - - @Override - public IntSet.IntIterator clone() {throw new UnsupportedOperationException();} - }; - } - return new BitIterator(); - } - - public WordIterator newWordIterator() - 
{ - return new WordIterator(); - } - - private static class WordHolder - { - private final int word; - private final WordIterator iterator; - - public WordHolder( - int word, - WordIterator iterator - ) - { - this.word = word; - this.iterator = iterator; - } - - public int getWord() - { - return word; - } - - public WordIterator getIterator() - { - return iterator; - } - } - - // Based on the ConciseSet implementation by Alessandro Colantonio - private class BitIterator implements IntSet.IntIterator - { - final ConciseSetUtils.LiteralAndZeroFillExpander litExp; - final ConciseSetUtils.OneFillExpander oneExp; - - ConciseSetUtils.WordExpander exp; - int nextIndex = 0; - int nextOffset = 0; - - private BitIterator() - { - litExp = ConciseSetUtils.newLiteralAndZeroFillExpander(); - oneExp = ConciseSetUtils.newOneFillExpander(); - - nextWord(); - } - - private BitIterator( - ConciseSetUtils.LiteralAndZeroFillExpander litExp, - ConciseSetUtils.OneFillExpander oneExp, - ConciseSetUtils.WordExpander exp, - int nextIndex, - int nextOffset - ) - { - this.litExp = litExp; - this.oneExp = oneExp; - this.exp = exp; - this.nextIndex = nextIndex; - this.nextOffset = nextOffset; - } - - @Override - public boolean hasNext() - { - while (!exp.hasNext()) { - if (nextIndex > lastWordIndex) { - return false; - } - nextWord(); - } - return true; - } - - @Override - public int next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return exp.next(); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - - @Override - public void skipAllBefore(int element) - { - while (true) { - exp.skipAllBefore(element); - if (exp.hasNext() || nextIndex > lastWordIndex) { - return; - } - nextWord(); - } - } - - @Override - public IntSet.IntIterator clone() - { - return new BitIterator( - (ConciseSetUtils.LiteralAndZeroFillExpander) litExp.clone(), - (ConciseSetUtils.OneFillExpander) oneExp.clone(), - exp.clone(), - nextIndex, - nextOffset - ); 
- } - - private void nextWord() - { - final int word = words.get(nextIndex++); - exp = ConciseSetUtils.isOneSequence(word) ? oneExp : litExp; - exp.reset(nextOffset, word, true); - - // prepare next offset - if (ConciseSetUtils.isLiteral(word)) { - nextOffset += ConciseSetUtils.MAX_LITERAL_LENGTH; - } else { - nextOffset += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(word) + 1); - } - } - } - - public class WordIterator implements Iterator - { - private int startIndex; - private int wordsWalked; - private int currWord; - private int nextWord; - private int currRow; - - private volatile boolean hasNextWord = false; - - WordIterator() - { - startIndex = -1; - wordsWalked = 0; - currRow = -1; - } - - public void advanceTo(int endCount) - { - while (hasNext() && wordsWalked < endCount) { - next(); - } - if (wordsWalked <= endCount) { - return; - } - - nextWord = (currWord & 0xC1000000) | (wordsWalked - endCount - 1); - startIndex = endCount; - hasNextWord = true; - } - - @Override - public boolean hasNext() - { - if (isEmpty()) { - return false; - } - if (hasNextWord) { - return true; - } - return currRow < (words.capacity() - 1); - } - - @Override - public Integer next() - { - if (hasNextWord) { - currWord = nextWord; - hasNextWord = false; - return new Integer(currWord); - } - - currWord = words.get(++currRow); - if (ConciseSetUtils.isLiteral(currWord)) { - startIndex = wordsWalked++; - } else { - startIndex = wordsWalked; - wordsWalked += ConciseSetUtils.getSequenceNumWords(currWord); - } - - return new Integer(currWord); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java deleted file mode 100755 index 3f15daeb8289..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java +++ /dev/null @@ -1,662 +0,0 @@ -/* - 
* (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.intset; - - -import io.druid.extendedset.ExtendedSet; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; - -/** - * Very similar to {@link ExtendedSet} but for the primitive int - * type. - * - * @author Alessandro Colantonio - * @version $Id: IntSet.java 135 2011-01-04 15:54:48Z cocciasik $ - * @see ArraySet - * @see ConciseSet - * @see FastSet - * @see HashIntSet - */ -public interface IntSet extends Cloneable, Comparable -{ - /** - * Generates the intersection set - * - * @param other {@link IntSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #retainAll(IntSet) - */ - public IntSet intersection(IntSet other); - - /** - * Generates the union set - * - * @param other {@link IntSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #addAll(IntSet) - */ - public IntSet union(IntSet other); - - /** - * Generates the difference set - * - * @param other {@link IntSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #removeAll(IntSet) - */ - public IntSet difference(IntSet other); - - /** - * Generates the symmetric difference set - * - * 
@param other {@link IntSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #flip(int) - */ - public IntSet symmetricDifference(IntSet other); - - /** - * Generates the complement set. The returned set is represented by all the - * elements strictly less than {@link #last()} that do not exist in the - * current set. - * - * @return the complement set - * - * @see IntSet#complement() - */ - public IntSet complemented(); - - /** - * Complements the current set. The modified set is represented by all the - * elements strictly less than {@link #last()} that do not exist in the - * current set. - * - * @see IntSet#complemented() - */ - public void complement(); - - /** - * Returns true if the specified {@link IntSet} - * instance contains any elements that are also contained within this - * {@link IntSet} instance - * - * @param other {@link IntSet} to intersect with - * - * @return a boolean indicating whether this {@link IntSet} - * intersects the specified {@link IntSet}. - */ - public boolean containsAny(IntSet other); - - /** - * Returns true if the specified {@link IntSet} - * instance contains at least minElements elements that are - * also contained within this {@link IntSet} instance - * - * @param other {@link IntSet} instance to intersect with - * @param minElements minimum number of elements to be contained within this - * {@link IntSet} instance - * - * @return a boolean indicating whether this {@link IntSet} - * intersects the specified {@link IntSet}. - * - * @throws IllegalArgumentException if minElements < 1 - */ - public boolean containsAtLeast(IntSet other, int minElements); - - /** - * Computes the intersection set size. - *

- * This is faster than calling {@link #intersection(IntSet)} and - * then {@link #size()} - * - * @param other {@link IntSet} instance that represents the right - * operand - * - * @return the size - */ - public int intersectionSize(IntSet other); - - /** - * Computes the union set size. - *

- * This is faster than calling {@link #union(IntSet)} and then - * {@link #size()} - * - * @param other {@link IntSet} instance that represents the right - * operand - * - * @return the size - */ - public int unionSize(IntSet other); - - /** - * Computes the symmetric difference set size. - *

- * This is faster than calling {@link #symmetricDifference(IntSet)} - * and then {@link #size()} - * - * @param other {@link IntSet} instance that represents the right - * operand - * - * @return the size - */ - public int symmetricDifferenceSize(IntSet other); - - /** - * Computes the difference set size. - *

- * This is faster than calling {@link #difference(IntSet)} and then - * {@link #size()} - * - * @param other {@link IntSet} instance that represents the right - * operand - * - * @return the size - */ - public int differenceSize(IntSet other); - - /** - * Computes the complement set size. - *

- * This is faster than calling {@link #complemented()} and then - * {@link #size()} - * - * @return the size - */ - public int complementSize(); - - /** - * Generates an empty set - * - * @return the empty set - */ - public IntSet empty(); - - /** - * See the clone() of {@link Object} - * - * @return cloned object - */ - public IntSet clone(); - - /** - * Computes the compression factor of the equivalent bitmap representation - * (1 means not compressed, namely a memory footprint similar to - * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) - * - * @return the compression factor - */ - public double bitmapCompressionRatio(); - - /** - * Computes the compression factor of the equivalent integer collection (1 - * means not compressed, namely a memory footprint similar to - * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) - * - * @return the compression factor - */ - public double collectionCompressionRatio(); - - /** - * @return a {@link IntIterator} instance to iterate over the set - */ - public IntIterator iterator(); - - /** - * @return a {@link IntIterator} instance to iterate over the set in - * descending order - */ - public IntIterator descendingIterator(); - - /** - * Prints debug info about the given {@link IntSet} implementation - * - * @return a string that describes the internal representation of the - * instance - */ - public String debugInfo(); - - /** - * Adds to the set all the elements between first and - * last, both included. - * - * @param from first element - * @param to last element - */ - public void fill(int from, int to); - - /** - * Removes from the set all the elements between first and - * last, both included. 
- * - * @param from first element - * @param to last element - */ - public void clear(int from, int to); - - /** - * Adds the element if it not existing, or removes it if existing - * - * @param e element to flip - * - * @see #symmetricDifference(IntSet) - */ - public void flip(int e); - - /** - * Gets the ith element of the set - * - * @param i position of the element in the sorted set - * - * @return the ith element of the set - * - * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to - * {@link #size()} - */ - public int get(int i); - - /** - * Provides position of element within the set. - *

- * It returns -1 if the element does not exist within the set. - * - * @param e element of the set - * - * @return the element position - */ - public int indexOf(int e); - - /** - * Converts a given array into an instance of the current class. - * - * @param a array to use to generate the new instance - * - * @return the converted collection - */ - public IntSet convert(int... a); - - /** - * Converts a given collection into an instance of the current class. - * - * @param c array to use to generate the new instance - * - * @return the converted collection - */ - public IntSet convert(Collection c); - - /** - * Returns the first (lowest) element currently in this set. - * - * @return the first (lowest) element currently in this set - * - * @throws NoSuchElementException if this set is empty - */ - public int first(); - - /** - * Returns the last (highest) element currently in this set. - * - * @return the last (highest) element currently in this set - * - * @throws NoSuchElementException if this set is empty - */ - public int last(); - - /** - * @return the number of elements in this set (its cardinality) - */ - public int size(); - - /** - * @return true if this set contains no elements - */ - public boolean isEmpty(); - - /** - * Returns true if this set contains the specified element. - * - * @param i element whose presence in this set is to be tested - * - * @return true if this set contains the specified element - */ - public boolean contains(int i); - - /** - * Adds the specified element to this set if it is not already present. It - * ensures that sets never contain duplicate elements. - * - * @param i element to be added to this set - * - * @return true if this set did not already contain the specified - * element - * - * @throws IllegalArgumentException if some property of the specified element prevents it from - * being added to this set - */ - public boolean add(int i); - - /** - * Removes the specified element from this set if it is present. 
- * - * @param i object to be removed from this set, if present - * - * @return true if this set contained the specified element - * - * @throws UnsupportedOperationException if the remove operation is not supported by this set - */ - public boolean remove(int i); - - /** - * Returns true if this set contains all of the elements of the - * specified collection. - * - * @param c collection to be checked for containment in this set - * - * @return true if this set contains all of the elements of the - * specified collection - * - * @throws NullPointerException if the specified collection contains one or more null - * elements and this set does not permit null elements - * (optional), or if the specified collection is null - * @see #contains(int) - */ - public boolean containsAll(IntSet c); - - /** - * Adds all of the elements in the specified collection to this set if - * they're not already present. - * - * @param c collection containing elements to be added to this set - * - * @return true if this set changed as a result of the call - * - * @throws NullPointerException if the specified collection contains one or more null - * elements and this set does not permit null elements, or if - * the specified collection is null - * @throws IllegalArgumentException if some property of an element of the specified collection - * prevents it from being added to this set - * @see #add(int) - */ - public boolean addAll(IntSet c); - - /** - * Retains only the elements in this set that are contained in the specified - * collection. In other words, removes from this set all of its elements - * that are not contained in the specified collection. 
- * - * @param c collection containing elements to be retained in this set - * - * @return true if this set changed as a result of the call - * - * @throws NullPointerException if this set contains a null element and the specified - * collection does not permit null elements (optional), or if - * the specified collection is null - * @see #remove(int) - */ - public boolean retainAll(IntSet c); - - /** - * Removes from this set all of its elements that are contained in the - * specified collection. - * - * @param c collection containing elements to be removed from this set - * - * @return true if this set changed as a result of the call - * - * @throws NullPointerException if this set contains a null element and the specified - * collection does not permit null elements (optional), or if - * the specified collection is null - * @see #remove(int) - * @see #contains(int) - */ - public boolean removeAll(IntSet c); - - /** - * Removes all of the elements from this set. The set will be empty after - * this call returns. - * - * @throws UnsupportedOperationException if the clear method is not supported by this set - */ - public void clear(); - - /** - * @return an array containing all the elements in this set, in the same - * order. - */ - public int[] toArray(); - - /** - * Returns an array containing all of the elements in this set. - *

- * If this set fits in the specified array with room to spare (i.e., the - * array has more elements than this set), the element in the array - * immediately following the end of the set are left unchanged. - * - * @param a the array into which the elements of this set are to be - * stored. - * - * @return the array containing all the elements in this set - * - * @throws NullPointerException if the specified array is null - * @throws IllegalArgumentException if this set does not fit in the specified array - */ - public int[] toArray(int[] a); - - /** - * Computes the power-set of the current set. - *

- * It is a particular implementation of the algorithm Apriori (see: - * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining - * Association Rules in Large Databases, in Proceedings of the - * 20th International Conference on Very Large Data Bases, - * p.487-499, 1994). The returned power-set does not contain the - * empty set. - *

- * The subsets composing the powerset are returned in a list that is sorted - * according to the lexicographical order provided by the integer set. - * - * @return the power-set - * - * @see #powerSet(int, int) - * @see #powerSetSize() - */ - public List powerSet(); - - /** - * Computes a subset of the power-set of the current set, composed by those - * subsets that have cardinality between min and - * max. - *

- * It is a particular implementation of the algorithm Apriori (see: - * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining - * Association Rules in Large Databases, in Proceedings of the - * 20th International Conference on Very Large Data Bases, - * p.487-499, 1994). The power-set does not contains the empty set. - *

- * The subsets composing the powerset are returned in a list that is sorted - * according to the lexicographical order provided by the integer set. - * - * @param min minimum subset size (greater than zero) - * @param max maximum subset size - * - * @return the power-set - * - * @see #powerSet() - * @see #powerSetSize(int, int) - */ - public List powerSet(int min, int max); - - /** - * Computes the power-set size of the current set. - *

- * The power-set does not contains the empty set. - * - * @return the power-set size - * - * @see #powerSet() - */ - public int powerSetSize(); - - /** - * Computes the power-set size of the current set, composed by those subsets - * that have cardinality between min and max. - *

- * The returned power-set does not contain the empty set. - * - * @param min minimum subset size (greater than zero) - * @param max maximum subset size - * - * @return the power-set size - * - * @see #powerSet(int, int) - */ - public int powerSetSize(int min, int max); - - /** - * Computes the Jaccard similarity coefficient between this set and the - * given set. - *

- * The coefficient is defined as - * |A intersection B| / |A union B|. - * - * @param other the other set - * - * @return the Jaccard similarity coefficient - * - * @see #jaccardDistance(IntSet) - */ - public double jaccardSimilarity(IntSet other); - - /** - * Computes the Jaccard distance between this set and the given set. - *

- * The coefficient is defined as - * 1 - {@link #jaccardSimilarity(IntSet)}. - * - * @param other the other set - * - * @return the Jaccard distance - * - * @see #jaccardSimilarity(IntSet) - */ - public double jaccardDistance(IntSet other); - - /** - * Computes the weighted version of the Jaccard similarity coefficient - * between this set and the given set. - *

- * The coefficient is defined as - * sum of min(A_i, B_i) / sum of max(A_i, B_i). - * - * @param other the other set - * - * @return the weighted Jaccard similarity coefficient - * - * @see #weightedJaccardDistance(IntSet) - */ - public double weightedJaccardSimilarity(IntSet other); - - /** - * Computes the weighted version of the Jaccard distance between this set - * and the given set. - *

- * The coefficient is defined as 1 - - * {@link #weightedJaccardSimilarity(IntSet)}. - * - * @param other the other set - * - * @return the weighted Jaccard distance - * - * @see #weightedJaccardSimilarity(IntSet) - */ - public double weightedJaccardDistance(IntSet other); - - /** - * An {@link Iterator}-like interface that allows to "skip" some elements of - * the set - */ - public interface IntIterator - { - /** - * @return true if the iterator has more elements. - */ - boolean hasNext(); - - /** - * @return the next element in the iteration. - * - * @throws NoSuchElementException iteration has no more elements. - */ - int next(); - - /** - * Removes from the underlying collection the last element returned by - * the iterator (optional operation). This method can be called only - * once per call to next. The behavior of an iterator is - * unspecified if the underlying collection is modified while the - * iteration is in progress in any way other than by calling this - * method. - * - * @throws UnsupportedOperationException if the remove operation is not supported by - * this Iterator. - * @throws IllegalStateException if the next method has not yet been called, - * or the remove method has already been called - * after the last call to the next method. - */ - void remove(); - - /** - * Skips all the elements before the the specified element, so that - * {@link #next()} gives the given element or, if it does not exist, the - * element immediately after according to the sorting provided by this - * set. - *

- * If element is less than the next element, it does - * nothing - * - * @param element first element to not skip - */ - public void skipAllBefore(int element); - - /** - * Clone the iterator - * - * @return a clone of the IntIterator - */ - public IntIterator clone(); - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java b/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java deleted file mode 100755 index 3026fdf25907..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package io.druid.extendedset.utilities; - -import java.io.IOException; -import java.io.ObjectInputStream; -import java.util.AbstractMap; -import java.util.AbstractSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Set; - -/** - * A {@link Map} backed by an array, where keys are the indices of the array, - * and values are the elements of the array. - *

- * Modifications to the map (i.e., through {@link #put(Integer, Object)} and - * {@link java.util.Map.Entry#setValue(Object)}) are reflected to the original array. - * However, the map has a fixed length, that is the length of the array. - * - * @param the type of elements represented by columns - * - * @author Alessandro Colantonio - * @version $Id$ - */ -public class ArrayMap extends AbstractMap implements java.io.Serializable -{ - /** - * generated serial ID - */ - private static final long serialVersionUID = -578029467093308343L; - - /** - * array backed by this map - */ - private final T[] array; - /** - * first index of the map - */ - private final int indexShift; - /** - * {@link Set} instance to iterate over #array - */ - private transient Set> entrySet; - - /** - * Initializes the map - * - * @param array array to manipulate - * @param indexShift first index of the map - */ - ArrayMap(T[] array, int indexShift) - { - this.array = array; - this.indexShift = indexShift; - entrySet = null; - } - - /** - * Initializes the map - * - * @param array array to manipulate - */ - ArrayMap(T[] array) - { - this(array, 0); - } - - /** - * Test - * - * @param args - */ - public static void main(String[] args) - { - ArrayMap am = new ArrayMap(new String[]{"Three", "Four", "Five"}, 3); - System.out.println(am); - am.put(5, "FIVE"); - System.out.println(am); - System.out.println(am.get(5)); - System.out.println(am.containsKey(2)); - System.out.println(am.containsKey(3)); - System.out.println(am.containsValue("THREE")); - System.out.println(am.keySet()); - System.out.println(am.values()); - } - - /** - * {@inheritDoc} - */ - @Override - public Set> entrySet() - { - if (entrySet == null) { - // create an entry for each element - final List entries = new ArrayList(array.length); - for (int i = 0; i < array.length; i++) { - entries.add(new SimpleEntry(i)); - } - - // create the Set instance - entrySet = new AbstractSet>() - { - @Override - public Iterator> iterator() - { - 
return new Iterator>() - { - int curr = 0; - - @Override - public boolean hasNext() - { - return curr < entries.size(); - } - - @Override - public Entry next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - return entries.get(curr++); - } - - @Override - public void remove() - { - throw new IllegalArgumentException(); - } - }; - } - - @Override - public int size() - { - return entries.size(); - } - }; - } - return entrySet; - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return array.length; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsKey(Object key) - { - int index = (Integer) key - indexShift; - return (index >= 0) && (index < array.length); - } - - /** - * {@inheritDoc} - */ - @Override - public T get(Object key) - { - return array[(Integer) key - indexShift]; - } - - /** - * {@inheritDoc} - */ - @Override - public T put(Integer key, T value) - { - int actualIndex = key - indexShift; - T old = array[actualIndex]; - array[actualIndex] = value; - return old; - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return Arrays.hashCode(array); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (!super.equals(obj)) { - return false; - } - if (!(obj instanceof ArrayMap)) { - return false; - } - return Arrays.equals(array, ((ArrayMap) obj).array); - } - - /** - * Reconstruct the instance from a stream - */ - private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException - { - s.defaultReadObject(); - entrySet = null; - } - - /** - * Entry of the map - */ - private class SimpleEntry implements Entry - { - /** - * index of {@link ArrayMap#array} - */ - final int actualIndex; - - /** - * Creates an entry - * - * @param index index of {@link ArrayMap#array} - */ - private SimpleEntry(int index) - { - this.actualIndex = index; - } - - /** - * {@inheritDoc} - */ - @Override - 
public Integer getKey() - { - return actualIndex + indexShift; - } - - /** - * {@inheritDoc} - */ - @Override - public T getValue() - { - return array[actualIndex]; - } - - /** - * {@inheritDoc} - */ - @Override - public T setValue(T value) - { - T old = array[actualIndex]; - array[actualIndex] = value; - return old; - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return (actualIndex + indexShift) + "=" + array[actualIndex]; - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java b/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java deleted file mode 100755 index 306a2e1e510c..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java +++ /dev/null @@ -1,350 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.utilities; - -import java.util.Random; - -/** - * Population count (a.k.a. Hamming distance) of a bitmap represented by an - * array of int. - *

- * Derived from http - * ://dalkescientific.com/writings/diary/popcnt.c - * - * @author Alessandro Colantonio - * @version $Id: BitCount.java 157 2011-11-14 14:25:15Z cocciasik $ - */ -public class BitCount -{ - /** - * Population count - *

- * It counts a single word - * - * @param word word to count - * - * @return population count - */ - public static int count(int word) - { - word -= ((word >>> 1) & 0x55555555); - word = (word & 0x33333333) + ((word >>> 2) & 0x33333333); - word = (word + (word >>> 4)) & 0x0F0F0F0F; - return (word * 0x01010101) >>> 24; - } - - /** - * Population count - * - * @param buffer array of int - * - * @return population count - */ - public static int count(int[] buffer) - { - return count(buffer, buffer.length); - } - - /** - * Population count - *

- * It counts 24 words at a time, then 3 at a time, then 1 at a time - * - * @param buffer array of int - * @param n number of elements of buffer to count - * - * @return population count - */ - public static int count(int[] buffer, int n) - { - final int n1 = n - n % 24; - final int n2 = n - n % 3; - - int cnt = 0; - int i; - for (i = 0; i < n1; i += 24) { - cnt += merging3(buffer, i); - } - for (; i < n2; i += 3) { - cnt += merging2(buffer, i); - } - cnt += popcount_fbsd2(buffer, i, n); - return cnt; - } - - // used by count() - private static int merging3(int[] buffer, int x) - { - int cnt1; - int cnt2; - int cnt = 0; - for (int i = x; i < x + 24; i += 3) { - cnt1 = buffer[i]; - cnt2 = buffer[i + 1]; - final int w = buffer[i + 2]; - cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); - cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); - cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); - cnt1 += (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); - cnt += (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); - } - cnt = (cnt & 0x00FF00FF) + ((cnt >>> 8) & 0x00FF00FF); - cnt += cnt >>> 16; - return cnt & 0x00000FFFF; - } - - // used by count() - private static int merging2(int[] buffer, int x) - { - int cnt1 = buffer[x]; - int cnt2 = buffer[x + 1]; - final int w = buffer[x + 2]; - cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); - cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); - cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); - cnt2 = (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); - cnt1 += cnt2; - cnt1 = (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); - cnt1 += cnt1 >>> 8; - cnt1 += cnt1 >>> 16; - return cnt1 & 0x000000FF; - } - - // used by count() - private static int popcount_fbsd2(int[] data, int x, int n) - { - int cnt = 0; - for (; x < n; x++) { - cnt += count(data[x]); - } - return cnt; - } - - /** - * Population count, skipping words at even positions - * - * @param 
buffer array of int - * - * @return population count - */ - public static int count_2(int[] buffer) - { - return count_2(buffer, buffer.length); - } - - /** - * Population count, skipping words at even positions - *

- * It counts 24 words at a time, then 3 at a time, then 1 at a time - * - * @param buffer array of int - * @param n number of elements of buffer to count - * - * @return population count - */ - public static int count_2(int[] buffer, int n) - { - final int n1 = n - n % 48; - final int n2 = n - n % 6; - - int cnt = 0; - int i; - for (i = 0; i < n1; i += 48) { - cnt += merging3_2(buffer, i); - } - for (; i < n2; i += 6) { - cnt += merging2_2(buffer, i); - } - cnt += popcount_fbsd2_2(buffer, i, n); - return cnt; - } - - // used by count_2() - private static int merging3_2(int[] buffer, int x) - { - int cnt1; - int cnt2; - int cnt = 0; - for (int i = x; i < x + 48; i += 6) { - cnt1 = buffer[i + 1]; - cnt2 = buffer[i + 3]; - final int w = buffer[i + 5]; - cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); - cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); - cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); - cnt1 += (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); - cnt += (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); - } - cnt = (cnt & 0x00FF00FF) + ((cnt >>> 8) & 0x00FF00FF); - cnt += cnt >>> 16; - return cnt & 0x00000FFFF; - } - - // used by count_2() - private static int merging2_2(int[] buffer, int x) - { - int cnt1 = buffer[x + 1]; - int cnt2 = buffer[x + 3]; - final int w = buffer[x + 5]; - cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); - cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); - cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); - cnt2 = (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); - cnt1 += cnt2; - cnt1 = (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); - cnt1 += cnt1 >>> 8; - cnt1 += cnt1 >>> 16; - return cnt1 & 0x000000FF; - } - - // used by count_2() - private static int popcount_fbsd2_2(int[] data, int x, int n) - { - int cnt = 0; - for (x++; x < n; x += 2) { - cnt += count(data[x]); - } - return cnt; - } - - /** - * Test - * - * @param args - */ - 
public static void main(String[] args) - { - final int trials = 10000; - final int maxLength = 10000; - - Random rnd = new Random(); - final int seed = rnd.nextInt(); - - System.out.print("Test correctness... "); - rnd = new Random(seed); - for (int i = 0; i < trials; i++) { - int[] x = new int[rnd.nextInt(maxLength)]; - for (int j = 0; j < x.length; j++) { - x[j] = rnd.nextInt(Integer.MAX_VALUE); - } - - int size1 = 0; - for (int j = 0; j < x.length; j++) { - size1 += count(x[j]); - } - int size2 = count(x); - - if (size1 != size2) { - System.out.println("i = " + i); - System.out.println("ERRORE!"); - System.out.println(size1 + ", " + size2); - for (int j = 0; j < x.length; j++) { - System.out.format("x[%d] = %d --> %d\n", j, x[j], count(x[j])); - } - return; - } - } - System.out.println("done!"); - - System.out.print("Test correctness II... "); - rnd = new Random(seed); - for (int i = 0; i < trials; i++) { - int[] x = new int[rnd.nextInt(maxLength << 1)]; - for (int j = 1; j < x.length; j += 2) { - x[j] = rnd.nextInt(Integer.MAX_VALUE); - } - - int size1 = 0; - for (int j = 1; j < x.length; j += 2) { - size1 += count(x[j]); - } - int size2 = count_2(x); - - if (size1 != size2) { - System.out.println("i = " + i); - System.out.println("ERRORE!"); - System.out.println(size1 + ", " + size2); - for (int j = 1; j < x.length; j += 2) { - System.out.format("x[%d] = %d --> %d\n", j, x[j], count(x[j])); - } - return; - } - } - System.out.println("done!"); - - System.out.print("Test time count(): "); - rnd = new Random(seed); - long t = System.currentTimeMillis(); - for (int i = 0; i < trials; i++) { - int[] x = new int[rnd.nextInt(maxLength)]; - for (int j = 0; j < x.length; j++) { - x[j] = rnd.nextInt(Integer.MAX_VALUE); - } - - @SuppressWarnings("unused") - int size = 0; - for (int j = 0; j < x.length; j++) { - size += count(x[j]); - } - } - System.out.println(System.currentTimeMillis() - t); - - System.out.print("Test time BitCount.count(): "); - rnd = new Random(seed); 
- t = System.currentTimeMillis(); - for (int i = 0; i < trials; i++) { - int[] x = new int[rnd.nextInt(maxLength)]; - for (int j = 0; j < x.length; j++) { - x[j] = rnd.nextInt(Integer.MAX_VALUE); - } - count(x); - } - System.out.println(System.currentTimeMillis() - t); - - System.out.print("Test II time count(): "); - rnd = new Random(seed); - t = System.currentTimeMillis(); - for (int i = 0; i < trials; i++) { - int[] x = new int[rnd.nextInt(maxLength << 1)]; - for (int j = 1; j < x.length; j += 2) { - x[j] = rnd.nextInt(Integer.MAX_VALUE); - } - - @SuppressWarnings("unused") - int size = 0; - for (int j = 1; j < x.length; j += 2) { - size += count(x[j]); - } - } - System.out.println(System.currentTimeMillis() - t); - - System.out.print("Test II time BitCount.count(): "); - rnd = new Random(seed); - t = System.currentTimeMillis(); - for (int i = 0; i < trials; i++) { - int[] x = new int[rnd.nextInt(maxLength << 1)]; - for (int j = 1; j < x.length; j += 2) { - x[j] = rnd.nextInt(Integer.MAX_VALUE); - } - count_2(x); - } - System.out.println(System.currentTimeMillis() - t); - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java b/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java deleted file mode 100755 index ec38ce38b99b..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java +++ /dev/null @@ -1,317 +0,0 @@ -package io.druid.extendedset.utilities; - -import io.druid.extendedset.ExtendedSet; -import io.druid.extendedset.intset.ConciseSet; -import io.druid.extendedset.wrappers.IntegerSet; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; - -/** - * This class implements a {@link Map} from a key of type K to a - * collection contains instances of I. 
- * - * @param key type - * @param item type - * @param {@link Collection} subclass used to collect items - * - * @author Alessandro Colantonio - * @version $Id: CollectionMap.java 152 2011-03-30 11:18:18Z cocciasik $ - */ -public class CollectionMap> extends LinkedHashMap -{ - private static final long serialVersionUID = -2613391212228461025L; - - /** - * empty collection - */ - private final C emptySet; - - /** - * Initializes the map by providing an instance of the empty collection - * - * @param emptySet the empty collection - */ - public CollectionMap(C emptySet) - { - this.emptySet = emptySet; - } - - /** - * Generates a new {@link CollectionMap} instance. It is an alternative to - * the constructor {@link #CollectionMap(Collection)} that reduces the code - * to write. - * - * @param key type - * @param item type - * @param {@link Collection} subclass used to collect items - * @param empty subset type - * @param emptySet the empty collection - * - * @return the new instance of {@link CollectionMap} - */ - public static , EX extends CX> - CollectionMap newCollectionMap(EX emptySet) - { - return new CollectionMap(emptySet); - } - - /** - * Test procedure - *

- * Expected output: - *

-   * {}
-   * {A=[1]}
-   * {A=[1, 2]}
-   * {A=[1, 2], B=[3]}
-   * {A=[1, 2], B=[3, 4, 5, 6]}
-   * true
-   * true
-   * false
-   * {A=[1], B=[3, 4, 5, 6]}
-   * {A=[1], B=[3, 4, 5, 6]}
-   * {A=[1], B=[6]}
-   * 
- * - * @param args - */ - public static void main(String[] args) - { - CollectionMap map = newCollectionMap(new IntegerSet(new ConciseSet())); - System.out.println(map); - - map.putItem("A", 1); - System.out.println(map); - - map.putItem("A", 2); - System.out.println(map); - - map.putItem("B", 3); - System.out.println(map); - - map.putAllItems("B", Arrays.asList(4, 5, 6)); - System.out.println(map); - - System.out.println(map.containsItem(1)); - System.out.println(map.containsItem(6)); - System.out.println(map.containsItem(7)); - - map.removeItem("A", 2); - System.out.println(map); - - map.removeItem("A", 3); - System.out.println(map); - - map.removeAllItems("B", Arrays.asList(1, 2, 3, 4, 5)); - System.out.println(map); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public CollectionMap clone() - { - // result - CollectionMap cloned = new CollectionMap(emptySet); - - // clone all the entries - cloned.putAll(this); - - // clone all the values - if (emptySet instanceof Cloneable) { - for (Entry e : cloned.entrySet()) { - try { - e.setValue((C) e.getValue().getClass().getMethod("clone").invoke(e.getValue())); - } - catch (Exception ex) { - throw new RuntimeException(ex); - } - } - } else { - for (Entry e : cloned.entrySet()) { - C copy = cloneEmptySet(); - copy.addAll(e.getValue()); - e.setValue(copy); - } - } - return cloned; - } - - /** - * Generates an empty {@link CollectionMap} instance with the same - * collection type for values - * - * @return the empty {@link CollectionMap} instance - */ - public CollectionMap empty() - { - return new CollectionMap(emptySet); - } - - /** - * Populates the current instance with the data from another map. In - * particular, it creates the list of keys associated to each value. 
- * - * @param map the input map - */ - public void mapValueToKeys(Map map) - { - for (Entry e : map.entrySet()) { - putItem(e.getValue(), e.getKey()); - } - } - - /** - * Generates a clone of the empty set - * - * @return a clone of the empty set - */ - @SuppressWarnings("unchecked") - private C cloneEmptySet() - { - try { - if (emptySet instanceof Cloneable) { - return (C) emptySet.getClass().getMethod("clone").invoke(emptySet); - } - return (C) emptySet.getClass().newInstance(); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** - * Checks if there are some collections that contain the given item - * - * @param item item to check - * - * @return true if the item exists within some collections - */ - public boolean containsItem(I item) - { - for (Entry e : entrySet()) { - if (e.getValue().contains(item)) { - return true; - } - } - return false; - } - - /** - * Adds an item to the collection corresponding to the given key - * - * @param key the key for the identification of the collection - * @param item item to add - * - * @return the updated collection of items for the given key - */ - public C putItem(K key, I item) - { - C items = get(key); - if (items == null) { - put(key, items = cloneEmptySet()); - } - items.add(item); - return items; - } - - /** - * Adds a collection of items to the collection corresponding to the given key - * - * @param key the key for the identification of the collection - * @param c items to add - * - * @return the updated collection of items for the given key - */ - public C putAllItems(K key, Collection c) - { - C items = get(key); - if (c == null) { - put(key, items = cloneEmptySet()); - } - items.addAll(c); - return items; - } - - /** - * Removes the item from the collection corresponding to the given key - * - * @param key the key for the identification of the collection - * @param item item to remove - * - * @return the updated collection of items for the given key - */ - public C removeItem(K key, I 
item) - { - C items = get(key); - if (items == null) { - return null; - } - items.remove(item); - if (items.isEmpty()) { - remove(key); - } - return items; - } - - /** - * Removes a collection of items from the collection corresponding to the given key - * - * @param key the key for the identification of the collection - * @param c items to remove - * - * @return the updated collection of items for the given key - */ - public C removeAllItems(K key, Collection c) - { - C items = get(key); - if (items == null) { - return null; - } - items.removeAll(c); - if (items.isEmpty()) { - remove(key); - } - return items; - } - - /** - * Makes all collections read-only - */ - @SuppressWarnings("unchecked") - public void makeAllCollectionsUnmodifiable() - { - if (emptySet instanceof ExtendedSet) { - for (Entry e : entrySet()) { - e.setValue((C) ((ExtendedSet) e.getValue()).unmodifiable()); - } - } else if (emptySet instanceof List) { - for (Entry e : entrySet()) { - e.setValue((C) (Collections.unmodifiableList((List) e.getValue()))); - } - } else if (emptySet instanceof Set) { - for (Entry e : entrySet()) { - e.setValue((C) (Collections.unmodifiableSet((Set) e.getValue()))); - } - } else if (emptySet instanceof SortedSet) { - for (Entry e : entrySet()) { - e.setValue((C) (Collections.unmodifiableSortedSet((SortedSet) e.getValue()))); - } - } else { - for (Entry e : entrySet()) { - e.setValue((C) (Collections.unmodifiableCollection(e.getValue()))); - } - } - - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java deleted file mode 100755 index 1aaa06bdbe1c..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.utilities; - -/** - * Hash functions for integers and integer arrays. - * - * @author Alessandro Colantonio - * @version $Id: IntHashCode.java 127 2010-12-21 20:22:12Z cocciasik $ - */ -public class IntHashCode -{ - /** - * Computes a hashcode for an integer - *

- * Inspired by Thomas Wang's function, described at http://www.concentric.net/~ttwang/tech/inthash.htm - * - * @param key the given integer - * - * @return the hashcode - */ - public static int hashCode(int key) - { - key = ~key + (key << 15); - key ^= key >>> 12; - key += key << 2; - key ^= key >>> 4; - key *= 2057; - key ^= key >>> 16; - return key; - } - - /** - * Computes the hashcode of an array of integers - * - * @param keys the given integer array - * - * @return the hashcode - */ - public static int hashCode(int[] keys) - { - return hashCode(keys, keys.length, 0); - } - - /** - * Computes the hashcode of an array of integers - *

- * It is based on MurmurHash3 Algorithm, described at http://sites.google.com/site/murmurhash - * - * @param keys the given integer array - * @param len number of elements to include, that is - * len <= keys.length - * @param seed initial seed - * - * @return the hashcode - */ - public static int hashCode(int[] keys, int len, int seed) - { - int h = 0x971e137b ^ seed; - int c1 = 0x95543787; - int c2 = 0x2ad7eb25; - - for (int i = 0; i < len; i++) { - int k = keys[i]; - k *= c1; - k = (k << 11) | (k >>> 21); // rotl k, 11 - k *= c2; - h ^= k; - - h = (h << 2) - h + 0x52dce729; - c1 = (c1 << 2) + c1 + 0x7b7d159c; - c2 = (c2 << 2) + c2 + 0x6bce6396; - } - - h ^= len; - h ^= h >>> 16; - h *= 0x85ebca6b; - h ^= h >>> 13; - h *= 0xc2b2ae35; - h ^= h >>> 16; - return h; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java deleted file mode 100755 index 51474c9fccbf..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java +++ /dev/null @@ -1,115 +0,0 @@ -package io.druid.extendedset.utilities; - -import java.nio.IntBuffer; -import java.util.ArrayList; - -/** - */ -public class IntList -{ - private final ArrayList baseLists = new ArrayList(); - - private final int allocateSize; - - private int maxIndex; - - public IntList() - { - this(1000); - } - - public IntList(final int allocateSize) - { - this.allocateSize = allocateSize; - - maxIndex = -1; - } - - public int length() - { - return maxIndex + 1; - } - - public boolean isEmpty() - { - return (length() == 0); - } - - public void add(int value) - { - set(length(), value); - } - - public void set(int index, int value) - { - int subListIndex = index / allocateSize; - - if (subListIndex >= baseLists.size()) { - for (int i = baseLists.size(); i <= subListIndex; ++i) { - baseLists.add(null); - } - } - - int[] baseList = baseLists.get(subListIndex); - - if (baseList == null) { - baseList 
= new int[allocateSize]; - baseLists.set(subListIndex, baseList); - } - - baseList[index % allocateSize] = value; - - if (index > maxIndex) { - maxIndex = index; - } - } - - public int get(int index) - { - if (index > maxIndex) { - throw new ArrayIndexOutOfBoundsException(index); - } - - int subListIndex = index / allocateSize; - int[] baseList = baseLists.get(subListIndex); - - if (baseList == null) { - return 0; - } - - return baseList[index % allocateSize]; - } - - public int baseListCount() - { - return baseLists.size(); - } - - public IntBuffer getBaseList(int index) - { - final int[] array = baseLists.get(index); - if (array == null) { - return null; - } - - final IntBuffer retVal = IntBuffer.wrap(array); - - if (index + 1 == baseListCount()) { - retVal.limit(maxIndex - (index * allocateSize)); - } - - return retVal.asReadOnlyBuffer(); - } - - public int[] toArray() - { - int[] retVal = new int[length()]; - int currIndex = 0; - for (int[] arr : baseLists) { - int min = Math.min(length() - currIndex, arr.length); - System.arraycopy(arr, 0, retVal, currIndex, min); - currIndex += min; - } - return retVal; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java deleted file mode 100755 index 5d40f8299b52..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java +++ /dev/null @@ -1,689 +0,0 @@ -package io.druid.extendedset.utilities; - -import io.druid.extendedset.intset.IntSet; - -import java.util.Collection; -import java.util.Formatter; -import java.util.List; - -/** - * A wrapper class for classes that implement the {@link IntSet} interface to count method calls - * - * @author Alessandro Colantonio - * @version $Id: IntSetStatistics.java 153 2011-05-30 16:39:57Z cocciasik $ - */ -public class IntSetStatistics implements IntSet -{ - /** - * @uml.property name="unionCount" - */ - private static long 
unionCount = 0; - - - /* - * Monitored characteristics - */ - /** - * @uml.property name="intersectionCount" - */ - private static long intersectionCount = 0; - /** - * @uml.property name="differenceCount" - */ - private static long differenceCount = 0; - /** - * @uml.property name="symmetricDifferenceCount" - */ - private static long symmetricDifferenceCount = 0; - /** - * @uml.property name="complementCount" - */ - private static long complementCount = 0; - /** - * @uml.property name="unionSizeCount" - */ - private static long unionSizeCount = 0; - /** - * @uml.property name="intersectionSizeCount" - */ - private static long intersectionSizeCount = 0; - /** - * @uml.property name="differenceSizeCount" - */ - private static long differenceSizeCount = 0; - /** - * @uml.property name="symmetricDifferenceSizeCount" - */ - private static long symmetricDifferenceSizeCount = 0; - /** - * @uml.property name="complementSizeCount" - */ - private static long complementSizeCount = 0; - /** - * @uml.property name="equalsCount" - */ - private static long equalsCount = 0; - /** - * @uml.property name="hashCodeCount" - */ - private static long hashCodeCount = 0; - /** - * @uml.property name="containsAllCount" - */ - private static long containsAllCount = 0; - /** - * @uml.property name="containsAnyCount" - */ - private static long containsAnyCount = 0; - /** - * @uml.property name="containsAtLeastCount" - */ - private static long containsAtLeastCount = 0; - /** - * instance to monitor - * - * @uml.property name="container" - * @uml.associationEnd - */ - private final IntSet container; - - - /* - * Statistics getters - */ - - /** - * Wraps an {@link IntSet} instance with an {@link IntSetStatistics} - * instance - * - * @param container {@link IntSet} to wrap - */ - public IntSetStatistics(IntSet container) - { - this.container = extractContainer(container); - } - - /** - * @return number of union operations (i.e., {@link #addAll(IntSet)} , {@link #union(IntSet)} ) - * - * 
@uml.property name="unionCount" - */ - public static long getUnionCount() {return unionCount;} - - /** - * @return number of intersection operations (i.e., {@link #retainAll(IntSet)} , {@link #intersection(IntSet)} ) - * - * @uml.property name="intersectionCount" - */ - public static long getIntersectionCount() {return intersectionCount;} - - /** - * @return number of difference operations (i.e., {@link #removeAll(IntSet)} , {@link #difference(IntSet)} ) - * - * @uml.property name="differenceCount" - */ - public static long getDifferenceCount() {return differenceCount;} - - /** - * @return number of symmetric difference operations (i.e., {@link #symmetricDifference(IntSet)} ) - * - * @uml.property name="symmetricDifferenceCount" - */ - public static long getSymmetricDifferenceCount() {return symmetricDifferenceCount;} - - /** - * @return number of complement operations (i.e., {@link #complement()} , {@link #complemented()} ) - * - * @uml.property name="complementCount" - */ - public static long getComplementCount() {return complementCount;} - - /** - * @return cardinality of union operations (i.e., {@link #addAll(IntSet)} , {@link #union(IntSet)} ) - * - * @uml.property name="unionSizeCount" - */ - public static long getUnionSizeCount() {return unionSizeCount;} - - /** - * @return cardinality of intersection operations (i.e., {@link #retainAll(IntSet)} , {@link #intersection(IntSet)} ) - * - * @uml.property name="intersectionSizeCount" - */ - public static long getIntersectionSizeCount() {return intersectionSizeCount;} - - /** - * @return cardinality of difference operations (i.e., {@link #removeAll(IntSet)} , {@link #difference(IntSet)} ) - * - * @uml.property name="differenceSizeCount" - */ - public static long getDifferenceSizeCount() {return differenceSizeCount;} - - /** - * @return cardinality of symmetric difference operations (i.e., {@link #symmetricDifference(IntSet)} ) - * - * @uml.property name="symmetricDifferenceSizeCount" - */ - public static long 
getSymmetricDifferenceSizeCount() {return symmetricDifferenceSizeCount;} - - /** - * @return cardinality of complement operations (i.e., {@link #complement()} , {@link #complemented()} ) - * - * @uml.property name="complementSizeCount" - */ - public static long getComplementSizeCount() {return complementSizeCount;} - - /** - * @return number of equality check operations (i.e., {@link #equals(Object)} ) - * - * @uml.property name="equalsCount" - */ - public static long getEqualsCount() {return equalsCount;} - - /** - * @return number of hash code computations (i.e., {@link #hashCode()} ) - * - * @uml.property name="hashCodeCount" - */ - public static long getHashCodeCount() {return hashCodeCount;} - - /** - * @return number of {@link #containsAll(IntSet)} calls - * - * @uml.property name="containsAllCount" - */ - public static long getContainsAllCount() {return containsAllCount;} - - /** - * @return number of {@link #containsAny(IntSet)} calls - * - * @uml.property name="containsAnyCount" - */ - public static long getContainsAnyCount() {return containsAnyCount;} - - /** - * @return number of {@link #containsAtLeast(IntSet, int)} calls - * - * @uml.property name="containsAtLeastCount" - */ - public static long getContainsAtLeastCount() {return containsAtLeastCount;} - - - /* - * Other statistical methods - */ - - /** - * @return the sum of the cardinality of set operations - */ - public static long getSizeCheckCount() - { - return getIntersectionSizeCount() - + - getUnionSizeCount() - + getDifferenceSizeCount() - + getSymmetricDifferenceSizeCount() - + getComplementSizeCount(); - } - - /** - * Resets all counters - */ - public static void resetCounters() - { - unionCount = intersectionCount = differenceCount = symmetricDifferenceCount = complementCount = - unionSizeCount = intersectionSizeCount = differenceSizeCount = symmetricDifferenceSizeCount = complementSizeCount = - equalsCount = hashCodeCount = containsAllCount = containsAnyCount = containsAtLeastCount = 0; - 
} - - /** - * @return the summary information string - */ - public static String summary() - { - final StringBuilder s = new StringBuilder(); - final Formatter f = new Formatter(s); - - f.format("unionCount: %d\n", Long.valueOf(unionCount)); - f.format("intersectionCount: %d\n", Long.valueOf(intersectionCount)); - f.format("differenceCount: %d\n", Long.valueOf(differenceCount)); - f.format("symmetricDifferenceCount: %d\n", Long.valueOf(symmetricDifferenceCount)); - f.format("complementCount: %d\n", Long.valueOf(complementCount)); - f.format("unionSizeCount: %d\n", Long.valueOf(unionSizeCount)); - f.format("intersectionSizeCount: %d\n", Long.valueOf(intersectionSizeCount)); - f.format("differenceSizeCount: %d\n", Long.valueOf(differenceSizeCount)); - f.format("symmetricDifferenceSizeCount: %d\n", Long.valueOf(symmetricDifferenceSizeCount)); - f.format("complementSizeCount: %d\n", Long.valueOf(complementSizeCount)); - f.format("equalsCount: %d\n", Long.valueOf(equalsCount)); - f.format("hashCodeCount: %d\n", Long.valueOf(hashCodeCount)); - f.format("containsAllCount: %d\n", Long.valueOf(containsAllCount)); - f.format("containsAnyCount: %d\n", Long.valueOf(containsAnyCount)); - f.format("containsAtLeastCount: %d\n", Long.valueOf(containsAtLeastCount)); - - return s.toString(); - } - - /** - * Removes the {@link IntSetStatistics} wrapper - * - * @param c - * - * @return the contained {@link IntSet} instance - */ - public static IntSet extractContainer(IntSet c) - { - if (c instanceof IntSetStatistics) { - return extractContainer(((IntSetStatistics) c).container); - } - return c; - } - - /* - * MONITORED METHODS - */ - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(IntSet c) - { - unionCount++; - return container.addAll(extractContainer(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet union(IntSet other) - { - unionCount++; - return new IntSetStatistics(container.union(extractContainer(other))); - } - - /** - * {@inheritDoc} - */ - 
@Override - public boolean retainAll(IntSet c) - { - intersectionCount++; - return container.retainAll(extractContainer(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet intersection(IntSet other) - { - intersectionCount++; - return new IntSetStatistics(container.intersection(extractContainer(other))); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(IntSet c) - { - differenceCount++; - return container.removeAll(extractContainer(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet difference(IntSet other) - { - differenceCount++; - return new IntSetStatistics(container.difference(extractContainer(other))); - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet symmetricDifference(IntSet other) - { - symmetricDifferenceCount++; - return container.symmetricDifference(extractContainer(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - complementCount++; - container.complement(); - } - - /** - * {@inheritDoc} - */ - @Override - public IntSet complemented() - { - complementCount++; - return new IntSetStatistics(container.complemented()); - } - - /** - * {@inheritDoc} - */ - @Override - public int unionSize(IntSet other) - { - unionSizeCount++; - return container.unionSize(extractContainer(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(IntSet other) - { - intersectionSizeCount++; - return container.intersectionSize(extractContainer(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public int differenceSize(IntSet other) - { - differenceSizeCount++; - return container.differenceSize(extractContainer(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public int symmetricDifferenceSize(IntSet other) - { - symmetricDifferenceSizeCount++; - return container.symmetricDifferenceSize(extractContainer(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public int complementSize() - { - complementSizeCount++; - return 
container.complementSize(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(IntSet c) - { - containsAllCount++; - return container.containsAll(extractContainer(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(IntSet other) - { - containsAnyCount++; - return container.containsAny(extractContainer(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(IntSet other, int minElements) - { - containsAtLeastCount++; - return container.containsAtLeast(extractContainer(other), minElements); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - hashCodeCount++; - return container.hashCode(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - equalsCount++; - return obj != null - && ((obj instanceof IntSetStatistics) - ? container.equals(extractContainer((IntSetStatistics) obj)) - : container.equals(obj)); - } - - /* - * SIMPLE REDIRECTION - */ - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() {return container.bitmapCompressionRatio();} - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() {return container.collectionCompressionRatio();} - - /** - * {@inheritDoc} - */ - @Override - public void clear(int from, int to) {container.clear(from, to);} - - /** - * {@inheritDoc} - */ - @Override - public void fill(int from, int to) {container.fill(from, to);} - - /** - * {@inheritDoc} - */ - @Override - public void clear() {container.clear();} - - /** - * {@inheritDoc} - */ - @Override - public boolean add(int i) {return container.add(i);} - - /** - * {@inheritDoc} - */ - @Override - public boolean remove(int i) {return container.remove(i);} - - /** - * {@inheritDoc} - */ - @Override - public void flip(int e) {container.flip(e);} - - /** - * {@inheritDoc} - */ - @Override - public int get(int i) {return container.get(i);} - - /** - * {@inheritDoc} - */ - @Override 
- public int indexOf(int e) {return container.indexOf(e);} - - /** - * {@inheritDoc} - */ - @Override - public boolean contains(int i) {return container.contains(i);} - - /** - * {@inheritDoc} - */ - @Override - public int first() {return container.first();} - - /** - * {@inheritDoc} - */ - @Override - public int last() {return container.last();} - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() {return container.isEmpty();} - - /** - * {@inheritDoc} - */ - @Override - public int size() {return container.size();} - - /** - * {@inheritDoc} - */ - @Override - public IntIterator iterator() {return container.iterator();} - - /** - * {@inheritDoc} - */ - @Override - public IntIterator descendingIterator() {return container.descendingIterator();} - - /** - * {@inheritDoc} - */ - @Override - public int[] toArray() {return container.toArray();} - - /** - * {@inheritDoc} - */ - @Override - public int[] toArray(int[] a) {return container.toArray(a);} - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(IntSet o) {return container.compareTo(o);} - - /** - * {@inheritDoc} - */ - @Override - public String toString() {return container.toString();} - - /** - * {@inheritDoc} - */ - @Override - public List powerSet() {return container.powerSet();} - - /** - * {@inheritDoc} - */ - @Override - public List powerSet(int min, int max) {return container.powerSet(min, max);} - - /** - * {@inheritDoc} - */ - @Override - public int powerSetSize() {return container.powerSetSize();} - - /** - * {@inheritDoc} - */ - @Override - public int powerSetSize(int min, int max) {return container.powerSetSize(min, max);} - - /** - * {@inheritDoc} - */ - @Override - public double jaccardSimilarity(IntSet other) {return container.jaccardSimilarity(other);} - - /** - * {@inheritDoc} - */ - @Override - public double jaccardDistance(IntSet other) {return container.jaccardDistance(other);} - - /** - * {@inheritDoc} - */ - @Override - public double 
weightedJaccardSimilarity(IntSet other) {return container.weightedJaccardSimilarity(other);} - - /** - * {@inheritDoc} - */ - @Override - public double weightedJaccardDistance(IntSet other) {return container.weightedJaccardDistance(other);} - - /* - * OTHERS - */ - - /** - * {@inheritDoc} - */ - @Override - public IntSet empty() {return new IntSetStatistics(container.empty());} - - /** - * {@inheritDoc} - */ - @Override - public IntSet clone() {return new IntSetStatistics(container.clone());} - - /** - * {@inheritDoc} - */ - @Override - public IntSet convert(int... a) {return new IntSetStatistics(container.convert(a));} - - /** - * {@inheritDoc} - */ - @Override - public IntSet convert(Collection c) {return new IntSetStatistics(container.convert(c));} - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() {return "Analyzed IntSet:\n" + container.debugInfo();} -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java deleted file mode 100755 index 2ba2a6161a89..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java +++ /dev/null @@ -1,869 +0,0 @@ -package io.druid.extendedset.utilities.random; - - -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; - -/** - *

MersenneTwister and MersenneTwisterFast

- *

Version 13, based on version MT19937(99/10/29) - * of the Mersenne Twister algorithm found at - * - * The Mersenne Twister Home Page, with the initialization - * improved using the new 2002/1/26 initialization algorithm - * By Sean Luke, October 2004. - *

- *

MersenneTwister is a drop-in subclass replacement - * for java.util.Random. It is properly synchronized and - * can be used in a multithreaded environment. On modern VMs such - * as HotSpot, it is approximately 1/3 slower than java.util.Random. - *

- *

MersenneTwisterFast is not a subclass of java.util.Random. It has - * the same public methods as Random does, however, and it is - * algorithmically identical to MersenneTwister. MersenneTwisterFast - * has hard-code inlined all of its methods directly, and made all of them - * final (well, the ones of consequence anyway). Further, these - * methods are not synchronized, so the same MersenneTwisterFast - * instance cannot be shared by multiple threads. But all this helps - * MersenneTwisterFast achieve well over twice the speed of MersenneTwister. - * java.util.Random is about 1/3 slower than MersenneTwisterFast. - *

- *

About the Mersenne Twister

- *

This is a Java version of the C-program for MT19937: Integer version. - * The MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura, - * who ask: "When you use this, send an email to: matumoto@math.keio.ac.jp - * with an appropriate reference to your work". Indicate that this - * is a translation of their algorithm into Java. - *

- *

Reference. - * Makoto Matsumoto and Takuji Nishimura, - * "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform - * Pseudo-Random Number Generator", - * ACM Transactions on Modeling and Computer Simulation, - * Vol. 8, No. 1, January 1998, pp 3--30. - *

- *

About this Version

- *

- *

Changes Since V12: clone() method added. - *

- *

Changes Since V11: stateEquals(...) method added. MersenneTwisterFast - * is equal to other MersenneTwisterFasts with identical state; likewise - * MersenneTwister is equal to other MersenneTwister with identical state. - * This isn't equals(...) because that requires a contract of immutability - * to compare by value. - *

- *

Changes Since V10: A documentation error suggested that - * setSeed(int[]) required an int[] array 624 long. In fact, the array - * can be any non-zero length. The new version also checks for this fact. - *

- *

Changes Since V9: readState(stream) and writeState(stream) - * provided. - *

- *

Changes Since V8: setSeed(int) was only using the first 28 bits - * of the seed; it should have been 32 bits. For small-number seeds the - * behavior is identical. - *

- *

Changes Since V7: A documentation error in MersenneTwisterFast - * (but not MersenneTwister) stated that nextDouble selects uniformly from - * the closed interval [0,1]. It does not. nextDouble's contract is - * identical across MersenneTwisterFast, MersenneTwister, and java.util.Random, - * namely, selection in the half-open interval [0,1). That is, 1.0 should - * not be returned. A similar contract exists in nextFloat. - *

- *

Changes Since V6: License has changed from LGPL to BSD. - * New timing information to compare against - * java.util.Random. Recent versions of HotSpot have helped Random increase - * in speed to the point where it is faster than MersenneTwister but slower - * than MersenneTwisterFast (which should be the case, as it's a less complex - * algorithm but is synchronized). - *

- *

Changes Since V5: New empty constructor made to work the same - * as java.util.Random -- namely, it seeds based on the current time in - * milliseconds. - *

- *

Changes Since V4: New initialization algorithms; see - * http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html - *

- *

The MersenneTwister code is based on standard MT19937 C/C++ - * code by Takuji Nishimura, - * with suggestions from Topher Cooper and Marc Rieffel, July 1997. - * The code was originally translated into Java by Michael Lecuyer, - * January 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer. - *

- *

Java notes

- *

- *

This implementation implements the bug fixes made - * in Java 1.2's version of Random, which means it can be used with - * earlier versions of Java. See - * - * the JDK 1.2 java.util.Random documentation for further documentation - * on the random-number generation contracts made. Additionally, there's - * an undocumented bug in the JDK java.util.Random.nextBytes() method, - * which this code fixes. - *

- *

Just like java.util.Random, this - * generator accepts a long seed but doesn't use all of it. java.util.Random - * uses 48 bits. The Mersenne Twister instead uses 32 bits (int size). - * So it's best if your seed does not exceed the int range. - *

- *

MersenneTwister can be used reliably - * on JDK version 1.1.5 or above. Earlier Java versions have serious bugs in - * java.util.Random; only MersenneTwisterFast (and not MersenneTwister nor - * java.util.Random) should be used with them. - *

- *

License

- *

- * Copyright (c) 2003 by Sean Luke.
- * Portions copyright (c) 1993 by Michael Lecuyer.
- * All rights reserved.
- *

- *

Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - *

    - *
  • Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - *
  • Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - *
  • Neither the name of the copyright owners, their employers, nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - *
- *

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * @version 13 - */ - -@SuppressWarnings("serial") -public class MersenneTwister extends java.util.Random implements Cloneable -{ - // Period parameters - private static final int N = 624; - private static final int M = 397; - private static final int MATRIX_A = 0x9908b0df; // private static final * constant vector a - private static final int UPPER_MASK = 0x80000000; // most significant w-r bits - private static final int LOWER_MASK = 0x7fffffff; // least significant r bits - - // Tempering parameters - private static final int TEMPERING_MASK_B = 0x9d2c5680; - private static final int TEMPERING_MASK_C = 0xefc60000; - - private int mt[]; // the array for the state vector - private int mti; // mti==N+1 means mt[N] is not initialized - private int mag01[]; - - // a good initial seed (of int size, though stored in a long) - //private static final long GOOD_SEED = 4357; - - /* implemented here because there's a bug in Random's implementation - of the Gaussian code (divide by zero, and log(0), ugh!), yet its - gaussian variables are private so we can't access them here. 
:-( */ - - private double __nextNextGaussian; - private boolean __haveNextNextGaussian; - - /* We're overriding all internal data, to my knowledge, so this should be okay */ - - /** - * Constructor using the default seed. - */ - public MersenneTwister() - { - this(System.currentTimeMillis()); - } - - /** - * Constructor using a given seed. Though you pass this seed in - * as a long, it's best to make sure it's actually an integer. - * - * @param seed - */ - public MersenneTwister(final long seed) - { - super(seed); /* just in case */ - setSeed(seed); - } - - /** - * Constructor using an array of integers as seed. - * Your array must have a non-zero length. Only the first 624 integers - * in the array are used; if the array is shorter than this then - * integers are repeatedly used in a wrap-around fashion. - * - * @param array - */ - public MersenneTwister(final int[] array) - { - super(System.currentTimeMillis()); /* pick something at random just in case */ - setSeed(array); - } - - /** - * Tests the code. 
- * - * @param args - */ - public static void main(String args[]) - { - int j; - - MersenneTwister r; - - // CORRECTNESS TEST - // COMPARE WITH http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out - - r = new MersenneTwister(new int[]{0x123, 0x234, 0x345, 0x456}); - System.out.println("Output of MersenneTwister with new (2002/1/26) seeding mechanism"); - for (j = 0; j < 1000; j++) { - // first, convert the int from signed to "unsigned" - long l = r.nextInt(); - if (l < 0) { - l += 4294967296L; // max int value - } - String s = String.valueOf(l); - while (s.length() < 10) { - s = " " + s; // buffer - } - System.out.print(s + " "); - if (j % 5 == 4) { - System.out.println(); - } - } - - // SPEED TEST - - final long SEED = 4357; - - int xx; - long ms; - System.out.println("\nTime to test grabbing 100000000 ints"); - - r = new MersenneTwister(SEED); - ms = System.currentTimeMillis(); - xx = 0; - for (j = 0; j < 100000000; j++) { - xx += r.nextInt(); - } - System.out.println("Mersenne Twister: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); - - System.out.println("To compare this with java.util.Random, run this same test on MersenneTwisterFast."); - System.out.println("The comparison with Random is removed from MersenneTwister because it is a proper"); - System.out.println("subclass of Random and this unfairly makes some of Random's methods un-inlinable,"); - System.out.println("so it would make Random look worse than it is."); - - // TEST TO COMPARE TYPE CONVERSION BETWEEN - // MersenneTwisterFast.java AND MersenneTwister.java - - - System.out.println("\nGrab the first 1000 booleans"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextBoolean() + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if (!(j % 8 == 7)) { - System.out.println(); - } - - System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)"); - r = new MersenneTwister(SEED); - for (j = 0; j < 
1000; j++) { - System.out.print(r.nextBoolean(j / 999.0) + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if (!(j % 8 == 7)) { - System.out.println(); - } - - System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextBoolean(j / 999.0f) + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if (!(j % 8 == 7)) { - System.out.println(); - } - - byte[] bytes = new byte[1000]; - System.out.println("\nGrab the first 1000 bytes using nextBytes"); - r = new MersenneTwister(SEED); - r.nextBytes(bytes); - for (j = 0; j < 1000; j++) { - System.out.print(bytes[j] + " "); - if (j % 16 == 15) { - System.out.println(); - } - } - if (!(j % 16 == 15)) { - System.out.println(); - } - - byte b; - System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print((b = r.nextByte()) + " "); - if (b != bytes[j]) { - System.out.print("BAD "); - } - if (j % 16 == 15) { - System.out.println(); - } - } - if (!(j % 16 == 15)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 shorts"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextShort() + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if (!(j % 8 == 7)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 ints"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextInt() + " "); - if (j % 4 == 3) { - System.out.println(); - } - } - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 ints of different sizes"); - r = new MersenneTwister(SEED); - int max = 1; - for (j = 0; j < 1000; j++) { - System.out.print(r.nextInt(max) + " "); - max *= 2; - if (max <= 0) { - max = 1; - } - if (j % 4 == 3) { - System.out.println(); - } - 
} - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 longs"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextLong() + " "); - if (j % 3 == 2) { - System.out.println(); - } - } - if (!(j % 3 == 2)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 longs of different sizes"); - r = new MersenneTwister(SEED); - long max2 = 1; - for (j = 0; j < 1000; j++) { - System.out.print(r.nextLong(max2) + " "); - max2 *= 2; - if (max2 <= 0) { - max2 = 1; - } - if (j % 4 == 3) { - System.out.println(); - } - } - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 floats"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextFloat() + " "); - if (j % 4 == 3) { - System.out.println(); - } - } - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 doubles"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextDouble() + " "); - if (j % 3 == 2) { - System.out.println(); - } - } - if (!(j % 3 == 2)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 gaussian doubles"); - r = new MersenneTwister(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextGaussian() + " "); - if (j % 3 == 2) { - System.out.println(); - } - } - if (!(j % 3 == 2)) { - System.out.println(); - } - - } - - /** - * {@inheritDoc} - */ - @Override - public Object clone() throws CloneNotSupportedException - { - MersenneTwister f = (MersenneTwister) (super.clone()); - f.mt = mt.clone(); - f.mag01 = mag01.clone(); - return f; - } - - /** - * @param o - * - * @return ? 
- */ - public boolean stateEquals(Object o) - { - if (o == this) { - return true; - } - if (o == null || !(o instanceof MersenneTwister)) { - return false; - } - MersenneTwister other = (MersenneTwister) o; - if (mti != other.mti) { - return false; - } - for (int x = 0; x < mag01.length; x++) { - if (mag01[x] != other.mag01[x]) { - return false; - } - } - for (int x = 0; x < mt.length; x++) { - if (mt[x] != other.mt[x]) { - return false; - } - } - return true; - } - - /** - * Reads the entire state of the MersenneTwister RNG from the stream - * - * @param stream - * - * @throws IOException - */ - public void readState(DataInputStream stream) throws IOException - { - int len = mt.length; - for (int x = 0; x < len; x++) { - mt[x] = stream.readInt(); - } - - len = mag01.length; - for (int x = 0; x < len; x++) { - mag01[x] = stream.readInt(); - } - - mti = stream.readInt(); - __nextNextGaussian = stream.readDouble(); - __haveNextNextGaussian = stream.readBoolean(); - } - - /** - * Writes the entire state of the MersenneTwister RNG to the stream - * - * @param stream - * - * @throws IOException - */ - public void writeState(DataOutputStream stream) throws IOException - { - int len = mt.length; - for (int x = 0; x < len; x++) { - stream.writeInt(mt[x]); - } - - len = mag01.length; - for (int x = 0; x < len; x++) { - stream.writeInt(mag01[x]); - } - - stream.writeInt(mti); - stream.writeDouble(__nextNextGaussian); - stream.writeBoolean(__haveNextNextGaussian); - } - - /** - * Initialize the pseudo random number generator. Don't - * pass in a long that's bigger than an int (Mersenne Twister - * only uses the first 32 bits for its seed). - */ - @Override - synchronized public void setSeed(final long seed) - { - // it's always good style to call super - super.setSeed(seed); - - // Due to a bug in java.util.Random clear up to 1.2, we're - // doing our own Gaussian variable. 
- __haveNextNextGaussian = false; - - mt = new int[N]; - - mag01 = new int[2]; - mag01[0] = 0x0; - mag01[1] = MATRIX_A; - - mt[0] = (int) (seed & 0xffffffff); - for (mti = 1; mti < N; mti++) { - mt[mti] = - (1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >>> 30)) + mti); - /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ - /* In the previous versions, MSBs of the seed affect */ - /* only MSBs of the array mt[]. */ - /* 2002/01/09 modified by Makoto Matsumoto */ - mt[mti] &= 0xffffffff; - /* for >32 bit machines */ - } - } - - /** - * Sets the seed of the MersenneTwister using an array of integers. - * Your array must have a non-zero length. Only the first 624 integers - * in the array are used; if the array is shorter than this then - * integers are repeatedly used in a wrap-around fashion. - * - * @param array - */ - synchronized public void setSeed(final int[] array) - { - if (array.length == 0) { - throw new IllegalArgumentException("Array length must be greater than zero"); - } - int i, j, k; - setSeed(19650218); - i = 1; - j = 0; - k = (N > array.length ? N : array.length); - for (; k != 0; k--) { - mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */ - mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ - i++; - j++; - if (i >= N) { - mt[0] = mt[N - 1]; - i = 1; - } - if (j >= array.length) { - j = 0; - } - } - for (k = N - 1; k != 0; k--) { - mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */ - mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ - i++; - if (i >= N) { - mt[0] = mt[N - 1]; - i = 1; - } - } - mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */ - } - - /* If you've got a truly old version of Java, you can omit these - two next methods. */ - - /** - * Returns an integer with bits bits filled with a random number. 
- */ - @Override - synchronized protected int next(final int bits) - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return y >>> (32 - bits); // hope that's right! - } - - private synchronized void writeObject(final ObjectOutputStream out) - throws IOException - { - // just so we're synchronized. - out.defaultWriteObject(); - } - - private synchronized void readObject(final ObjectInputStream in) - throws IOException, ClassNotFoundException - { - // just so we're synchronized. - in.defaultReadObject(); - } - - /** - * This method is missing from jdk 1.0.x and below. JDK 1.1 - * includes this for us, but what the heck. - */ - @Override - public boolean nextBoolean() {return next(1) != 0;} - - /** - * This generates a coin flip with a probability probability - * of returning true, else returning false. probability must - * be between 0.0 and 1.0, inclusive. Not as precise a random real - * event as nextBoolean(double), but twice as fast. To explicitly - * use this, remember you may need to cast to float first. - * - * @param probability - * - * @return ? 
- */ - public boolean nextBoolean(final float probability) - { - if (probability < 0.0f || probability > 1.0f) { - throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); - } - if (probability == 0.0f) { - return false; // fix half-open issues - } else if (probability == 1.0f) { - return true; // fix half-open issues - } - return nextFloat() < probability; - } - - /** - * This generates a coin flip with a probability probability - * of returning true, else returning false. probability must - * be between 0.0 and 1.0, inclusive. - * - * @param probability - * - * @return ? - */ - public boolean nextBoolean(final double probability) - { - if (probability < 0.0 || probability > 1.0) { - throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); - } - if (probability == 0.0) { - return false; // fix half-open issues - } else if (probability == 1.0) { - return true; // fix half-open issues - } - return nextDouble() < probability; - } - - /** - * This method is missing from JDK 1.1 and below. JDK 1.2 - * includes this for us, but what the heck. - */ - @Override - public int nextInt(final int n) - { - if (n <= 0) { - throw new IllegalArgumentException("n must be > 0"); - } - - if ((n & -n) == n) { - return (int) ((n * (long) next(31)) >> 31); - } - - int bits, val; - do { - bits = next(31); - val = bits % n; - } - while (bits - val + (n - 1) < 0); - return val; - } - - /** - * This method is for completness' sake. - * Returns a long drawn uniformly from 0 to n-1. Suffice it to say, - * n must be > 0, or an IllegalArgumentException is raised. - * - * @param n - * - * @return ? - */ - public long nextLong(final long n) - { - if (n <= 0) { - throw new IllegalArgumentException("n must be > 0"); - } - - long bits, val; - do { - bits = (nextLong() >>> 1); - val = bits % n; - } - while (bits - val + (n - 1) < 0); - return val; - } - - /** - * A bug fix for versions of JDK 1.1 and below. 
JDK 1.2 fixes - * this for us, but what the heck. - * - * @return ? - */ - @Override - public double nextDouble() - { - return (((long) next(26) << 27) + next(27)) - / (double) (1L << 53); - } - - /** - * A bug fix for versions of JDK 1.1 and below. JDK 1.2 fixes - * this for us, but what the heck. - */ - - @Override - public float nextFloat() - { - return next(24) / ((float) (1 << 24)); - } - - /** - * A bug fix for all versions of the JDK. The JDK appears to - * use all four bytes in an integer as independent byte values! - * Totally wrong. I've submitted a bug report. - */ - - @Override - public void nextBytes(final byte[] bytes) - { - for (int x = 0; x < bytes.length; x++) { - bytes[x] = (byte) next(8); - } - } - - /** - * For completeness' sake, though it's not in java.util.Random. - * - * @return ? - */ - public char nextChar() - { - // chars are 16-bit UniCode values - return (char) (next(16)); - } - - /** - * For completeness' sake, though it's not in java.util.Random. - * - * @return ? - */ - public short nextShort() - { - return (short) (next(16)); - } - - /** - * For completeness' sake, though it's not in java.util.Random. - * - * @return ? - */ - public byte nextByte() - { - return (byte) (next(8)); - } -// } - - /** - * A bug fix for all JDK code including 1.2. nextGaussian can theoretically - * ask for the log of 0 and divide it by 0! See Java bug - * - * http://developer.java.sun.com/developer/bugParade/bugs/4254501.html - * - * @return ? 
- */ - @Override - synchronized public double nextGaussian() - { - if (__haveNextNextGaussian) { - __haveNextNextGaussian = false; - return __nextNextGaussian; - } -// else -// { - double v1, v2, s; - do { - v1 = 2 * nextDouble() - 1; // between -1.0 and 1.0 - v2 = 2 * nextDouble() - 1; // between -1.0 and 1.0 - s = v1 * v1 + v2 * v2; - } while (s >= 1 || s == 0); - double multiplier = /*Strict*/Math.sqrt(-2 * /*Strict*/Math.log(s) / s); - __nextNextGaussian = v2 * multiplier; - __haveNextNextGaussian = true; - return v1 * multiplier; - } - -} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java deleted file mode 100755 index 0789c2008f75..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java +++ /dev/null @@ -1,1470 +0,0 @@ -package io.druid.extendedset.utilities.random; - -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.Serializable; -import java.util.Random; - -/** - *

MersenneTwister and MersenneTwisterFast

- *

Version 13, based on version MT199937(99/10/29) - * of the Mersenne Twister algorithm found at - * - * The Mersenne Twister Home Page, with the initialization - * improved using the new 2002/1/26 initialization algorithm - * By Sean Luke, October 2004. - *

- *

MersenneTwister is a drop-in subclass replacement - * for java.util.Random. It is properly synchronized and - * can be used in a multithreaded environment. On modern VMs such - * as HotSpot, it is approximately 1/3 slower than java.util.Random. - *

- *

MersenneTwisterFast is not a subclass of java.util.Random. It has - * the same public methods as Random does, however, and it is - * algorithmically identical to MersenneTwister. MersenneTwisterFast - * has hard-code inlined all of its methods directly, and made all of them - * final (well, the ones of consequence anyway). Further, these - * methods are not synchronized, so the same MersenneTwisterFast - * instance cannot be shared by multiple threads. But all this helps - * MersenneTwisterFast achieve well over twice the speed of MersenneTwister. - * java.util.Random is about 1/3 slower than MersenneTwisterFast. - *

- *

About the Mersenne Twister

- *

This is a Java version of the C-program for MT19937: Integer version. - * The MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura, - * who ask: "When you use this, send an email to: matumoto@math.keio.ac.jp - * with an appropriate reference to your work". Indicate that this - * is a translation of their algorithm into Java. - *

- *

Reference. - * Makato Matsumoto and Takuji Nishimura, - * "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform - * Pseudo-Random Number Generator", - * ACM Transactions on Modeling and. Computer Simulation, - * Vol. 8, No. 1, January 1998, pp 3--30. - *

- *

About this Version

- *

- *

Changes Since V12: clone() method added. - *

- *

Changes Since V11: stateEquals(...) method added. MersenneTwisterFast - * is equal to other MersenneTwisterFasts with identical state; likewise - * MersenneTwister is equal to other MersenneTwister with identical state. - * This isn't equals(...) because that requires a contract of immutability - * to compare by value. - *

- *

Changes Since V10: A documentation error suggested that - * setSeed(int[]) required an int[] array 624 long. In fact, the array - * can be any non-zero length. The new version also checks for this fact. - *

- *

Changes Since V9: readState(stream) and writeState(stream) - * provided. - *

- *

Changes Since V8: setSeed(int) was only using the first 28 bits - * of the seed; it should have been 32 bits. For small-number seeds the - * behavior is identical. - *

- *

Changes Since V7: A documentation error in MersenneTwisterFast - * (but not MersenneTwister) stated that nextDouble selects uniformly from - * the full-open interval [0,1]. It does not. nextDouble's contract is - * identical across MersenneTwisterFast, MersenneTwister, and java.util.Random, - * namely, selection in the half-open interval [0,1). That is, 1.0 should - * not be returned. A similar contract exists in nextFloat. - *

- *

Changes Since V6: License has changed from LGPL to BSD. - * New timing information to compare against - * java.util.Random. Recent versions of HotSpot have helped Random increase - * in speed to the point where it is faster than MersenneTwister but slower - * than MersenneTwisterFast (which should be the case, as it's a less complex - * algorithm but is synchronized). - *

- *

Changes Since V5: New empty constructor made to work the same - * as java.util.Random -- namely, it seeds based on the current time in - * milliseconds. - *

- *

Changes Since V4: New initialization algorithms. See - * (see - * http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html) - *

- *

The MersenneTwister code is based on standard MT19937 C/C++ - * code by Takuji Nishimura, - * with suggestions from Topher Cooper and Marc Rieffel, July 1997. - * The code was originally translated into Java by Michael Lecuyer, - * January 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer. - *

- *

Java notes

- *

- *

This implementation implements the bug fixes made - * in Java 1.2's version of Random, which means it can be used with - * earlier versions of Java. See - * - * the JDK 1.2 java.util.Random documentation for further documentation - * on the random-number generation contracts made. Additionally, there's - * an undocumented bug in the JDK java.util.Random.nextBytes() method, - * which this code fixes. - *

- *

Just like java.util.Random, this - * generator accepts a long seed but doesn't use all of it. java.util.Random - * uses 48 bits. The Mersenne Twister instead uses 32 bits (int size). - * So it's best if your seed does not exceed the int range. - *

- *

MersenneTwister can be used reliably - * on JDK version 1.1.5 or above. Earlier Java versions have serious bugs in - * java.util.Random; only MersenneTwisterFast (and not MersenneTwister nor - * java.util.Random) should be used with them. - *

- *

License

- *

- * Copyright (c) 2003 by Sean Luke.
- * Portions copyright (c) 1993 by Michael Lecuyer.
- * All rights reserved.
- *

- *

Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - *

    - *
  • Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - *
  • Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - *
  • Neither the name of the copyright owners, their employers, nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - *
- *

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * @version 13 - */ - -// Note: this class is hard-inlined in all of its methods. This makes some of -// the methods well-nigh unreadable in their complexity. In fact, the Mersenne -// Twister is fairly easy code to understand: if you're trying to get a handle -// on the code, I strongly suggest looking at MersenneTwister.java first. 
-// -- Sean - -@SuppressWarnings("serial") -public class MersenneTwisterFast implements Serializable, Cloneable -{ - // Period parameters - private static final int N = 624; - private static final int M = 397; - private static final int MATRIX_A = 0x9908b0df; // private static final * constant vector a - private static final int UPPER_MASK = 0x80000000; // most significant w-r bits - private static final int LOWER_MASK = 0x7fffffff; // least significant r bits - - - // Tempering parameters - private static final int TEMPERING_MASK_B = 0x9d2c5680; - private static final int TEMPERING_MASK_C = 0xefc60000; - - private int mt[]; // the array for the state vector - private int mti; // mti==N+1 means mt[N] is not initialized - private int mag01[]; - - // a good initial seed (of int size, though stored in a long) - //private static final long GOOD_SEED = 4357; - - private double __nextNextGaussian; - private boolean __haveNextNextGaussian; - - /* We're overriding all internal data, to my knowledge, so this should be okay */ - - /** - * Constructor using the default seed. - */ - public MersenneTwisterFast() - { - this(System.currentTimeMillis()); - } - - /** - * Constructor using a given seed. Though you pass this seed in - * as a long, it's best to make sure it's actually an integer. - * - * @param seed - */ - public MersenneTwisterFast(final long seed) - { - setSeed(seed); - } - - /** - * Constructor using an array of integers as seed. - * Your array must have a non-zero length. Only the first 624 integers - * in the array are used; if the array is shorter than this then - * integers are repeatedly used in a wrap-around fashion. - * - * @param array - */ - public MersenneTwisterFast(final int[] array) - { - setSeed(array); - } - - /** - * Tests the code. 
- * - * @param args - */ - public static void main(String args[]) - { - int j; - - MersenneTwisterFast r; - - // CORRECTNESS TEST - // COMPARE WITH http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out - - r = new MersenneTwisterFast(new int[]{0x123, 0x234, 0x345, 0x456}); - System.out.println("Output of MersenneTwisterFast with new (2002/1/26) seeding mechanism"); - for (j = 0; j < 1000; j++) { - // first, convert the int from signed to "unsigned" - long l = r.nextInt(); - if (l < 0) { - l += 4294967296L; // max int value - } - String s = String.valueOf(l); - while (s.length() < 10) { - s = " " + s; // buffer - } - System.out.print(s + " "); - if (j % 5 == 4) { - System.out.println(); - } - } - - // SPEED TEST - - final long SEED = 4357; - - int xx; - long ms; - System.out.println("\nTime to test grabbing 100000000 ints"); - - Random rr = new Random(SEED); - xx = 0; - ms = System.currentTimeMillis(); - for (j = 0; j < 100000000; j++) { - xx += rr.nextInt(); - } - System.out.println("java.util.Random: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); - - r = new MersenneTwisterFast(SEED); - ms = System.currentTimeMillis(); - xx = 0; - for (j = 0; j < 100000000; j++) { - xx += r.nextInt(); - } - System.out.println("Mersenne Twister Fast: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); - - // TEST TO COMPARE TYPE CONVERSION BETWEEN - // MersenneTwisterFast.java AND MersenneTwister.java - - System.out.println("\nGrab the first 1000 booleans"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextBoolean() + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if (!(j % 8 == 7)) { - System.out.println(); - } - - System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextBoolean((j / 999.0)) + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if 
(!(j % 8 == 7)) { - System.out.println(); - } - - System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextBoolean((j / 999.0f)) + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if (!(j % 8 == 7)) { - System.out.println(); - } - - byte[] bytes = new byte[1000]; - System.out.println("\nGrab the first 1000 bytes using nextBytes"); - r = new MersenneTwisterFast(SEED); - r.nextBytes(bytes); - for (j = 0; j < 1000; j++) { - System.out.print(bytes[j] + " "); - if (j % 16 == 15) { - System.out.println(); - } - } - if (!(j % 16 == 15)) { - System.out.println(); - } - - byte b; - System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print((b = r.nextByte()) + " "); - if (b != bytes[j]) { - System.out.print("BAD "); - } - if (j % 16 == 15) { - System.out.println(); - } - } - if (!(j % 16 == 15)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 shorts"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextShort() + " "); - if (j % 8 == 7) { - System.out.println(); - } - } - if (!(j % 8 == 7)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 ints"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextInt() + " "); - if (j % 4 == 3) { - System.out.println(); - } - } - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 ints of different sizes"); - r = new MersenneTwisterFast(SEED); - int max = 1; - for (j = 0; j < 1000; j++) { - System.out.print(r.nextInt(max) + " "); - max *= 2; - if (max <= 0) { - max = 1; - } - if (j % 4 == 3) { - System.out.println(); - } - } - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 
1000 longs"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextLong() + " "); - if (j % 3 == 2) { - System.out.println(); - } - } - if (!(j % 3 == 2)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 longs of different sizes"); - r = new MersenneTwisterFast(SEED); - long max2 = 1; - for (j = 0; j < 1000; j++) { - System.out.print(r.nextLong(max2) + " "); - max2 *= 2; - if (max2 <= 0) { - max2 = 1; - } - if (j % 4 == 3) { - System.out.println(); - } - } - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 floats"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextFloat() + " "); - if (j % 4 == 3) { - System.out.println(); - } - } - if (!(j % 4 == 3)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 doubles"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextDouble() + " "); - if (j % 3 == 2) { - System.out.println(); - } - } - if (!(j % 3 == 2)) { - System.out.println(); - } - - System.out.println("\nGrab the first 1000 gaussian doubles"); - r = new MersenneTwisterFast(SEED); - for (j = 0; j < 1000; j++) { - System.out.print(r.nextGaussian() + " "); - if (j % 3 == 2) { - System.out.println(); - } - } - if (!(j % 3 == 2)) { - System.out.println(); - } - - } - - /** - * {@inheritDoc} - */ - @Override - public Object clone() throws CloneNotSupportedException - { - MersenneTwisterFast f = (MersenneTwisterFast) (super.clone()); - f.mt = mt.clone(); - f.mag01 = mag01.clone(); - return f; - } - - /** - * @param o - * - * @return ? 
- */ - public boolean stateEquals(Object o) - { - if (o == this) { - return true; - } - if (o == null || !(o instanceof MersenneTwisterFast)) { - return false; - } - MersenneTwisterFast other = (MersenneTwisterFast) o; - if (mti != other.mti) { - return false; - } - for (int x = 0; x < mag01.length; x++) { - if (mag01[x] != other.mag01[x]) { - return false; - } - } - for (int x = 0; x < mt.length; x++) { - if (mt[x] != other.mt[x]) { - return false; - } - } - return true; - } - - /** - * Reads the entire state of the MersenneTwister RNG from the stream - * - * @param stream - * - * @throws IOException - */ - public void readState(DataInputStream stream) throws IOException - { - int len = mt.length; - for (int x = 0; x < len; x++) { - mt[x] = stream.readInt(); - } - - len = mag01.length; - for (int x = 0; x < len; x++) { - mag01[x] = stream.readInt(); - } - - mti = stream.readInt(); - __nextNextGaussian = stream.readDouble(); - __haveNextNextGaussian = stream.readBoolean(); - } - - /** - * Writes the entire state of the MersenneTwister RNG to the stream - * - * @param stream - * - * @throws IOException - */ - public void writeState(DataOutputStream stream) throws IOException - { - int len = mt.length; - for (int x = 0; x < len; x++) { - stream.writeInt(mt[x]); - } - - len = mag01.length; - for (int x = 0; x < len; x++) { - stream.writeInt(mag01[x]); - } - - stream.writeInt(mti); - stream.writeDouble(__nextNextGaussian); - stream.writeBoolean(__haveNextNextGaussian); - } - - /** - * Initialize the pseudo random number generator. Don't - * pass in a long that's bigger than an int (Mersenne Twister - * only uses the first 32 bits for its seed). - * - * @param seed - */ - synchronized public void setSeed(final long seed) - { - // Due to a bug in java.util.Random clear up to 1.2, we're - // doing our own Gaussian variable. 
- __haveNextNextGaussian = false; - - mt = new int[N]; - - mag01 = new int[2]; - mag01[0] = 0x0; - mag01[1] = MATRIX_A; - - mt[0] = (int) (seed & 0xffffffff); - for (mti = 1; mti < N; mti++) { - mt[mti] = - (1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >>> 30)) + mti); - /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ - /* In the previous versions, MSBs of the seed affect */ - /* only MSBs of the array mt[]. */ - /* 2002/01/09 modified by Makoto Matsumoto */ - mt[mti] &= 0xffffffff; - /* for >32 bit machines */ - } - } - - /** - * Sets the seed of the MersenneTwister using an array of integers. - * Your array must have a non-zero length. Only the first 624 integers - * in the array are used; if the array is shorter than this then - * integers are repeatedly used in a wrap-around fashion. - * - * @param array - */ - synchronized public void setSeed(final int[] array) - { - if (array.length == 0) { - throw new IllegalArgumentException("Array length must be greater than zero"); - } - int i, j, k; - setSeed(19650218); - i = 1; - j = 0; - k = (N > array.length ? N : array.length); - for (; k != 0; k--) { - mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */ - mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ - i++; - j++; - if (i >= N) { - mt[0] = mt[N - 1]; - i = 1; - } - if (j >= array.length) { - j = 0; - } - } - for (k = N - 1; k != 0; k--) { - mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */ - mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ - i++; - if (i >= N) { - mt[0] = mt[N - 1]; - i = 1; - } - } - mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */ - } - - /** - * @return ? 
- */ - public final int nextInt() - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return y; - } - - /** - * @return ? - */ - public final short nextShort() - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return (short) (y >>> 16); - } - - /** - * @return ? 
- */ - public final char nextChar() - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return (char) (y >>> 16); - } - - /** - * @return ? - */ - public final boolean nextBoolean() - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return ((y >>> 31) != 0); - } - - /** - * This 
generates a coin flip with a probability probability - * of returning true, else returning false. probability must - * be between 0.0 and 1.0, inclusive. Not as precise a random real - * event as nextBoolean(double), but twice as fast. To explicitly - * use this, remember you may need to cast to float first. - * - * @param probability - * - * @return ? - */ - public final boolean nextBoolean(final float probability) - { - int y; - - if (probability < 0.0f || probability > 1.0f) { - throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); - } - if (probability == 0.0f) { - return false; // fix half-open issues - } else if (probability == 1.0f) { - return true; // fix half-open issues - } - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return (y >>> 8) / ((float) (1 << 24)) < probability; - } - - /** - * This generates a coin flip with a probability probability - * of returning true, else returning false. probability must - * be between 0.0 and 1.0, inclusive. - * - * @param probability - * - * @return ? 
- */ - public final boolean nextBoolean(final double probability) - { - int y; - int z; - - if (probability < 0.0 || probability > 1.0) { - throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); - } - if (probability == 0.0) { - return false; // fix half-open issues - } else if (probability == 1.0) { - return true; // fix half-open issues - } - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - for (; kk < N - 1; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; - - mti = 0; - } - - z = mt[mti++]; - z ^= z >>> 11; // 
TEMPERING_SHIFT_U(z) - z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) - z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) - z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) - - /* derived from nextDouble documentation in jdk 1.2 docs, see top */ - return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53) < probability; - } - - /** - * @return ? - */ - public final byte nextByte() - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return (byte) (y >>> 24); - } - - /** - * @param bytes - */ - public final void nextBytes(byte[] bytes) - { - int y; - - for (int x = 0; x < bytes.length; x++) { - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) 
^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - bytes[x] = (byte) (y >>> 24); - } - } - - /** - * @return ? - */ - public final long nextLong() - { - int y; - int z; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - for (; kk < N - 1; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] 
= mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; - - mti = 0; - } - - z = mt[mti++]; - z ^= z >>> 11; // TEMPERING_SHIFT_U(z) - z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) - z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) - z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) - - return (((long) y) << 32) + z; - } - - /** - * Returns a long drawn uniformly from 0 to n-1. Suffice it to say, - * n must be > 0, or an IllegalArgumentException is raised. - * - * @param n - * - * @return ? - */ - public final long nextLong(final long n) - { - if (n <= 0) { - throw new IllegalArgumentException("n must be > 0"); - } - - long bits, val; - do { - int y; - int z; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - for (; kk < N - 1; kk++) { - z = (mt[kk] 
& UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; - - mti = 0; - } - - z = mt[mti++]; - z ^= z >>> 11; // TEMPERING_SHIFT_U(z) - z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) - z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) - z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) - - bits = (((((long) y) << 32) + z) >>> 1); - val = bits % n; - } while (bits - val + (n - 1) < 0); - return val; - } - - /** - * Returns a random double in the half-open range from [0.0,1.0). Thus 0.0 is a valid - * result but 1.0 is not. - * - * @return ? - */ - public final double nextDouble() - { - int y; - int z; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & 
LOWER_MASK); - mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - for (; kk < N - 1; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; - - mti = 0; - } - - z = mt[mti++]; - z ^= z >>> 11; // TEMPERING_SHIFT_U(z) - z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) - z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) - z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) - - /* derived from nextDouble documentation in jdk 1.2 docs, see top */ - return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53); - } - - /** - * @return ? - */ - public final double nextGaussian() - { - if (__haveNextNextGaussian) { - __haveNextNextGaussian = false; - return __nextNextGaussian; - } -// else -// { - double v1, v2, s; - do { - int y; - int z; - int a; - int b; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are 
slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - for (; kk < N - 1; kk++) { - z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; - } - z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; - - mti = 0; - } - - z = mt[mti++]; - z ^= z >>> 11; // TEMPERING_SHIFT_U(z) - z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) - z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) - z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (a >>> 1) ^ mag01[a & 0x1]; - } - for (; kk < N - 1; kk++) { - a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (a >>> 1) ^ mag01[a & 0x1]; - } - a = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (a >>> 1) ^ mag01[a & 0x1]; - - mti = 0; - } - - a = mt[mti++]; - a ^= a >>> 11; // TEMPERING_SHIFT_U(a) - a ^= (a << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(a) - a ^= (a << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(a) - a ^= (a >>> 18); // TEMPERING_SHIFT_L(a) - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - 
mt[kk] = mt[kk + M] ^ (b >>> 1) ^ mag01[b & 0x1]; - } - for (; kk < N - 1; kk++) { - b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (b >>> 1) ^ mag01[b & 0x1]; - } - b = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (b >>> 1) ^ mag01[b & 0x1]; - - mti = 0; - } - - b = mt[mti++]; - b ^= b >>> 11; // TEMPERING_SHIFT_U(b) - b ^= (b << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(b) - b ^= (b << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(b) - b ^= (b >>> 18); // TEMPERING_SHIFT_L(b) - - /* derived from nextDouble documentation in jdk 1.2 docs, see top */ - v1 = 2 * - (((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53)) - - 1; - v2 = 2 * (((((long) (a >>> 6)) << 27) + (b >>> 5)) / (double) (1L << 53)) - - 1; - s = v1 * v1 + v2 * v2; - } while (s >= 1 || s == 0); - double multiplier = /*Strict*/Math.sqrt(-2 * /*Strict*/Math.log(s) / s); - __nextNextGaussian = v2 * multiplier; - __haveNextNextGaussian = true; - return v1 * multiplier; -// } - } - - /** - * Returns a random float in the half-open range from [0.0f,1.0f). Thus 0.0f is a valid - * result but 1.0f is not. - * - * @return ? 
- */ - public final float nextFloat() - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return (y >>> 8) / ((float) (1 << 24)); - } - - /** - * Returns an integer drawn uniformly from 0 to n-1. Suffice it to say, - * n must be > 0, or an IllegalArgumentException is raised. - * - * @param n - * - * @return ? 
- */ - public final int nextInt(final int n) - { - if (n <= 0) { - throw new IllegalArgumentException("n must be > 0"); - } - - if ((n & -n) == n) // i.e., n is a power of 2 - { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - return (int) ((n * (long) (y >>> 1)) >> 31); - } - - int bits, val; - do { - int y; - - if (mti >= N) // generate N words at one time - { - int kk; - @SuppressWarnings("hiding") - final int[] mt = this.mt; // locals are slightly faster - @SuppressWarnings("hiding") - final int[] mag01 = this.mag01; // locals are slightly faster - - for (kk = 0; kk < N - M; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - for (; kk < N - 1; kk++) { - y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); - mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; - } - y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); - mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; - - mti = 0; - } - - y = mt[mti++]; - y ^= y >>> 11; // TEMPERING_SHIFT_U(y) - y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) - y ^= (y << 15) & TEMPERING_MASK_C; // 
TEMPERING_SHIFT_T(y) - y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) - - bits = (y >>> 1); - val = bits % n; - } while (bits - val + (n - 1) < 0); - return val; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java deleted file mode 100755 index cb4bf71b41de..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java +++ /dev/null @@ -1,885 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package io.druid.extendedset.wrappers; - - -import io.druid.extendedset.AbstractExtendedSet; -import io.druid.extendedset.ExtendedSet; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.SortedSet; - -/** - * {@link ExtendedSet}-based class internally managed by an instance of any - * class implementing {@link Collection} - * - * @param the type of elements maintained by this set - * - * @author Alessandro Colantonio - * @version $Id$ - */ -public class GenericExtendedSet> extends AbstractExtendedSet -{ - /** - * class implementing {@link Collection} that is used to collect elements - */ - private final Class setClass; - /** - * elements of the set - */ - private /*final*/ Collection elements; - - /** - * Empty-set constructor - * - * @param setClass {@link Collection}-derived class - */ - @SuppressWarnings("unchecked") - public GenericExtendedSet(Class setClass) - { - this.setClass = setClass; - try { - elements = setClass.newInstance(); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - throw new UnsupportedOperationException(); - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - return isEmpty() ? 
0D : 1D; - } - - /** - * {@inheritDoc} - */ - @Override - public GenericExtendedSet empty() - { - return new GenericExtendedSet(setClass); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator iterator() - { - // prepare the sorted set - final Collection sorted; - if (elements instanceof SortedSet || elements instanceof List) { - //NOTE: SortedSet.comparator() is null - sorted = elements; - } else { - sorted = new ArrayList(elements); - Collections.sort((List) sorted); - } - - // iterate over the sorted set - return new ExtendedIterator() - { - final Iterator itr = sorted.iterator(); - T current; - - { - current = itr.hasNext() ? itr.next() : null; - } - - @Override - public void skipAllBefore(T element) - { - while (element.compareTo(current) > 0) { - next(); - } - } - - @Override - public boolean hasNext() - { - return current != null; - } - - @Override - public T next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - T prev = current; - current = itr.hasNext() ? itr.next() : null; - return prev; - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator descendingIterator() - { - // prepare the sorted set - final Collection sorted; -//TODO -// if (elements instanceof SortedSet || elements instanceof List) { -// //NOTE: SortedSet.comparator() is null -// sorted = elements; -// } else { - sorted = new ArrayList(elements); - Collections.sort((List) sorted, Collections.reverseOrder()); -// } - - // iterate over the sorted set - return new ExtendedIterator() - { - final Iterator itr = sorted.iterator(); - T current; - - { - current = itr.hasNext() ? 
itr.next() : null; - } - - @Override - public void skipAllBefore(T element) - { - while (element.compareTo(current) > 0) { - next(); - } - } - - @Override - public boolean hasNext() - { - return current != null; - } - - @Override - public T next() - { - if (!hasNext()) { - throw new NoSuchElementException(); - } - T prev = current; - current = itr.hasNext() ? itr.next() : null; - return prev; - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public GenericExtendedSet clone() - { - // NOTE: do not use super.clone() since it is 10 times slower! - GenericExtendedSet c = empty(); - if (elements instanceof Cloneable) { - try { - c.elements = (Collection) elements.getClass().getMethod("clone").invoke(elements); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } else { - c.elements.addAll(elements); - } - return c; - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - return setClass.getSimpleName() + ": " + elements.toString(); - } - - - - /* - * Collection methods - */ - - /** - * {@inheritDoc} - */ - @Override - public boolean add(T e) - { - if (elements instanceof List) { - final List l = (List) elements; - int pos = Collections.binarySearch(l, e); - if (pos >= 0) { - return false; - } - l.add(-(pos + 1), e); - return true; - } - return elements.add(e); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public boolean remove(Object o) - { - if (elements instanceof List) { - try { - final List l = (List) elements; - int pos = Collections.binarySearch(l, (T) o); - if (pos < 0) { - return false; - } - l.remove(pos); - return true; - } - catch (ClassCastException e) { - return false; - } - } - return elements.remove(o); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public boolean contains(Object o) - { - if (elements instanceof List) 
{ - try { - return Collections.binarySearch((List) elements, (T) o) >= 0; - } - catch (ClassCastException e) { - return false; - } - } - return elements.contains(o); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public boolean containsAll(Collection c) - { - if (isEmpty() || c == null || c.isEmpty()) { - return false; - } - if (this == c) { - return true; - } - - if (elements instanceof List - && c instanceof GenericExtendedSet - && ((GenericExtendedSet) c).elements instanceof List) { - Iterator thisItr = elements.iterator(); - Iterator otherItr = ((GenericExtendedSet) c).elements.iterator(); - while (thisItr.hasNext() && otherItr.hasNext()) { - T thisValue = thisItr.next(); - T otherValue = otherItr.next(); - - int r; - while ((r = otherValue.compareTo(thisValue)) > 0) { - if (!thisItr.hasNext()) { - return false; - } - thisValue = thisItr.next(); - } - if (r < 0) { - return false; - } - } - return !otherItr.hasNext(); - } - - return elements.containsAll(c); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(Collection c) - { - if (elements instanceof List) { - //TODO: copiare codice di union - Collection res = union(c).elements; - boolean r = !res.equals(elements); - elements = res; - return r; - } - return elements.addAll(c); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public boolean retainAll(Collection c) - { - if (elements instanceof List) { - try { - //TODO: copiare codice di intersection - Collection res = intersection((Collection) c).elements; - boolean r = !res.equals(elements); - elements = res; - return r; - } - catch (ClassCastException e) { - return false; - } - } - return elements.retainAll(c); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public boolean removeAll(Collection c) - { - if (elements instanceof List) { - try { - //TODO: copiare codice di difference - Collection res = difference((Collection) c).elements; - 
boolean r = !res.equals(elements); - elements = res; - return r; - } - catch (ClassCastException e) { - return false; - } - } - return elements.removeAll(c); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object o) - { - return o instanceof GenericExtendedSet && ((GenericExtendedSet) o).elements.equals(elements); - } - - /** - * {@inheritDoc} - */ - @Override - public int size() {return elements.size();} - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() {return elements.isEmpty();} - - /** - * {@inheritDoc} - */ - @Override - public void clear() {elements.clear();} - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() {return elements.hashCode();} - - - /* - * SortedSet methods - */ - - /** - * {@inheritDoc} - */ - @Override - public Comparator comparator() - { - return null; - } - - /** - * {@inheritDoc} - */ - @Override - public T first() - { - if (elements instanceof SortedSet) { - return ((SortedSet) elements).first(); - } - if (elements instanceof List) { - return ((List) elements).get(0); - } - return super.first(); - } - - /** - * {@inheritDoc} - */ - @Override - public T last() - { - if (elements instanceof SortedSet) { - return ((SortedSet) elements).last(); - } - if (elements instanceof List) { - return ((List) elements).get(elements.size() - 1); - } - return super.last(); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet headSet(T toElement) - { - if (elements instanceof SortedSet) { - GenericExtendedSet c = empty(); - c.elements = ((SortedSet) elements).headSet(toElement); - return c; - } - return super.headSet(toElement); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet tailSet(T fromElement) - { - if (elements instanceof SortedSet) { - GenericExtendedSet c = empty(); - c.elements = ((SortedSet) elements).tailSet(fromElement); - return c; - } - return super.headSet(fromElement); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet subSet(T 
fromElement, T toElement) - { - if (elements instanceof SortedSet) { - GenericExtendedSet c = empty(); - c.elements = ((SortedSet) elements).subSet(fromElement, toElement); - return c; - } - return super.headSet(toElement); - } - - - /* - * ExtendedSet methods - */ - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(Collection other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return 0; - } - if (this == other) { - return size(); - } - - if (elements instanceof List - && other instanceof GenericExtendedSet - && ((GenericExtendedSet) other).elements instanceof List) { - int res = 0; - Iterator thisItr = elements.iterator(); - @SuppressWarnings("unchecked") - Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); - while (thisItr.hasNext() && otherItr.hasNext()) { - T thisValue = thisItr.next(); - T otherValue = otherItr.next(); - - int r = thisValue.compareTo(otherValue); - while (r != 0) { - while ((r = thisValue.compareTo(otherValue)) > 0) { - if (!otherItr.hasNext()) { - return res; - } - otherValue = otherItr.next(); - } - if (r == 0) { - break; - } - while ((r = otherValue.compareTo(thisValue)) > 0) { - if (!thisItr.hasNext()) { - return res; - } - thisValue = thisItr.next(); - } - } - - res++; - } - return res; - } - - return super.intersectionSize(other); - } - - /** - * {@inheritDoc} - */ - @Override - public GenericExtendedSet intersection(Collection other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return empty(); - } - if (this == other) { - return clone(); - } - - if (elements instanceof List - && other instanceof GenericExtendedSet - && ((GenericExtendedSet) other).elements instanceof List) { - GenericExtendedSet res = empty(); - Iterator thisItr = elements.iterator(); - @SuppressWarnings("unchecked") - Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); - while (thisItr.hasNext() && otherItr.hasNext()) { - T thisValue = thisItr.next(); - T otherValue = 
otherItr.next(); - - int r = thisValue.compareTo(otherValue); - while (r != 0) { - while ((r = thisValue.compareTo(otherValue)) > 0) { - if (!otherItr.hasNext()) { - return res; - } - otherValue = otherItr.next(); - } - if (r == 0) { - break; - } - while ((r = otherValue.compareTo(thisValue)) > 0) { - if (!thisItr.hasNext()) { - return res; - } - thisValue = thisItr.next(); - } - } - - res.elements.add(thisValue); - } - return res; - } - - GenericExtendedSet clone = clone(); - clone.elements.retainAll(other); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public GenericExtendedSet union(Collection other) - { - if (this == other || other == null || other.isEmpty()) { - return clone(); - } - if (isEmpty()) { - GenericExtendedSet res = empty(); - res.elements.addAll(other); - return res; - } - - if (elements instanceof List - && other instanceof GenericExtendedSet - && ((GenericExtendedSet) other).elements instanceof List) { - GenericExtendedSet res = empty(); - Iterator thisItr = elements.iterator(); - @SuppressWarnings("unchecked") - Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); -mainLoop: - while (thisItr.hasNext() && otherItr.hasNext()) { - T thisValue = thisItr.next(); - T otherValue = otherItr.next(); - - int r = thisValue.compareTo(otherValue); - while (r != 0) { - while ((r = thisValue.compareTo(otherValue)) > 0) { - res.elements.add(otherValue); - if (!otherItr.hasNext()) { - res.elements.add(thisValue); - break mainLoop; - } - otherValue = otherItr.next(); - } - if (r == 0) { - break; - } - while ((r = otherValue.compareTo(thisValue)) > 0) { - res.elements.add(thisValue); - if (!thisItr.hasNext()) { - res.elements.add(otherValue); - break mainLoop; - } - thisValue = thisItr.next(); - } - } - - res.elements.add(thisValue); - } - while (thisItr.hasNext()) { - res.elements.add(thisItr.next()); - } - while (otherItr.hasNext()) { - res.elements.add(otherItr.next()); - } - return res; - } - - GenericExtendedSet clone = 
clone(); - for (T e : other) { - clone.add(e); - } - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public GenericExtendedSet difference(Collection other) - { - if (isEmpty() || this == other) { - return empty(); - } - if (other == null || other.isEmpty()) { - return clone(); - } - - if (elements instanceof List - && other instanceof GenericExtendedSet - && ((GenericExtendedSet) other).elements instanceof List) { - GenericExtendedSet res = empty(); - Iterator thisItr = elements.iterator(); - @SuppressWarnings("unchecked") - Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); -mainLoop: - while (thisItr.hasNext() && otherItr.hasNext()) { - T thisValue = thisItr.next(); - T otherValue = otherItr.next(); - - int r = thisValue.compareTo(otherValue); - while (r != 0) { - while ((r = thisValue.compareTo(otherValue)) > 0) { - if (!otherItr.hasNext()) { - res.elements.add(thisValue); - break mainLoop; - } - otherValue = otherItr.next(); - } - if (r == 0) { - break; - } - while ((r = otherValue.compareTo(thisValue)) > 0) { - res.elements.add(thisValue); - if (!thisItr.hasNext()) { - break mainLoop; - } - thisValue = thisItr.next(); - } - } - } - while (thisItr.hasNext()) { - res.elements.add(thisItr.next()); - } - return res; - } - - GenericExtendedSet clone = clone(); - clone.elements.removeAll(other); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public GenericExtendedSet symmetricDifference(Collection other) - { - if (this == other || other == null || other.isEmpty()) { - return clone(); - } - if (isEmpty()) { - GenericExtendedSet res = empty(); - res.elements.addAll(other); - return res; - } - - if (elements instanceof List - && other instanceof GenericExtendedSet - && ((GenericExtendedSet) other).elements instanceof List) { - GenericExtendedSet res = empty(); - Iterator thisItr = elements.iterator(); - @SuppressWarnings("unchecked") - Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); -mainLoop: - 
while (thisItr.hasNext() && otherItr.hasNext()) { - T thisValue = thisItr.next(); - T otherValue = otherItr.next(); - - int r = thisValue.compareTo(otherValue); - while (r != 0) { - while ((r = thisValue.compareTo(otherValue)) > 0) { - res.elements.add(otherValue); - if (!otherItr.hasNext()) { - res.elements.add(thisValue); - break mainLoop; - } - otherValue = otherItr.next(); - } - if (r == 0) { - break; - } - while ((r = otherValue.compareTo(thisValue)) > 0) { - res.elements.add(thisValue); - if (!thisItr.hasNext()) { - res.elements.add(otherValue); - break mainLoop; - } - thisValue = thisItr.next(); - } - } - } - while (thisItr.hasNext()) { - res.elements.add(thisItr.next()); - } - while (otherItr.hasNext()) { - res.elements.add(otherItr.next()); - } - return res; - } - - GenericExtendedSet clone = union(other); - clone.removeAll(intersection(other)); - return clone; - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - throw new UnsupportedOperationException(); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedSet unmodifiable() - { - GenericExtendedSet c = empty(); - c.elements = Collections.unmodifiableCollection(elements); - return c; - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(T from, T to) - { - throw new UnsupportedOperationException(); - } - - /** - * {@inheritDoc} - */ - @Override - public GenericExtendedSet convert(Collection c) - { - GenericExtendedSet res = (GenericExtendedSet) super.convert(c); - if (res.elements instanceof List) { - Collections.sort((List) res.elements); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public GenericExtendedSet convert(Object... 
e) - { - GenericExtendedSet res = (GenericExtendedSet) super.convert(e); - if (res.elements instanceof List) { - Collections.sort((List) res.elements); - } - return res; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java deleted file mode 100755 index 11532dbb191d..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java +++ /dev/null @@ -1,741 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.wrappers; - - -import io.druid.extendedset.AbstractExtendedSet; -import io.druid.extendedset.ExtendedSet; -import io.druid.extendedset.intset.IntSet; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * An {@link ExtendedSet} implementation that maps each element of the universe (i.e., the collection of all possible elements) to an integer referred to as its "index". 
- * - * @param < T > the type of elements maintained by this set - * - * @author Alessandro Colantonio - * @version $Id: IndexedSet.java 154 2011-05-30 22:19:24Z cocciasik $ - * @see ExtendedSet - * @see AbstractExtendedSet - */ -public class IndexedSet extends AbstractExtendedSet implements java.io.Serializable -{ - /** - * generated serial ID - */ - private static final long serialVersionUID = -2386771695765773453L; - - // indices - /** - * @uml.property name="indices" - * @uml.associationEnd - */ - private final IntSet indices; - - // mapping to translate items to indices and vice-versa - private final Map itemToIndex; - private final T[] indexToItem; - - /** - * Creates an empty {@link IndexedSet} based on a given collection that - * represents the set of all possible items that can be added to the - * {@link IndexedSet} instance. - *

- * VERY IMPORTANT! to correctly work and effectively reduce the - * memory allocation, new instances of {@link IndexedSet} must be - * created through the {@link #clone()} or {@link #empty()} methods and - * not by calling many times this constructor with the same - * collection for universe! - * - * @param indices {@link IntSet} instance used for internal representation - * @param universe collection of all possible items. Order will be - * preserved. - */ - @SuppressWarnings("unchecked") - public IndexedSet(IntSet indices, final Collection universe) - { - // NOTE: this procedure removes duplicates while keeping the order - indexToItem = universe instanceof Set ? (T[]) universe.toArray() : (T[]) (new LinkedHashSet(universe)).toArray(); - itemToIndex = new HashMap(Math.max((int) (indexToItem.length / .75f) + 1, 16)); - for (int i = 0; i < indexToItem.length; i++) { - itemToIndex.put(indexToItem[i], Integer.valueOf(i)); - } - this.indices = indices; - } - - /** - * Creates a {@link IndexedSet} instance from a given universe - * mapping - * - * @param itemToIndex universe item-to-index mapping - * @param indexToItem universe index-to-item mapping - * @param indices initial item set - */ - private IndexedSet(Map itemToIndex, T[] indexToItem, IntSet indices) - { - this.itemToIndex = itemToIndex; - this.indexToItem = indexToItem; - this.indices = indices; - } - - /** - * A shortcut for new IndexedSet<T>(itemToIndex, indexToItem, indices) - */ - private IndexedSet createFromIndices(IntSet indx) - { - return new IndexedSet(itemToIndex, indexToItem, indx); - } - - /** - * Checks if the given collection is a instance of {@link IndexedSet} with - * the same index mappings - * - * @param c collection to check - * - * @return true if the given collection is a instance of - * {@link IndexedSet} with the same index mappings - */ - private boolean hasSameIndices(Collection c) - { - // since indices are always re-created through constructor and - // referenced through clone(), it 
is sufficient to check just only one - // mapping table - return (c instanceof IndexedSet) && (indexToItem == ((IndexedSet) c).indexToItem); - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet clone() - { - return createFromIndices(indices.clone()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (obj == null || !(obj instanceof Collection)) { - return false; - } - IndexedSet other = convert((Collection) obj); - return this.indexToItem == other.indexToItem - && this.itemToIndex == other.itemToIndex - && this.indices.equals(other.indices); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return indices.hashCode(); - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(ExtendedSet o) - { - return indices.compareTo(convert(o).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public Comparator comparator() - { - return new Comparator() - { - @Override - public int compare(T o1, T o2) - { - // compare elements according to the universe ordering - return itemToIndex.get(o1).compareTo(itemToIndex.get(o2)); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public T first() - { - return indexToItem[indices.first()]; - } - - /** - * {@inheritDoc} - */ - @Override - public T last() - { - return indexToItem[indices.last()]; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean add(T e) - { - Integer index = itemToIndex.get(e); - if (index == null) { - throw new IllegalArgumentException("element not in the current universe"); - } - return indices.add(index.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(Collection c) - { - return c != null && !c.isEmpty() && indices.addAll(convert(c).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - indices.clear(); - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(T e) - { - 
indices.flip(itemToIndex.get(e).intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean contains(Object o) - { - if (o == null) { - return false; - } - Integer index = itemToIndex.get(o); - return index != null && indices.contains(index.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(Collection c) - { - return c == null || indices.containsAll(convert(c).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(Collection other) - { - return other == null || indices.containsAny(convert(other).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(Collection other, int minElements) - { - return other != null && !other.isEmpty() && indices.containsAtLeast(convert(other).indices, minElements); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return indices.isEmpty(); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator iterator() - { - return new ExtendedIterator() - { - final IntSet.IntIterator itr = indices.iterator(); - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public T next() {return indexToItem[itr.next()];} - - @Override - public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());} - - @Override - public void remove() {itr.remove();} - }; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator descendingIterator() - { - return new ExtendedIterator() - { - final IntSet.IntIterator itr = indices.descendingIterator(); - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public T next() {return indexToItem[itr.next()];} - - @Override - public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());} - - @Override - public void remove() {itr.remove();} - }; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean remove(Object o) - { - if (o == 
null) { - return false; - } - Integer index = itemToIndex.get(o); - return index != null && indices.remove(index.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(Collection c) - { - return c != null && !c.isEmpty() && indices.removeAll(convert(c).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean retainAll(Collection c) - { - if (isEmpty()) { - return false; - } - if (c == null || c.isEmpty()) { - indices.clear(); - return true; - } - return indices.retainAll(convert(c).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return indices.size(); - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet intersection(Collection other) - { - if (other == null) { - return empty(); - } - return createFromIndices(indices.intersection(convert(other).indices)); - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet union(Collection other) - { - if (other == null) { - return clone(); - } - return createFromIndices(indices.union(convert(other).indices)); - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet difference(Collection other) - { - if (other == null) { - return clone(); - } - return createFromIndices(indices.difference(convert(other).indices)); - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet symmetricDifference(Collection other) - { - if (other == null) { - return clone(); - } - return createFromIndices(indices.symmetricDifference(convert(other).indices)); - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet complemented() - { - return createFromIndices(indices.complemented()); - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - indices.complement(); - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(Collection other) - { - if (other == null) { - return 0; - } - return indices.intersectionSize(convert(other).indices); - } - - /** - * {@inheritDoc} - */ - 
@Override - public int unionSize(Collection other) - { - if (other == null) { - return size(); - } - return indices.unionSize(convert(other).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public int symmetricDifferenceSize(Collection other) - { - if (other == null) { - return size(); - } - return indices.symmetricDifferenceSize(convert(other).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public int differenceSize(Collection other) - { - if (other == null) { - return size(); - } - return indices.differenceSize(convert(other).indices); - } - - /** - * {@inheritDoc} - */ - @Override - public int complementSize() - { - return indices.complementSize(); - } - - /** - * Returns the collection of all possible elements - * - * @return the collection of all possible elements - */ - public IndexedSet universe() - { - IntSet allItems = indices.empty(); - allItems.fill(0, indexToItem.length - 1); - return createFromIndices(allItems); - } - - /** - * Returns the index of the given item - * - * @param item - * - * @return the index of the given item - */ - public Integer absoluteIndexOf(T item) - { - return itemToIndex.get(item); - } - - /** - * Returns the item corresponding to the given index - * - * @param i index - * - * @return the item - */ - public T absoluteGet(int i) - { - return indexToItem[i]; - } - - /** - * Returns the set of indices. Modifications to this set are reflected to - * this {@link IndexedSet} instance. Trying to perform operation on - * out-of-bound indices will throw an {@link IllegalArgumentException} - * exception. 
- * - * @return the index set - * - * @see #absoluteGet(int) - * @see #absoluteIndexOf(Object) - */ - public IntSet indices() - { - return indices; - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet empty() - { - return createFromIndices(indices.empty()); - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - return indices.bitmapCompressionRatio(); - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - return indices.collectionCompressionRatio(); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public IndexedSet convert(Collection c) - { - if (c == null) { - return empty(); - } - - // useless to convert... - if (hasSameIndices(c)) { - return (IndexedSet) c; - } - - // NOTE: cannot call super.convert(c) because of loop - IndexedSet res = empty(); - for (T t : (Collection) c) { - res.add(t); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public IndexedSet convert(Object... 
e) - { - return (IndexedSet) super.convert(e); - } - - /** - * {@inheritDoc} - */ - @Override - public List> powerSet() - { - return powerSet(1, Integer.MAX_VALUE); - } - - /** - * {@inheritDoc} - */ - @Override - public List> powerSet(int min, int max) - { - List ps = indices.powerSet(min, max); - List> res = new ArrayList>(ps.size()); - for (IntSet s : ps) { - res.add(createFromIndices(s)); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - return String.format("items = %s\nitemToIndex = %s\nindexToItem = %s\n", - indices.debugInfo(), itemToIndex.toString(), Arrays.toString(indexToItem) - ); - } - - /** - * {@inheritDoc} - */ - @Override - public double jaccardSimilarity(ExtendedSet other) - { - return indices.jaccardSimilarity(convert(other).indices); - } - - //TODO -// /** -// * {@inheritDoc} -// */ -// @Override -// public IndexedSet unmodifiable() { -// return createFromIndices(indices.unmodifiable()); -// } -// -// /** -// * {@inheritDoc} -// */ -// @Override -// public IndexedSet subSet(T fromElement, T toElement) { -// return createFromIndices(indices.subSet(itemToIndex.get(fromElement), itemToIndex.get(toElement))); -// } -// -// /** -// * {@inheritDoc} -// */ -// @Override -// public IndexedSet headSet(T toElement) { -// return createFromIndices(indices.headSet(itemToIndex.get(toElement))); -// } -// -// /** -// * {@inheritDoc} -// */ -// @Override -// public IndexedSet tailSet(T fromElement) { -// return createFromIndices(indices.tailSet(itemToIndex.get(fromElement))); -// } - - /** - * {@inheritDoc} - */ - @Override - public T get(int i) - { - return indexToItem[indices.get(i)]; - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(T e) - { - return indices.indexOf(itemToIndex.get(e).intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear(T from, T to) - { - indices.clear(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue()); - } - - /** - * {@inheritDoc} 
- */ - @Override - public void fill(T from, T to) - { - indices.fill(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue()); - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java deleted file mode 100755 index 0c92053a2381..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java +++ /dev/null @@ -1,580 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.wrappers; - -import io.druid.extendedset.AbstractExtendedSet; -import io.druid.extendedset.ExtendedSet; -import io.druid.extendedset.intset.IntSet; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -/** - * This class provides a "wrapper" for any {@link IntSet} instance in order to be used as an {@link ExtendedSet} instance. 
- * - * @author Alessandro Colantonio - * @version $Id: IntegerSet.java 153 2011-05-30 16:39:57Z cocciasik $ - */ -public class IntegerSet extends AbstractExtendedSet -{ - /** - * the collection of int numbers - * - * @uml.property name="items" - * @uml.associationEnd - */ - private final IntSet items; - - /** - * Wraps an instance of {@link IntSet} - * - * @param items the {@link IntSet} to wrap - */ - public IntegerSet(IntSet items) - { - this.items = items; - } - - /** - * @return the internal integer representation - */ - public IntSet intSet() - { - return items; - } - - /** - * Converts a generic collection of {@link Integer} instances to a - * {@link IntSet} instance. If the given collection is an - * {@link IntegerSet} instance, it returns the contained - * {@link #items} object. - * - * @param c the generic collection of {@link Integer} instances - * - * @return the resulting {@link IntSet} instance - */ - private IntSet toIntSet(Collection c) - { - // nothing to convert - if (c == null) { - return null; - } - if (c instanceof IntegerSet) { - return ((IntegerSet) c).items; - } - - // extract integers from the given collection - IntSet res = items.empty(); - List sorted = new ArrayList(c.size()); - for (Object i : c) { - try { - sorted.add((Integer) i); - } - catch (ClassCastException e) { - // do nothing - } - } - Collections.sort(sorted); - for (Integer i : sorted) { - res.add(i.intValue()); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(Collection c) - { - return items.addAll(toIntSet(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - return items.bitmapCompressionRatio(); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear(Integer from, Integer to) - { - items.clear(from.intValue(), to.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet clone() - { - // NOTE: do not use super.clone() since it is 10 times slower! 
- return new IntegerSet(items.clone()); - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - return items.collectionCompressionRatio(); - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(ExtendedSet o) - { - return items.compareTo(toIntSet(o)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet complemented() - { - return new IntegerSet(items.complemented()); - } - - /** - * {@inheritDoc} - */ - @Override - public int complementSize() - { - return items.complementSize(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(Collection other) - { - return items.containsAny(toIntSet(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(Collection other, int minElements) - { - return items.containsAtLeast(toIntSet(other), minElements); - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet convert(Collection c) - { - return new IntegerSet(toIntSet(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet convert(Object... 
e) - { - return convert(Arrays.asList(e)); - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - return getClass().getSimpleName() + "\n" + items.debugInfo(); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator descendingIterator() - { - return new ExtendedIterator() - { - final IntSet.IntIterator itr = items.descendingIterator(); - - @Override - public void remove() {itr.remove();} - - @Override - public Integer next() {return Integer.valueOf(itr.next());} - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public void skipAllBefore(Integer element) {itr.skipAllBefore(element.intValue());} - }; - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet difference(Collection other) - { - return new IntegerSet(items.difference(toIntSet(other))); - } - - /** - * {@inheritDoc} - */ - @Override - public int differenceSize(Collection other) - { - return items.differenceSize(toIntSet(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet empty() - { - return new IntegerSet(items.empty()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (!(o instanceof IntegerSet)) { - return false; - } - return items.equals(((IntegerSet) o).items); - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(Integer from, Integer to) - { - items.fill(from.intValue(), to.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public Integer first() - { - return Integer.valueOf(items.first()); - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(Integer e) - { - items.flip(e.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public Integer get(int i) - { - return Integer.valueOf(items.get(i)); - } - - /** - * {@inheritDoc} - */ - @Override - public int indexOf(Integer e) - { - return items.indexOf(e.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public 
IntegerSet intersection(Collection other) - { - return new IntegerSet(items.intersection(toIntSet(other))); - } - - /** - * {@inheritDoc} - */ - @Override - public int intersectionSize(Collection other) - { - return items.intersectionSize(toIntSet(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator iterator() - { - return new ExtendedIterator() - { - final IntSet.IntIterator itr = items.iterator(); - - @Override - public void remove() {itr.remove();} - - @Override - public Integer next() {return Integer.valueOf(itr.next());} - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public void skipAllBefore(Integer element) {itr.skipAllBefore(element.intValue());} - }; - } - - /** - * {@inheritDoc} - */ - @Override - public Integer last() - { - return Integer.valueOf(items.last()); - } - - /** - * {@inheritDoc} - */ - @Override - public List powerSet() - { - return powerSet(1, Integer.MAX_VALUE); - } - - /** - * {@inheritDoc} - */ - @Override - public List powerSet(int min, int max) - { - List ps = items.powerSet(min, max); - List res = new ArrayList(ps.size()); - for (IntSet s : ps) { - res.add(new IntegerSet(s)); - } - return res; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(Collection c) - { - return items.removeAll(toIntSet(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean retainAll(Collection c) - { - return items.retainAll(toIntSet(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet symmetricDifference(Collection other) - { - return new IntegerSet(items.symmetricDifference(toIntSet(other))); - } - - /** - * {@inheritDoc} - */ - @Override - public int symmetricDifferenceSize(Collection other) - { - return items.symmetricDifferenceSize(toIntSet(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public IntegerSet union(Collection other) - { - return new IntegerSet(items.union(toIntSet(other))); - } - - /** - * {@inheritDoc} - */ - 
@Override - public int unionSize(Collection other) - { - return items.unionSize(toIntSet(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return items.hashCode(); - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - items.complement(); - } - - /** - * {@inheritDoc} - */ - @Override - public Comparator comparator() - { - return null; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean add(Integer e) - { - return items.add(e.intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - items.clear(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean contains(Object o) - { - return o instanceof Integer && items.contains(((Integer) o).intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAll(Collection c) - { - return items.containsAll(toIntSet(c)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return items.isEmpty(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean remove(Object o) - { - return o instanceof Integer && items.remove(((Integer) o).intValue()); - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return items.size(); - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - // NOTE: by not calling super.toString(), we avoid to iterate over new - // Integer instances, thus avoiding to waste time and memory with garbage - // collection - return items.toString(); - } - - /** - * {@inheritDoc} - */ - @Override - public double jaccardSimilarity(ExtendedSet other) - { - return items.jaccardSimilarity(toIntSet(other)); - } - - /** - * {@inheritDoc} - */ - @Override - public double weightedJaccardSimilarity(ExtendedSet other) - { - return items.weightedJaccardSimilarity(toIntSet(other)); - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java 
b/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java deleted file mode 100755 index ad60d782fe98..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java +++ /dev/null @@ -1,1692 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.wrappers; - -import io.druid.extendedset.ExtendedSet; -import io.druid.extendedset.intset.ConciseSetUtils; -import io.druid.extendedset.intset.IntSet; -import io.druid.extendedset.intset.IntSet.IntIterator; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Map.Entry; -import java.util.NavigableMap; -import java.util.NoSuchElementException; -import java.util.SortedSet; -import java.util.TreeMap; - -/** - * Very similar to {@link ExtendedSet} but for the primitive long type. 
- * - * @author Alessandro Colantonio - * @version $Id: LongSet.java 154 2011-05-30 22:19:24Z cocciasik $ - */ -public class LongSet implements Cloneable, Comparable, java.io.Serializable, Iterable -{ - /** - * generated ID - */ - private static final long serialVersionUID = -6165350530254304256L; - - /** - * maximum cardinality of each subset - */ - private static int SUBSET_SIZE = ConciseSetUtils.MAX_ALLOWED_INTEGER + 1; - - /** - * transaction-item pair indices (from 0 to {@link #SUBSET_SIZE} - 1) - * - * @uml.property name="firstIndices" - * @uml.associationEnd - */ - private final IntSet firstIndices; - - /** - * transaction-item pair indices (from {@link #SUBSET_SIZE}) - */ - private final NavigableMap otherIndices; - - /** - * Creates an empty set - * - * @param block {@link IntSet} instance internally used to represent - * {@link Long} values. It can be non-empty. - */ - public LongSet(IntSet block) - { - firstIndices = block.empty(); - otherIndices = new TreeMap(); - } - - /** - * Shallow-copy constructor - */ - private LongSet(IntSet firstIndices, NavigableMap otherIndices) - { - this.firstIndices = firstIndices; - this.otherIndices = otherIndices; - } - - /** - * @return an empty {@link IntSet} instance of the same type of that of - * internally used to represent integers - */ - public IntSet emptyBlock() - { - return firstIndices.empty(); - } - - /** - * Retains only the elements in this set that are contained in the specified - * collection. In other words, removes from this set all of its elements - * that are not contained in the specified collection. 
- * - * @param other collection containing elements to be retained in this set - * - * @return true if this set changed as a result of the call - * - * @throws NullPointerException if this set contains a null element and the specified - * collection does not permit null elements (optional), or if - * the specified collection is null - * @see #remove(long) - */ - @SuppressWarnings("null") - public boolean retainAll(LongSet other) - { - if (isEmpty() || this == other) { - return false; - } - if (other == null || other.isEmpty()) { - clear(); - return true; - } - - boolean res = firstIndices.retainAll(other.firstIndices); - if (otherIndices.isEmpty()) { - return res; - } - if (other.otherIndices.isEmpty()) { - otherIndices.clear(); - return true; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return res; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - itr1.remove(); - while (itr1.hasNext()) { - itr1.next(); - itr1.remove(); - } - return true; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c < 0) { - itr1.remove(); - res = true; - } else if (c == 0) { - res |= e1.getValue().retainAll(e2.getValue()); - if (e1.getValue().isEmpty()) { - itr1.remove(); - } - } - } - } - - /** - * Generates the intersection set - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #retainAll(LongSet) - */ - @SuppressWarnings("null") - public LongSet intersection(LongSet other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return empty(); - } - if (this == other) { - return clone(); - } - - LongSet res = new LongSet(firstIndices.intersection(other.firstIndices), new TreeMap()); - if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) 
{ - return res; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return res; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - return res; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c == 0) { - IntSet s = e1.getValue().intersection(e2.getValue()); - if (!s.isEmpty()) { - res.otherIndices.put(e1.getKey(), s); - } - } - } - } - - /** - * Adds all of the elements in the specified collection to this set if - * they're not already present. - * - * @param other collection containing elements to be added to this set - * - * @return true if this set changed as a result of the call - * - * @throws NullPointerException if the specified collection contains one or more null - * elements and this set does not permit null elements, or if - * the specified collection is null - * @throws IllegalArgumentException if some property of an element of the specified collection - * prevents it from being added to this set - * @see #add(long) - */ - @SuppressWarnings("null") - public boolean addAll(LongSet other) - { - if (other == null || other.isEmpty() || this == other) { - return false; - } - - boolean res = firstIndices.addAll(other.firstIndices); - if (other.otherIndices.isEmpty()) { - return res; - } - if (otherIndices.isEmpty()) { - for (Entry e : other.otherIndices.entrySet()) { - otherIndices.put(e.getKey(), e.getValue().clone()); - } - return true; - } - Iterator> itr1 = new ArrayList>(otherIndices.entrySet()).iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - return res; - } - } - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - 
otherIndices.put(e2.getKey(), e2.getValue().clone()); - while (itr2.hasNext()) { - e2 = itr2.next(); - otherIndices.put(e2.getKey(), e2.getValue().clone()); - } - return true; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c > 0) { - otherIndices.put(e2.getKey(), e2.getValue().clone()); - res = true; - } else if (c == 0) { - res |= e1.getValue().addAll(e2.getValue()); - } - } - } - - /** - * Generates the union set - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #addAll(LongSet) - */ - @SuppressWarnings("null") - public LongSet union(LongSet other) - { - if (other == null || other.isEmpty() || this == other) { - return clone(); - } - if (isEmpty()) { - return other.clone(); - } - - LongSet res = new LongSet(firstIndices.union(other.firstIndices), new TreeMap()); - if (other.otherIndices.isEmpty()) { - for (Entry e : otherIndices.entrySet()) { - res.otherIndices.put(e.getKey(), e.getValue().clone()); - } - return res; - } - if (otherIndices.isEmpty()) { - for (Entry e : other.otherIndices.entrySet()) { - res.otherIndices.put(e.getKey(), e.getValue().clone()); - } - return res; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - if (c != 0) { - res.otherIndices.put(e2.getKey(), e2.getValue().clone()); - } - while (itr2.hasNext()) { - e2 = itr2.next(); - res.otherIndices.put(e2.getKey(), e2.getValue().clone()); - } - return res; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - while (itr1.hasNext()) { - e1 = itr1.next(); - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - } - return res; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c < 0) { - 
res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - } else if (c > 0) { - res.otherIndices.put(e2.getKey(), e2.getValue().clone()); - } else { - res.otherIndices.put(e1.getKey(), e1.getValue().union(e2.getValue())); - } - } - } - - /** - * Removes from this set all of its elements that are contained in the - * specified collection. - * - * @param other collection containing elements to be removed from this set - * - * @return true if this set changed as a result of the call - * - * @throws NullPointerException if this set contains a null element and the specified - * collection does not permit null elements (optional), or if - * the specified collection is null - * @see #remove(long) - * @see #contains(long) - */ - @SuppressWarnings("null") - public boolean removeAll(LongSet other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return false; - } - if (this == other) { - clear(); - return true; - } - - boolean res = firstIndices.removeAll(other.firstIndices); - if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) { - return res; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return res; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - return res; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c == 0) { - res |= e1.getValue().removeAll(e2.getValue()); - if (e1.getValue().isEmpty()) { - itr1.remove(); - } - } - } - } - - /** - * Generates the difference set - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #removeAll(LongSet) - */ - @SuppressWarnings("null") - public LongSet difference(LongSet other) - { - if (other == null || other.isEmpty()) { - return clone(); - } - if (isEmpty() || this == 
other) { - return empty(); - } - - LongSet res = new LongSet(firstIndices.difference(other.firstIndices), new TreeMap()); - if (otherIndices.isEmpty()) { - return res; - } - if (other.otherIndices.isEmpty()) { - for (Entry e : otherIndices.entrySet()) { - res.otherIndices.put(e.getKey(), e.getValue().clone()); - } - return res; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return res; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - while (itr1.hasNext()) { - e1 = itr1.next(); - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - } - return res; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c < 0) { - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - } else if (c == 0) { - IntSet s = e1.getValue().difference(e2.getValue()); - if (!s.isEmpty()) { - res.otherIndices.put(e1.getKey(), s); - } - } - } - } - - /** - * Generates the symmetric difference set - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #flip(long) - */ - @SuppressWarnings("null") - public LongSet symmetricDifference(LongSet other) - { - if (other == null || other.isEmpty() || this == other) { - return clone(); - } - if (isEmpty()) { - return other.clone(); - } - - LongSet res = new LongSet(firstIndices.symmetricDifference(other.firstIndices), new TreeMap()); - if (other.otherIndices.isEmpty()) { - for (Entry e : otherIndices.entrySet()) { - res.otherIndices.put(e.getKey(), e.getValue().clone()); - } - return res; - } - if (otherIndices.isEmpty()) { - for (Entry e : other.otherIndices.entrySet()) { - res.otherIndices.put(e.getKey(), e.getValue().clone()); - } - return res; - } - 
Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - if (c != 0) { - res.otherIndices.put(e2.getKey(), e2.getValue().clone()); - } - while (itr2.hasNext()) { - e2 = itr2.next(); - res.otherIndices.put(e2.getKey(), e2.getValue().clone()); - } - return res; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - while (itr1.hasNext()) { - e1 = itr1.next(); - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - } - return res; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c < 0) { - res.otherIndices.put(e1.getKey(), e1.getValue().clone()); - } else if (c > 0) { - res.otherIndices.put(e2.getKey(), e2.getValue().clone()); - } else { - res.otherIndices.put(e1.getKey(), e1.getValue().symmetricDifference(e2.getValue())); - } - } - } - - /** - * Generates the complement set. The returned set is represented by all the - * elements strictly less than {@link #last()} that do not exist in the - * current set. - * - * @return the complement set - * - * @see LongSet#complement() - */ - public LongSet complemented() - { - LongSet cloned = clone(); - cloned.complement(); - return cloned; - } - - /** - * Complements the current set. The modified set is represented by all the - * elements strictly less than {@link #last()} that do not exist in the - * current set. 
- * - * @see LongSet#complemented() - */ - public void complement() - { - if (otherIndices.isEmpty()) { - firstIndices.complement(); - return; - } - - // complement the last block - Iterator> itr = otherIndices.descendingMap().entrySet().iterator(); - Entry e = itr.next(); - e.getValue().complement(); - if (e.getValue().isEmpty()) { - itr.remove(); - } - - // complement other blocks - NavigableMap toAdd = new TreeMap(); // avoid concurrent modification - for (long i = e.getKey().longValue() - SUBSET_SIZE; i > 0L; i -= SUBSET_SIZE) { - while (e != null && e.getKey().longValue() > i) { - e = itr.hasNext() ? itr.next() : null; - } - - if (e != null && e.getKey().longValue() == i) { - if (e.getValue().add(SUBSET_SIZE - 1)) { - e.getValue().complement(); - e.getValue().add(SUBSET_SIZE - 1); - } else { - e.getValue().complement(); - } - if (e.getValue().isEmpty()) { - itr.remove(); - } - } else { - IntSet s = firstIndices.empty(); - s.fill(0, SUBSET_SIZE - 1); - toAdd.put(Long.valueOf(i), s); - } - } - otherIndices.putAll(toAdd); - if (firstIndices.add(SUBSET_SIZE - 1)) { - firstIndices.complement(); - firstIndices.add(SUBSET_SIZE - 1); - } else { - firstIndices.complement(); - } - } - - /** - * Computes the intersection set size. - *

- * This is faster than calling {@link #intersection(LongSet)} and - * then {@link #size()} - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the size - */ - @SuppressWarnings("null") - public long intersectionSize(LongSet other) - { - if (isEmpty() || other == null || other.isEmpty()) { - return 0L; - } - if (this == other) { - return size(); - } - - long res = firstIndices.intersectionSize(other.firstIndices); - if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) { - return res; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return res; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - return res; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c == 0) { - res += e1.getValue().intersectionSize(e2.getValue()); - } - } - } - - /** - * Computes the union set size. - *

- * This is faster than calling {@link #union(LongSet)} and then - * {@link #size()} - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the size - */ - public long unionSize(LongSet other) - { - return other == null ? size() : size() + other.size() - intersectionSize(other); - } - - /** - * Computes the symmetric difference set size. - *

- * This is faster than calling {@link #symmetricDifference(LongSet)} - * and then {@link #size()} - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the size - */ - public long symmetricDifferenceSize(LongSet other) - { - return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); - } - - /** - * Computes the difference set size. - *

- * This is faster than calling {@link #difference(LongSet)} and then - * {@link #size()} - * - * @param other {@link LongSet} instance that represents the right - * operand - * - * @return the size - */ - public long differenceSize(LongSet other) - { - return other == null ? size() : size() - intersectionSize(other); - } - - /** - * Computes the complement set size. - *

- * This is faster than calling {@link #complemented()} and then - * {@link #size()} - * - * @return the size - */ - public long complementSize() - { - if (isEmpty()) { - return 0L; - } - return last() - size() + 1L; - } - - /** - * Generates an empty set - * - * @return the empty set - */ - public LongSet empty() - { - return new LongSet(firstIndices.empty(), new TreeMap()); - } - - /** - * See the clone() of {@link Object} - * - * @return cloned object - */ - @Override - public LongSet clone() - { - // NOTE: do not use super.clone() since it is 10 times slower! - NavigableMap otherIndicesClone = new TreeMap(); - for (Entry e : otherIndices.entrySet()) { - otherIndicesClone.put(e.getKey(), e.getValue().clone()); - } - return new LongSet(firstIndices.clone(), otherIndicesClone); - } - - /** - * Computes the compression factor of the equivalent bitmap representation - * (1 means not compressed, namely a memory footprint similar to - * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) - * - * @return the compression factor - */ - public double bitmapCompressionRatio() - { - //TODO - throw new RuntimeException("TODO"); - } - - /** - * Computes the compression factor of the equivalent integer collection (1 - * means not compressed, namely a memory footprint similar to - * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) 
- * - * @return the compression factor - */ - public double collectionCompressionRatio() - { - //TODO - throw new RuntimeException("TODO"); - } - - /** - * @return a {@link ExtendedLongIterator} instance to iterate over the set - */ - public ExtendedLongIterator longIterator() - { - return new ExtendedLongIterator(); - } - - /** - * @return a {@link ExtendedLongIterator} instance to iterate over the set in - * descending order - */ - public ExtendedLongIterator descendingLongIterator() - { - return new ReverseLongIterator(); - } - - /** - * {@inheritDoc} - */ - @Override - public Iterator iterator() - { - return new Iterator() - { - final ExtendedLongIterator itr = longIterator(); - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public Long next() {return Long.valueOf(itr.next());} - - @Override - public void remove() {itr.remove();} - }; - } - - /** - * Prints debug info about the given {@link LongSet} implementation - * - * @return a string that describes the internal representation of the - * instance - */ - public String debugInfo() - { - StringBuilder s = new StringBuilder(); - - s.append("elements: "); - s.append(toString()); - s.append("\nfirstIndices: " + firstIndices); - s.append('\n'); - s.append("otherIndices: " + otherIndices.size()); - s.append('\n'); - for (Entry e : otherIndices.entrySet()) { - s.append('\t'); - s.append(e.getKey()); - s.append(", "); - s.append(e.getValue()); - s.append('\n'); - } - - return s.toString(); - } - - /** - * Adds to the set all the elements between first and - * last, both included. 
- * - * @param from first element - * @param to last element - */ - public void fill(long from, long to) - { - if (from > to) { - throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); - } - if (from == to) { - add(from); - return; - } - - final long firstBlockIndex = (from / SUBSET_SIZE) * SUBSET_SIZE; - final long lastBlockIndex = (to / SUBSET_SIZE) * SUBSET_SIZE; - if (firstBlockIndex == lastBlockIndex) { - // Case 1: One block - if (firstBlockIndex == 0L) { - firstIndices.fill((int) from, (int) to); - } else { - IntSet s = otherIndices.get(firstBlockIndex); - if (s == null) { - otherIndices.put(firstBlockIndex, s = firstIndices.empty()); - } - s.fill((int) (from - firstBlockIndex), (int) (to - firstBlockIndex)); - } - } else { - // Case 2: Multiple blocks - // Handle first block - if (firstBlockIndex == 0L) { - firstIndices.fill((int) from, SUBSET_SIZE - 1); - } else { - IntSet s = otherIndices.get(firstBlockIndex); - if (s == null) { - otherIndices.put(firstBlockIndex, s = firstIndices.empty()); - } - s.fill((int) (from - firstBlockIndex), SUBSET_SIZE - 1); - } - - // Handle intermediate words, if any - for (long i = firstBlockIndex + SUBSET_SIZE; i < lastBlockIndex; i += SUBSET_SIZE) { - IntSet s = firstIndices.empty(); - s.fill(0, SUBSET_SIZE - 1); - otherIndices.put(Long.valueOf(i), s); - } - - // Handle last word - IntSet s = otherIndices.get(lastBlockIndex); - if (s == null) { - otherIndices.put(lastBlockIndex, s = firstIndices.empty()); - } - s.fill(0, (int) (to - lastBlockIndex)); - } - } - - /** - * Removes from the set all the elements between first and - * last, both included. 
- * - * @param from first element - * @param to last element - */ - public void clear(long from, long to) - { - if (from > to) { - throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); - } - if (from == to) { - remove(from); - return; - } - - final long firstBlockIndex = (from / SUBSET_SIZE) * SUBSET_SIZE; - final long lastBlockIndex = (to / SUBSET_SIZE) * SUBSET_SIZE; - if (firstBlockIndex == lastBlockIndex) { - // Case 1: One block - if (firstBlockIndex == 0L) { - firstIndices.clear((int) from, (int) to); - } else { - IntSet s = otherIndices.get(firstBlockIndex); - if (s != null) { - s.clear((int) (from - firstBlockIndex), (int) (to - firstBlockIndex)); - if (s.isEmpty()) { - otherIndices.remove(firstBlockIndex); - } - } - } - } else { - // Case 2: Multiple blocks - // Handle first block - if (firstBlockIndex == 0L) { - firstIndices.clear((int) from, SUBSET_SIZE - 1); - } else { - IntSet s = otherIndices.get(firstBlockIndex); - if (s != null) { - s.clear((int) (from - firstBlockIndex), SUBSET_SIZE - 1); - if (s.isEmpty()) { - otherIndices.remove(firstBlockIndex); - } - } - } - - // Handle intermediate words, if any - for (long i = firstBlockIndex + SUBSET_SIZE; i < lastBlockIndex; i += SUBSET_SIZE) { - otherIndices.remove(Long.valueOf(i)); - } - - // Handle last word - IntSet s = otherIndices.get(lastBlockIndex); - if (s != null) { - s.clear(0, (int) (to - lastBlockIndex)); - if (s.isEmpty()) { - otherIndices.remove(lastBlockIndex); - } - } - } - } - - /** - * Adds the element if it not existing, or removes it if existing - * - * @param e element to flip - * - * @see #symmetricDifference(LongSet) - */ - public void flip(long e) - { - if (e < SUBSET_SIZE) { - firstIndices.flip((int) e); - return; - } - - final long block = (e / SUBSET_SIZE) * SUBSET_SIZE; - IntSet s = otherIndices.get(block); - if (s == null) { - otherIndices.put(block, s = firstIndices.empty()); - } - s.flip((int) (e - block)); - if (s.isEmpty()) { - otherIndices.remove(block); - 
} - } - - /** - * Gets the ith element of the set - * - * @param index position of the element in the sorted set - * - * @return the ith element of the set - * - * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to - * {@link #size()} - */ - public long get(long index) - { - if (index < firstIndices.size()) { - return firstIndices.get((int) index); - } - - index -= firstIndices.size(); - for (Entry e : otherIndices.entrySet()) { - if (index < e.getValue().size()) { - return e.getKey().longValue() + e.getValue().get((int) index); - } - index -= e.getValue().size(); - } - throw new IndexOutOfBoundsException(Long.toString(index)); - } - - /** - * Provides position of element within the set. - *

- * It returns -1 if the element does not exist within the set. - * - * @param i element of the set - * - * @return the element position - */ - public long indexOf(long i) - { - if (i < SUBSET_SIZE) { - return firstIndices.indexOf((int) i); - } - long prev = firstIndices.size(); - for (Entry e : otherIndices.entrySet()) { - if (i < e.getKey().longValue() + SUBSET_SIZE) { - return prev + e.getValue().indexOf((int) (i - e.getKey().longValue())); - } - prev += e.getValue().size(); - } - return -1L; - } - - /** - * Converts a given array into an instance of the current class. - * - * @param a array to use to generate the new instance - * - * @return the converted collection - */ - public LongSet convert(long... a) - { - LongSet res = empty(); - if (a != null) { - a = Arrays.copyOf(a, a.length); - Arrays.sort(a); - for (long i : a) { - res.add(i); - } - } - return res; - } - - /** - * Converts a given array into an instance of the current class. - * - * @param a array to use to generate the new instance - * - * @return the converted collection - */ - public LongSet convert(Collection a) - { - LongSet res = empty(); - Collection sorted; - if (a != null) { - if (a instanceof SortedSet && ((SortedSet) a).comparator() == null) { - sorted = a; - } else { - sorted = new ArrayList(a); - Collections.sort((List) sorted); - } - for (long i : sorted) { - res.add(i); - } - } - return res; - } - - /** - * Returns the first (lowest) element currently in this set. - * - * @return the first (lowest) element currently in this set - * - * @throws NoSuchElementException if this set is empty - */ - public long first() - { - if (!firstIndices.isEmpty()) { - return firstIndices.first(); - } - if (otherIndices.isEmpty()) { - throw new NoSuchElementException(); - } - Entry e = otherIndices.firstEntry(); - return e.getKey().longValue() + e.getValue().first(); - } - - /** - * Returns the last (highest) element currently in this set. 
- * - * @return the last (highest) element currently in this set - * - * @throws NoSuchElementException if this set is empty - */ - public long last() - { - if (otherIndices.isEmpty() && firstIndices.isEmpty()) { - throw new NoSuchElementException(); - } - if (!otherIndices.isEmpty()) { - Entry e = otherIndices.lastEntry(); - return e.getKey().longValue() + e.getValue().last(); - } - return firstIndices.last(); - } - - /** - * @return the number of elements in this set (its cardinality) - */ - public long size() - { - long res = firstIndices.size(); - for (Entry e : otherIndices.entrySet()) { - res += e.getValue().size(); - } - return res; - } - - /** - * @return true if this set contains no elements - */ - public boolean isEmpty() - { - return firstIndices.isEmpty() && otherIndices.isEmpty(); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return 31 * firstIndices.hashCode() + otherIndices.hashCode(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (!(obj instanceof LongSet)) { - return false; - } - final LongSet other = (LongSet) obj; - return firstIndices.equals(other.firstIndices) - && otherIndices.equals(other.otherIndices); - } - - /** - * Returns true if this set contains the specified element. - * - * @param i element whose presence in this set is to be tested - * - * @return true if this set contains the specified element - */ - public boolean contains(long i) - { - if (i < SUBSET_SIZE) { - return firstIndices.contains((int) i); - } - long first = (i / SUBSET_SIZE) * SUBSET_SIZE; - IntSet s = otherIndices.get(first); - if (s == null) { - return false; - } - return s.contains((int) (i - first)); - } - - /** - * Adds the specified element to this set if it is not already present. It - * ensures that sets never contain duplicate elements. 
- * - * @param i element to be added to this set - * - * @return true if this set did not already contain the specified - * element - * - * @throws IllegalArgumentException if some property of the specified element prevents it from - * being added to this set - */ - public boolean add(long i) - { - if (i < SUBSET_SIZE) { - return firstIndices.add((int) i); - } - long first = (i / SUBSET_SIZE) * SUBSET_SIZE; - IntSet s = otherIndices.get(first); - if (s == null) { - otherIndices.put(first, s = firstIndices.empty()); - } - return s.add((int) (i - first)); - } - - /** - * Removes the specified element from this set if it is present. - * - * @param i object to be removed from this set, if present - * - * @return true if this set contained the specified element - * - * @throws UnsupportedOperationException if the remove operation is not supported by this set - */ - public boolean remove(long i) - { - if (i < SUBSET_SIZE) { - return firstIndices.remove((int) i); - } - long first = (i / SUBSET_SIZE) * SUBSET_SIZE; - IntSet s = otherIndices.get(first); - if (s == null) { - return false; - } - boolean res = s.remove((int) (i - first)); - if (res && s.isEmpty()) { - otherIndices.remove(first); - } - return res; - } - - /** - * Returns true if this set contains all of the elements of the - * specified collection. 
- * - * @param other collection to be checked for containment in this set - * - * @return true if this set contains all of the elements of the - * specified collection - * - * @throws NullPointerException if the specified collection contains one or more null - * elements and this set does not permit null elements - * (optional), or if the specified collection is null - * @see #contains(long) - */ - @SuppressWarnings("null") - public boolean containsAll(LongSet other) - { - if (other == null || other.isEmpty() || other == this) { - return true; - } - if (isEmpty()) { - return false; - } - - if (!firstIndices.containsAll(other.firstIndices)) { - return false; - } - if (other.otherIndices.isEmpty()) { - return true; - } - if (otherIndices.isEmpty()) { - return false; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return c == 0 && !itr2.hasNext(); - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - return true; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c > 0) { - return false; - } else if (c == 0) { - if (!e1.getValue().containsAll(e2.getValue())) { - return false; - } - } - } - } - - /** - * Returns true if the specified {@link LongSet} - * instance contains any elements that are also contained within this - * {@link LongSet} instance - * - * @param other {@link LongSet} to intersect with - * - * @return a boolean indicating whether this {@link LongSet} - * intersects the specified {@link LongSet}. 
- */ - @SuppressWarnings("null") - public boolean containsAny(LongSet other) - { - if (other == null || other.isEmpty() || other == this) { - return true; - } - if (isEmpty()) { - return false; - } - - if (firstIndices.containsAny(other.firstIndices) && !other.firstIndices.isEmpty()) { - return true; - } - if (other.otherIndices.isEmpty() || otherIndices.isEmpty()) { - return false; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return false; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - return false; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c == 0 && e1.getValue().containsAny(e2.getValue())) { - return true; - } - } - } - - /** - * Returns true if the specified {@link LongSet} - * instance contains at least minElements elements that are - * also contained within this {@link LongSet} instance - * - * @param other {@link LongSet} instance to intersect with - * @param minElements minimum number of elements to be contained within this - * {@link LongSet} instance - * - * @return a boolean indicating whether this {@link LongSet} - * intersects the specified {@link LongSet}. 
- * - * @throws IllegalArgumentException if minElements < 1 - */ - @SuppressWarnings("null") - public boolean containsAtLeast(LongSet other, long minElements) - { - if (minElements < 1) { - throw new IllegalArgumentException(); - } - if (this == other) { - return size() >= minElements; - } - if (other == null || other.isEmpty() || isEmpty() || size() < minElements) { - return false; - } - - long res = firstIndices.intersectionSize(other.firstIndices); - if (res >= minElements) { - return true; - } - if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) { - return false; - } - Iterator> itr1 = otherIndices.entrySet().iterator(); - Iterator> itr2 = other.otherIndices.entrySet().iterator(); - Entry e1 = null; - Entry e2 = null; - int c = 0; - while (true) { - if (c <= 0) { - if (itr1.hasNext()) { - e1 = itr1.next(); - } else { - return false; - } - } - if (c >= 0) { - if (itr2.hasNext()) { - e2 = itr2.next(); - } else { - return false; - } - } - - c = e1.getKey().compareTo(e2.getKey()); - if (c == 0) { - res += e1.getValue().intersectionSize(e2.getValue()); - if (res >= minElements) { - return true; - } - } - } - } - - /** - * Removes all of the elements from this set. The set will be empty after - * this call returns. - */ - public void clear() - { - firstIndices.clear(); - otherIndices.clear(); - } - - /** - * @return an array containing all the elements in this set, in the same - * order. - */ - public long[] toArray() - { - if (isEmpty()) { - return null; - } - return toArray(new long[(int) size()]); - } - - /** - * Returns an array containing all of the elements in this set. - *

- * If this set fits in the specified array with room to spare (i.e., the - * array has more elements than this set), the element in the array - * immediately following the end of the set are left unchanged. - * - * @param a the array into which the elements of this set are to be - * stored. - * - * @return the array containing all the elements in this set - * - * @throws NullPointerException if the specified array is null - * @throws IllegalArgumentException if this set does not fit in the specified array - */ - public long[] toArray(long[] a) - { - if (a.length < size()) { - throw new IllegalArgumentException(); - } - if (isEmpty()) { - return a; - } - ExtendedLongIterator itr = longIterator(); - int i = 0; - while (itr.hasNext()) { - a[i++] = itr.next(); - } - return a; - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - ExtendedLongIterator itr = longIterator(); - if (!itr.hasNext()) { - return "[]"; - } - - StringBuilder sb = new StringBuilder(); - sb.append('['); - for (; ; ) { - long e = itr.next(); - sb.append(e); - if (!itr.hasNext()) { - return sb.append(']').toString(); - } - sb.append(", "); - } - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(LongSet o) - { - //TODO - throw new RuntimeException("TODO"); - } - - /** - * A {@link Iterator} -like interface that allows to "skip" some elements of the set - */ - public class ExtendedLongIterator - { - /** - * @uml.property name="itr" - * @uml.associationEnd - */ - protected IntIterator itr; - protected Iterator> otherItrs; - protected long first = 0; - /** - * @uml.property name="current" - * @uml.associationEnd - */ - protected IntSet current = null; - - private ExtendedLongIterator() - { - itr = firstIndices.iterator(); - otherItrs = otherIndices.entrySet().iterator(); - first = 0; - } - - protected void nextItr() - { - Entry e = otherItrs.next(); - current = e.getValue(); - itr = e.getValue().iterator(); - first = e.getKey().longValue(); - } - - /** - * 
@return true if the iterator has more elements. - */ - public boolean hasNext() - { - return otherItrs.hasNext() || itr.hasNext(); - } - - /** - * @return the next element in the iteration. - * - * @throws NoSuchElementException iteration has no more elements. - */ - public long next() - { - if (!itr.hasNext()) { - nextItr(); - } - return first + itr.next(); - } - - /** - * Removes from the underlying collection the last element returned by - * the iterator (optional operation). This method can be called only - * once per call to next. The behavior of an iterator is - * unspecified if the underlying collection is modified while the - * iteration is in progress in any way other than by calling this - * method. - * - * @throws UnsupportedOperationException if the remove operation is not supported by - * this Iterator. - * @throws IllegalStateException if the next method has not yet been called, - * or the remove method has already been called - * after the last call to the next method. - */ - public void remove() - { - itr.remove(); - if (current != null && current.isEmpty()) { - otherItrs.remove(); - } - } - - /** - * Skips all the elements before the the specified element, so that - * {@link #next()} gives the given element or, if it does not exist, the - * element immediately after according to the sorting provided by this - * set. - *

- * If element is less than the next element, it does - * nothing - * - * @param element first element to not skip - */ - public void skipAllBefore(long element) - { - while (element >= first + SUBSET_SIZE) { - if (otherItrs.hasNext()) { - nextItr(); - } else { - itr.skipAllBefore(SUBSET_SIZE - 1); // no next - assert !itr.hasNext(); - return; - } - } - if (element < first) { - return; - } - itr.skipAllBefore((int) (element - first)); - } - } - - /** - * Iteration over the union of all indices, reverse order - */ - private class ReverseLongIterator extends ExtendedLongIterator - { - private ReverseLongIterator() - { - super(); - otherItrs = otherIndices.descendingMap().entrySet().iterator(); - nextItr(); - } - - @Override - protected void nextItr() - { - if (otherItrs.hasNext()) { - Entry e = otherItrs.next(); - current = e.getValue(); - itr = e.getValue().descendingIterator(); - first = e.getKey().longValue(); - } else { - itr = firstIndices.descendingIterator(); - current = null; - first = 0; - } - } - - @Override - public void skipAllBefore(long element) - { - while (element <= first) { - nextItr(); - } - if (element > first + SUBSET_SIZE) { - return; - } - itr.skipAllBefore((int) (element - first)); - } - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java deleted file mode 100755 index 3c1529e204be..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java +++ /dev/null @@ -1,2052 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.wrappers.matrix; - -import io.druid.extendedset.intset.IntSet; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Formatter; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; - -/** - * Very similar to {@link IntSet} but for pairs of ints, that is a binary matrix - * - * @author Alessandro Colantonio - * @version $Id$ - * @see IntSet - */ -public class BinaryMatrix implements Cloneable, Comparable -{ - /** - * set of all rows - */ - private final List rows = new ArrayList(); - - /** - * {@link IntSet} instance to create empty rows - * - * @uml.property name="template" - * @uml.associationEnd - */ - private final IntSet template; - - /** - * used to cache the returned value - */ - private final int[] resultCache = new int[2]; - - /** - * Creates an empty matrix. The matrix is internally represented by putting - * rows (transactions) in sequence. The provided constructor allows to - * specify which {@link IntSet} instance must be used to internally - * represent rows. 
- * - * @param template {@link IntSet} instance to create empty rows - */ - public BinaryMatrix(IntSet template) - { - this.template = template; - } - - /** - * @return {@link IntSet} instance internally used to represent rows - */ - public IntSet emptyRow() - { - return template.empty(); - } - - /** - * Remove null cells at the end of {@link #rows} - */ - private void fixRows() - { - int last = rows.size() - 1; - while (last >= 0 && rows.get(last) == null) { - rows.remove(last--); - } - } - - /** - * Generates the intersection matrix - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #retainAll(BinaryMatrix) - */ - public BinaryMatrix intersection(BinaryMatrix other) - { - BinaryMatrix res = empty(); - final int rowCount = Math.min(rows.size(), other.rows.size()); - for (int i = 0; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 == null || s2 == null) { - res.rows.add(null); - } else { - IntSet r = s1.intersection(s2); - if (r.isEmpty()) { - res.rows.add(null); - } else { - res.rows.add(r); - } - } - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - res.fixRows(); - return res; - } - - /** - * Generates the union matrix - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #addAll(BinaryMatrix) - */ - public BinaryMatrix union(BinaryMatrix other) - { - BinaryMatrix res = empty(); - final int rowCount = Math.min(rows.size(), other.rows.size()); - int i = 0; - for (; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 == null) { - if (s2 == null) { - res.rows.add(null); - } else { - res.rows.add(s2.clone()); - } - } else { - if (s2 == null) { - res.rows.add(s1.clone()); - } else { - res.rows.add(s1.union(s2)); - } - } - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - 
for (; i < rows.size(); i++) { - IntSet s = rows.get(i); - res.rows.add(s == null ? null : s.clone()); - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - for (; i < other.rows.size(); i++) { - IntSet s = other.rows.get(i); - res.rows.add(s == null ? null : s.clone()); - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - return res; - } - - /** - * Generates the difference matrix - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #removeAll(BinaryMatrix) - */ - public BinaryMatrix difference(BinaryMatrix other) - { - BinaryMatrix res = empty(); - final int rowCount = Math.min(rows.size(), other.rows.size()); - int i = 0; - for (; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 == null) { - res.rows.add(null); - } else { - if (s2 == null) { - res.rows.add(s1.clone()); - } else { - IntSet r = s1.difference(s2); - res.rows.add(r.isEmpty() ? null : r); - } - } - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - for (; i < rows.size(); i++) { - IntSet s = rows.get(i); - res.rows.add(s == null ? 
null : s.clone()); - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - res.fixRows(); - return res; - } - - /** - * Generates the symmetric difference matrix - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the result of the operation - * - * @see #flip(int, int) - */ - public BinaryMatrix symmetricDifference(BinaryMatrix other) - { - BinaryMatrix res = empty(); - final int rowCount = Math.min(rows.size(), other.rows.size()); - int i = 0; - for (; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 == null) { - if (s2 == null) { - res.rows.add(null); - } else { - res.rows.add(s2.clone()); - } - } else { - if (s2 == null) { - res.rows.add(s1.clone()); - } else { - res.rows.add(s1.symmetricDifference(s2)); - } - } - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - for (; i < rows.size(); i++) { - IntSet s = rows.get(i); - res.rows.add(s == null ? null : s.clone()); - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - for (; i < other.rows.size(); i++) { - IntSet s = other.rows.get(i); - res.rows.add(s == null ? null : s.clone()); - assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); - } - res.fixRows(); - return res; - } - - /** - * Generates the complement matrix, namely flipping all the cells. - * - * @return the complement matrix - * - * @see BinaryMatrix#complement() - */ - public BinaryMatrix complemented() - { - BinaryMatrix res = empty(); - - final int maxCol = maxCol(); - - for (int i = 0; i < rows.size(); i++) { - IntSet s = rows.get(i); - - if (s == null) { - s = template.empty(); - s.fill(0, maxCol); - } else { - s.add(maxCol + 1); - s.complemented(); - if (s.isEmpty()) { - s = null; - } - } - - res.rows.add(s); - } - - res.fixRows(); - return res; - } - - /** - * Complements the current matrix. 
- * - * @see BinaryMatrix#complemented() - */ - public void complement() - { - final int maxCol = maxCol(); - - for (int i = 0; i < rows.size(); i++) { - IntSet s = rows.get(i); - - if (s == null) { - s = template.empty(); - s.fill(0, maxCol - 1); - rows.set(i, s); - } else { - s.add(maxCol + 1); - s.complement(); - if (s.isEmpty()) { - rows.set(i, null); - } - } - } - - fixRows(); - } - - /** - * Returns true if the specified {@link BinaryMatrix} instance - * contains any cell that is also contained within this {@link BinaryMatrix} - * instance - * - * @param other {@link BinaryMatrix} to intersect with - * - * @return a boolean indicating whether this {@link BinaryMatrix} intersects - * the specified {@link BinaryMatrix}. - */ - public boolean containsAny(BinaryMatrix other) - { - final int rowCount = Math.min(rows.size(), other.rows.size()); - for (int i = 0; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 != null && s2 != null) { - if (s1.containsAny(s2)) { - return true; - } - } - } - return false; - } - - /** - * Returns true if the specified {@link BinaryMatrix} instance - * contains at least minElements cells that are also contained - * within this {@link BinaryMatrix} instance - * - * @param other {@link BinaryMatrix} instance to intersect with - * @param minCells minimum number of cells to be contained within this - * {@link BinaryMatrix} instance - * - * @return a boolean indicating whether this {@link BinaryMatrix} intersects - * the specified {@link BinaryMatrix}. 
- * - * @throws IllegalArgumentException if minElements < 1 - */ - public boolean containsAtLeast(BinaryMatrix other, int minCells) - { - // special cases - if (minCells < 1) { - throw new IllegalArgumentException(); - } - int size = size(); - if ((size < minCells) || other == null || other.isEmpty() || isEmpty()) { - return false; - } - if (this == other) { - return size >= minCells; - } - - // exact count before the last row - int res = 0; - final int last = Math.min(rows.size(), other.rows.size()) - 1; - for (int i = 0; i < last; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 != null && s2 != null) { - res += s1.intersectionSize(s2); - if (res >= minCells) { - return true; - } - } - } - - // last row more efficient! - IntSet l1 = rows.get(last); - IntSet l2 = other.rows.get(last); - if (l1 == null || l2 == null) { - return false; - } - return l1.containsAtLeast(l2, minCells - res); - } - - /** - * Computes the intersection matrix size. - *

- * This is faster than calling {@link #intersection(BinaryMatrix)} and then - * {@link #size()} - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the size - */ - public int intersectionSize(BinaryMatrix other) - { - int res = 0; - final int rowCount = Math.min(rows.size(), other.rows.size()); - for (int i = 0; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 != null && s2 != null) { - res += s1.intersectionSize(s2); - } - } - return res; - } - - /** - * Computes the union matrix size. - *

- * This is faster than calling {@link #union(BinaryMatrix)} and then - * {@link #size()} - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the size - */ - public int unionSize(BinaryMatrix other) - { - return other == null ? size() : size() + other.size() - intersectionSize(other); - } - - /** - * Computes the symmetric difference matrix size. - *

- * This is faster than calling {@link #symmetricDifference(BinaryMatrix)} - * and then {@link #size()} - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the size - */ - public int symmetricDifferenceSize(BinaryMatrix other) - { - return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); - } - - /** - * Computes the difference matrix size. - *

- * This is faster than calling {@link #difference(BinaryMatrix)} and then - * {@link #size()} - * - * @param other {@link BinaryMatrix} instance that represents the right - * operand - * - * @return the size - */ - public int differenceSize(BinaryMatrix other) - { - return other == null ? size() : size() - intersectionSize(other); - } - - /** - * Computes the complement set size. - *

- * This is faster than calling {@link #complemented()} and then - * {@link #size()} - * - * @return the size - */ - public int complementSize() - { - final int maxCol = maxCol(); - int res = 0; - for (int i = 0; i < rows.size(); i++) { - IntSet s = rows.get(i); - res += maxCol + 1; - if (s != null) { - res -= s.size(); - } - } - return res; - } - - /** - * Generates an empty matrix of the same dimension - * - * @return the empty matrix - */ - public BinaryMatrix empty() - { - return new BinaryMatrix(template); - } - - /** - * See the clone() of {@link Object} - * - * @return cloned object - */ - @Override - public BinaryMatrix clone() - { - BinaryMatrix res = empty(); - for (IntSet r : rows) { - res.rows.add(r == null ? null : r.clone()); - } - return res; - } - - /** - * Computes the compression factor of the equivalent bitmap representation - * (1 means not compressed, namely a memory footprint similar to - * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) - * - * @return the compression factor - */ - public double bitmapCompressionRatio() - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - /** - * Computes the compression factor of the equivalent integer collection (1 - * means not compressed, namely a memory footprint similar to - * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) 
- * - * @return the compression factor - */ - public double collectionCompressionRatio() - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - /** - * @return a {@link CellIterator} instance to iterate over the matrix - */ - public CellIterator iterator() - { - if (isEmpty()) { - return new CellIterator() - { - @Override - public boolean hasNext() {return false;} - - @Override - public int[] next() {throw new NoSuchElementException();} - - @Override - public void remove() {throw new IllegalStateException();} - - @Override - public void skipAllBefore(int row, int col) {return;} - }; - } - - return new CellIterator() - { - private final int[] itrResultCache = new int[2]; - int curRow = 0; - IntSet.IntIterator curRowItr; - - { - while (rows.get(curRow) == null) { - curRow++; - } - curRowItr = rows.get(curRow).iterator(); - itrResultCache[0] = curRow; - } - - @Override - public int[] next() - { - if (!curRowItr.hasNext()) { - IntSet s; - while ((s = rows.get(++curRow)) == null) {/**/} - curRowItr = s.iterator(); - itrResultCache[0] = curRow; - } - itrResultCache[1] = curRowItr.next(); - return itrResultCache; - } - - @Override - public boolean hasNext() - { - return curRow < rows.size() - 1 || curRowItr.hasNext(); - } - - @Override - public void skipAllBefore(int row, int col) - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - @Override - public void remove() - { - throw new UnsupportedOperationException("TODO"); //TODO - } - }; - } - - /** - * @return a {@link CellIterator} instance to iterate over the matrix in - * descending order - */ - public CellIterator descendingIterator() - { - if (isEmpty()) { - return new CellIterator() - { - @Override - public boolean hasNext() {return false;} - - @Override - public int[] next() {throw new NoSuchElementException();} - - @Override - public void remove() {throw new IllegalStateException();} - - @Override - public void skipAllBefore(int row, int col) {return;} - }; - } - - return new 
CellIterator() - { - final int minRow; - private final int[] itrResultCache = new int[2]; - int curRow = rows.size() - 1; - IntSet.IntIterator curRowItr; - - { - int m = 0; - while (rows.get(m) == null) { - m++; - } - minRow = m; - curRowItr = rows.get(curRow).descendingIterator(); - itrResultCache[0] = curRow; - } - - @Override - public int[] next() - { - if (!curRowItr.hasNext()) { - IntSet s; - while ((s = rows.get(--curRow)) == null) {/**/} - curRowItr = s.descendingIterator(); - itrResultCache[0] = curRow; - } - itrResultCache[1] = curRowItr.next(); - return itrResultCache; - } - - @Override - public boolean hasNext() - { - return curRow > minRow || curRowItr.hasNext(); - } - - @Override - public void skipAllBefore(int row, int col) - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - @Override - public void remove() - { - throw new UnsupportedOperationException("TODO"); //TODO - } - }; - } - - /** - * Prints debug info about the given {@link BinaryMatrix} implementation - * - * @return a string that describes the internal representation of the - * instance - */ - public String debugInfo() - { - if (isEmpty()) { - return "empty"; - } - - StringBuilder s = new StringBuilder(); - Formatter f = new Formatter(s); - - String format = String.format("%%%dd) ", (int) Math.log10(rows.size()) + 1); - for (int i = 0; i < rows.size(); i++) { - f.format(format, i); - s.append(rows.get(i) == null ? "-" : rows.get(i).toString()); - s.append('\n'); - } - - return s.toString(); - } - - /** - * Adds to the matrix all the cells of the specified sub-matrix, both - * corners included. 
- * - * @param fromRow first row of the sub-matrix - * @param fromCol first column of the sub-matrix - * @param toRow last row of the sub-matrix - * @param toCol last column of the sub-matrix - */ - public void fill(int fromRow, int fromCol, int toRow, int toCol) - { - if (fromRow > toRow) { - throw new IndexOutOfBoundsException("fromRow: " + fromRow + " > toRow: " + toRow); - } - if (fromCol > toCol) { - throw new IndexOutOfBoundsException("fromCol: " + fromCol + " > toCol: " + toCol); - } - - for (int r = rows.size(); r <= toRow; r++) { - rows.add(null); - } - - for (int r = fromRow; r <= toRow; r++) { - IntSet s = rows.get(r); - if (s == null) { - rows.set(r, s = template.empty()); - } - s.fill(fromCol, toCol); - } - } - - /** - * Removes from the set all the cells of the specified sub-matrix, both - * corners included. - * - * @param fromRow first row of the sub-matrix - * @param fromCol first column of the sub-matrix - * @param toRow last row of the sub-matrix - * @param toCol last column of the sub-matrix - */ - public void clear(int fromRow, int fromCol, int toRow, int toCol) - { - if (fromRow > toRow) { - throw new IndexOutOfBoundsException("fromRow: " + fromRow + " > toRow: " + toRow); - } - if (fromCol > toCol) { - throw new IndexOutOfBoundsException("fromCol: " + fromCol + " > toCol: " + toCol); - } - - for (int r = Math.min(toRow, rows.size() - 1); r >= fromRow; r--) { - IntSet s = rows.get(r); - if (s == null) { - continue; - } - s.clear(fromCol, toCol); - if (s.isEmpty()) { - rows.set(r, null); - } - } - fixRows(); - } - - /** - * Adds the cell if it not existing, or removes it if existing - * - * @param row row of the cell to flip - * @param col column of the cell to flip - * - * @see #symmetricDifference(BinaryMatrix) - */ - public void flip(int row, int col) - { - while (row >= rows.size()) { - rows.add(null); - } - IntSet r = rows.get(row); - if (r == null) { - rows.set(row, r = template.empty()); - } - r.flip(col); - if (r.isEmpty()) { - 
rows.set(row, null); - fixRows(); - } - } - - /** - * Gets the ith cell of the matrix. - * IMPORTANT: each call returns an array of two elements, where the - * first element is the row, while the second element is the column of the - * current cell. In order to reduce the produced heap garbage, there is only - * one array instantiated for each {@link BinaryMatrix} instance, - * whose content is overridden at each method call. - * - * @param i position of the cell in the sorted matrix - * - * @return the ith cell of the matrix, as a pair - * <row,column> - * - * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to - * {@link #size()} - */ - public int[] get(int i) - { - for (int r = 0; r < rows.size(); r++) { - IntSet s = rows.get(r); - if (s == null) { - continue; - } - int ss = s.size(); - if (ss <= i) { - i -= ss; - } else { - resultCache[0] = r; - resultCache[1] = s.get(i); - return resultCache; - } - } - throw new NoSuchElementException(); - } - - /** - * Provides position of cell within the matrix. - *

- * It returns -1 if the cell does not exist within the set. - * - * @param row row of the cell - * @param col column of the cell - * - * @return the cell position - */ - public int indexOf(int row, int col) - { - if (row >= rows.size() || rows.get(row) == null) { - return -1; - } - int res = rows.get(row).indexOf(col); - if (res == -1) { - return -1; - } - for (int r = 0; r < row; r++) { - IntSet s = rows.get(r); - if (s == null) { - continue; - } - res += s.size(); - } - return res; - } - - /** - * Converts a given matrix of boolean n x m into an instance - * of the current class. - * - * @param a array to use to generate the new instance - * - * @return the converted collection - */ - public BinaryMatrix convert(boolean[][] a) - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - /** - * Returns the first (lowest) cell currently in this set. IMPORTANT: - * each call returns an array of two elements, where the first element is - * the row, while the second element is the column of the current cell. In - * order to reduce the produced heap garbage, there is only one array - * instantiated for each {@link BinaryMatrix} instance, whose content is - * overridden at each method call. - * - * @return the first (lowest) cell currently in this set - * - * @throws NoSuchElementException if this set is empty - */ - public int[] first() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - - // find the first non-empty row - int i = 0; - IntSet s; - while ((s = rows.get(i)) == null) { - i++; - } - - // prepare the result - resultCache[0] = i; - resultCache[1] = s.first(); - return resultCache; - } - - /** - * Returns the last (highest) cell currently in this set. IMPORTANT: - * each call returns an array of two elements, where the first element is - * the row, while the second element is the column of the current cell. 
In - * order to reduce the produced heap garbage, there is only one array - * instantiated for each {@link BinaryMatrix} instance, whose content is - * overridden at each method call. - * - * @return the last (highest) cell currently in this set - * - * @throws NoSuchElementException if this set is empty - */ - public int[] last() - { - if (isEmpty()) { - throw new NoSuchElementException(); - } - resultCache[0] = rows.size() - 1; - resultCache[1] = rows.get(rows.size() - 1).last(); - return resultCache; - } - - /** - * @return the number of cells in this matrix (its cardinality) - */ - public int size() - { - int res = 0; - for (IntSet s : rows) { - if (s != null) { - res += s.size(); - } - } - return res; - } - - /** - * @return true if this matrix contains no cells - */ - public boolean isEmpty() - { - return rows.isEmpty(); - } - - /** - * Returns true if this set contains the specified cell. - * - * @param row row of the cell - * @param col column of the cell - * - * @return true if this matrix contains the specified cell - */ - public boolean contains(int row, int col) - { - return row >= 0 && col >= 0 && row < rows.size() - && rows.get(row) != null && rows.get(row).contains(col); - } - - /** - * Adds the specified cell to this matrix if it is not already present. It - * ensures that matrices never contain duplicate cells. - * - * @param row row of the cell - * @param col column of the cell - * - * @return true if this matrix did not already contain the - * specified cell - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean add(int row, int col) - { - while (row >= rows.size()) { - rows.add(null); - } - IntSet r = rows.get(row); - if (r == null) { - rows.set(row, r = template.empty()); - } - return r.add(col); - } - - /** - * Adds the specified cells to this matrix, if not already present. The - * cells are represented by a given row and a set of columns. 
- * - * @param row index of the row - * @param cols indices of the columns - * - * @return true if this matrix did not already contain the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean addAll(int row, IntSet cols) - { - while (row >= rows.size()) { - rows.add(null); - } - IntSet r = rows.get(row); - if (r == null) { - rows.set(row, r = template.empty()); - } - return r.addAll(cols); - } - - /** - * Adds the specified cells to this matrix, if not already present. The - * cells are represented by a given set of rows and a given column - * - * @param rowSet indices of the rows - * @param col index of the column - * - * @return true if this matrix did not already contain the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean addAll(IntSet rowSet, int col) - { - if (rowSet == null || rowSet.isEmpty()) { - return false; - } - - // prepare the space - final int l = rowSet.last(); - while (l >= rows.size()) { - rows.add(null); - } - - boolean res = false; - IntSet.IntIterator itr = rowSet.iterator(); - while (itr.hasNext()) { - int r = itr.next(); - IntSet s = rows.get(r); - if (s == null) { - rows.set(r, template.convert(col)); - res = true; - } else { - res |= s.add(col); - } - } - return res; - } - - /** - * Adds the specified cells to this matrix, if not already present. 
The - * cells are represented by the Cartesian product of a given set of rows and - * columns - * - * @param rowSet indices of the rows - * @param colSet indices of the columns - * - * @return true if this matrix did not already contain the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean addAll(IntSet rowSet, IntSet colSet) - { - if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { - return false; - } - - // prepare the space - final int l = rowSet.last(); - while (l >= rows.size()) { - rows.add(null); - } - - boolean res = false; - IntSet.IntIterator itr = rowSet.iterator(); - while (itr.hasNext()) { - int row = itr.next(); - IntSet cols = rows.get(row); - if (cols == null) { - IntSet newCols = template.empty(); - newCols.addAll(colSet); - rows.set(row, newCols); - res = true; - } else { - res |= cols.addAll(colSet); - } - } - return res; - } - - /** - * Removes the specified cell from this matrix if it is present. - * - * @param row row of the cell - * @param col column of the cell - * - * @return true if this matrix contained the specified cell - * - * @throws UnsupportedOperationException if the remove operation is not supported by this - * matrix - */ - public boolean remove(int row, int col) - { - if (row < 0 || col < 0 || row >= rows.size()) { - return false; - } - IntSet r = rows.get(row); - if (r == null) { - return false; - } - if (r.remove(col)) { - if (r.isEmpty()) { - rows.set(row, null); - fixRows(); - } - return true; - } - return false; - } - - /** - * Removes the specified cells from this matrix. The cells are represented by - * a given row and a set of columns. 
- * - * @param row index of the row - * @param cols indices of the columns - * - * @return true if this matrix contains at least one of the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * removed from this matrix - */ - public boolean removeAll(int row, IntSet cols) - { - if (row < 0 || row >= rows.size()) { - return false; - } - IntSet r = rows.get(row); - if (r == null) { - return false; - } - if (r.removeAll(cols)) { - if (r.isEmpty()) { - rows.set(row, null); - fixRows(); - } - return true; - } - return false; - } - - /** - * Removes the specified cells from this matrix. The cells are represented - * by a given set of rows and a given column - * - * @param rowSet indices of the rows - * @param col index of the column - * - * @return true if this matrix contains at least one of the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean removeAll(IntSet rowSet, int col) - { - if (rowSet == null || rowSet.isEmpty()) { - return false; - } - - boolean res = false; - IntSet.IntIterator itr = rowSet.iterator(); - while (itr.hasNext()) { - int r = itr.next(); - IntSet s = rows.get(r); - if (s == null) { - continue; - } - res |= s.remove(col); - if (s.isEmpty()) { - rows.set(r, null); - } - } - if (res) { - fixRows(); - } - return res; - } - - /** - * Removes the specified cells from this matrix. 
The cells are represented - * by the Cartesian product of a given set of rows and columns - * - * @param rowSet indices of the rows - * @param colSet indices of the columns - * - * @return true if this matrix contains at least one of the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean removeAll(IntSet rowSet, IntSet colSet) - { - if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { - return false; - } - - boolean res = false; - IntSet.IntIterator itr = rowSet.iterator(); - while (itr.hasNext()) { - int r = itr.next(); - IntSet s = rows.get(r); - if (s == null) { - continue; - } - res |= s.removeAll(colSet); - if (s.isEmpty()) { - rows.set(r, null); - } - } - if (res) { - fixRows(); - } - return res; - } - - /** - * Retains the specified cells from this matrix. The cells are represented by - * a given row and a set of columns. - * - * @param row index of the row - * @param cols indices of the columns - * - * @return true if this matrix contains at least one of the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * removed from this matrix - */ - public boolean retainAll(int row, IntSet cols) - { - if (isEmpty()) { - return false; - } - if (row < 0 || row >= rows.size()) { - clear(); - return true; - } - - IntSet r = rows.get(row); - if (r == null) { - clear(); - return true; - } - boolean res = false; - for (int i = 0; i < rows.size(); i++) { - if (i == row) { - continue; - } - final IntSet r1 = rows.get(i); - if (r1 != null) { - res = true; - rows.set(i, null); - } - } - res |= r.retainAll(cols); - fixRows(); - return res; - } - - /** - * Removes the specified cells from this matrix. 
The cells are represented - * by a given set of rows and a given column - * - * @param rowSet indices of the rows - * @param col index of the column - * - * @return true if this matrix contains at least one of the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean retainAll(IntSet rowSet, int col) - { - if (isEmpty()) { - return false; - } - if (rowSet == null || rowSet.isEmpty()) { - clear(); - return false; - } - - boolean res = false; - IntSet.IntIterator itr = rowSet.iterator(); - int i = 0; - int r = itr.next(); - do { - IntSet rr = rows.get(i); - if (rr == null) { - i++; - } else if (i < r) { - rows.set(i, null); - res = true; - i++; - } else if (i > r) { - r = itr.next(); - } else { - if (!rr.contains(col)) { - rows.set(i, null); - res = true; - } else if (rr.size() > 1) { - rr.clear(); - rr.add(col); - res = true; - } - i++; - r = itr.next(); - } - } while (i < rows.size() && itr.hasNext()); - res |= i < rows.size(); - for (; i < rows.size(); i++) { - rows.set(i, null); - } - if (res) { - fixRows(); - } - return res; - } - - /** - * Removes the specified cells from this matrix. 
The cells are represented - * by the Cartesian product of a given set of rows and columns - * - * @param rowSet indices of the rows - * @param colSet indices of the columns - * - * @return true if this matrix contains at least one of the - * specified cells - * - * @throws IllegalArgumentException if some property of the specified cell prevents it from being - * added to this matrix - */ - public boolean retainAll(IntSet rowSet, IntSet colSet) - { - if (isEmpty()) { - return false; - } - if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { - clear(); - return false; - } - - boolean res = false; - IntSet.IntIterator itr = rowSet.iterator(); - int i = 0; - int r = itr.next(); - do { - IntSet rr = rows.get(i); - if (rr == null) { - i++; - } else if (i < r) { - rows.set(i, null); - res = true; - i++; - } else if (i > r) { - r = itr.next(); - } else { - res |= rr.retainAll(colSet); - if (rr.isEmpty()) { - rows.set(i, null); - } - i++; - r = itr.next(); - } - } while (i < rows.size() && itr.hasNext()); - res |= i < rows.size(); - for (; i < rows.size(); i++) { - rows.set(i, null); - } - if (res) { - fixRows(); - } - return res; - } - - /** - * Returns true if this matrix contains all of the cells of the - * specified collection. 
- * - * @param other matrix to be checked for containment in this matrix - * - * @return true if this matrix contains all of the cells of the - * specified collection - * - * @throws NullPointerException if the specified collection contains one or more null cells - * and this matrix does not permit null cells (optional), or if - * the specified collection is null - * @see #contains(int, int) - */ - public boolean containsAll(BinaryMatrix other) - { - if (other == null || other.isEmpty() || other == this) { - return true; - } - if (isEmpty() || rows.size() < other.rows.size()) { - return false; - } - - for (int i = 0; i < other.rows.size(); i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s2 == null) { - continue; - } - if (s1 == null || !s1.containsAll(s2)) { - return false; - } - } - return true; - } - - /** - * Returns true if this matrix contains all of the cells of the - * specified collection. - * - * @param rowSet indices of the rows - * @param colSet indices of the columns - * - * @return true if this matrix contains all of the cells of the - * specified collection - */ - public boolean containsAll(IntSet rowSet, IntSet colSet) - { - if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) { - return true; - } - if (isEmpty()) { - return false; - } - - IntSet.IntIterator itr = rowSet.iterator(); - while (itr.hasNext()) { - int i = itr.next(); - IntSet cols = rows.get(i); - if (cols == null || !cols.containsAll(colSet)) { - return false; - } - } - return true; - } - - /** - * Returns true if this matrix contains all of the cells of the - * specified collection. 
- * - * @param row index of the row - * @param colSet indices of the columns - * - * @return true if this matrix contains all of the cells of the - * specified collection - */ - public boolean containsAll(int row, IntSet colSet) - { - if (colSet == null || colSet.isEmpty()) { - return true; - } - if (isEmpty() || row < 0 || row >= rows.size()) { - return false; - } - IntSet cols = rows.get(row); - return cols != null && cols.containsAll(colSet); - } - - /** - * Returns true if this matrix contains all of the cells of the - * specified collection. - * - * @param rowSet indices of the rows - * @param col index of the column - * - * @return true if this matrix contains all of the cells of the - * specified collection - */ - public boolean containsAll(IntSet rowSet, int col) - { - if (rowSet == null || rowSet.isEmpty()) { - return true; - } - if (isEmpty() || col < 0) { - return false; - } - - IntSet.IntIterator itr = rowSet.iterator(); - while (itr.hasNext()) { - int i = itr.next(); - IntSet cols = rows.get(i); - if (cols == null || !cols.contains(col)) { - return false; - } - } - return true; - } - - /** - * Adds all of the cells in the specified collection to this matrix if - * they're not already present. 
- * - * @param other matrix containing cells to be added to this matrix - * - * @return true if this matrix changed as a result of the call - * - * @throws NullPointerException if the specified collection contains one or more null cells - * and this matrix does not permit null cells, or if the - * specified collection is null - * @throws IllegalArgumentException if some property of an cell of the specified collection - * prevents it from being added to this matrix - * @see #add(int, int) - */ - public boolean addAll(BinaryMatrix other) - { - boolean res = false; - final int rowCount = Math.min(rows.size(), other.rows.size()); - int i = 0; - for (; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s2 == null) { - continue; - } - if (s1 == null) { - rows.set(i, s2.clone()); - res = true; - } else { - res |= s1.addAll(s2); - } - assert rows.get(i) == null || !rows.get(i).isEmpty(); - } - res |= i < other.rows.size(); - for (; i < other.rows.size(); i++) { - IntSet s = other.rows.get(i); - rows.add(s == null ? null : s.clone()); - assert rows.get(i) == null || !rows.get(i).isEmpty(); - } - return res; - } - - /** - * Retains only the cells in this matrix that are contained in the specified - * collection. In other words, removes from this matrix all of its cells - * that are not contained in the specified collection. 
- * - * @param other matrix containing cells to be retained in this matrix - * - * @return true if this matrix changed as a result of the call - * - * @throws NullPointerException if this matrix contains a null cell and the specified - * collection does not permit null cells (optional), or if the - * specified collection is null - * @see #remove(int, int) - */ - public boolean retainAll(BinaryMatrix other) - { - boolean res = false; - final int rowCount = Math.min(rows.size(), other.rows.size()); - int i = 0; - for (; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 == null) { - continue; - } - if (s2 == null) { - rows.set(i, null); - res = true; - } else { - res |= s1.retainAll(s2); - if (s1.isEmpty()) { - rows.set(i, null); - } - } - assert rows.get(i) == null || !rows.get(i).isEmpty(); - } - res |= i < rows.size(); - for (; i < rows.size(); i++) { - rows.set(i, null); - } - if (res) { - fixRows(); - } - return res; - } - - /** - * Removes from this matrix all of its cells that are contained in the - * specified collection. 
- * - * @param other matrix containing cells to be removed from this matrix - * - * @return true if this matrix changed as a result of the call - * - * @throws NullPointerException if this matrix contains a null cell and the specified - * collection does not permit null cells (optional), or if the - * specified collection is null - * @see #remove(int, int) - * @see #contains(int, int) - */ - public boolean removeAll(BinaryMatrix other) - { - boolean res = false; - final int rowCount = Math.min(rows.size(), other.rows.size()); - int i = 0; - for (; i < rowCount; i++) { - IntSet s1 = rows.get(i); - IntSet s2 = other.rows.get(i); - if (s1 == null || s2 == null) { - continue; - } - res |= s1.removeAll(s2); - if (s1.isEmpty()) { - rows.set(i, null); - } - assert rows.get(i) == null || !rows.get(i).isEmpty(); - } - if (i < rows.size()) { - return res; - } - if (res) { - fixRows(); - } - return res; - } - - /** - * Removes all of the cells from this matrix. The matrix will be empty after - * this call returns. - * - * @throws UnsupportedOperationException if the clear method is not supported by this matrix - */ - public void clear() - { - rows.clear(); - } - - /** - * @return an array containing all the cells in this matrix - */ - public boolean[][] toArray() - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - /** - * Returns an array containing all of the cells in this matrix. - *

- * If this matrix fits in the specified array with room to spare (i.e., the - * array has more cells than this matrix), the cell in the array immediately - * following the end of the matrix are left unchanged. - * - * @param a the array into which the cells of this matrix are to be - * stored. - * - * @return the array containing all the cells in this matrix - * - * @throws NullPointerException if the specified array is null - * @throws IllegalArgumentException if this matrix does not fit in the specified array - */ - public boolean[][] toArray(boolean[][] a) - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(BinaryMatrix o) - { - throw new UnsupportedOperationException("TODO"); //TODO - } - - /** - * Gets a copy of the row with the given index - * - * @param row the row index - * - * @return the content of the row - */ - public IntSet getRow(int row) - { - if (row < 0) { - throw new IllegalArgumentException("negative row index: " + row); - } - if (row >= rows.size()) { - return template.empty(); - } - IntSet res = rows.get(row); - if (res == null) { - return template.empty(); - } - return res.clone(); - } - - // /** - // * Computes the power-set of the current matrix. - // *

- // * It is a particular implementation of the algorithm Apriori (see: - // * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining - // * Association Rules in Large Databases, in Proceedings of the - // * 20th International Conference on Very Large Data Bases, - // * p.487-499, 1994). The returned power-set does not contain the - // * empty matrix. - // *

- // * The sub-matrices composing the power-set are returned in a list that is - // * sorted according to the lexicographical order provided by the integer - // * matrix. - // * - // * @return the power-set - // * @see #powerSet(int, int) - // * @see #powerSetSize() - // */ - // public List powerSet(); - // - // /** - // * Computes a subset of the power-set of the current matrix, composed by - // * those sub-matrices that have cardinality between min and - // * max. - // *

- // * It is a particular implementation of the algorithm Apriori (see: - // * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining - // * Association Rules in Large Databases, in Proceedings of the - // * 20th International Conference on Very Large Data Bases, - // * p.487-499, 1994). The power-set does not contains the empty - // * matrix. - // *

- // * The sub-matrices composing the power-set are returned in a list that is - // * sorted according to the lexicographical order provided by the integer - // * matrix. - // * - // * @param min - // * minimum sub-matrix size (greater than zero) - // * @param max - // * maximum sub-matrix size - // * @return the power-set - // * @see #powerSet() - // * @see #powerSetSize(int, int) - // */ - // public List powerSet(int min, int max); - // - // /** - // * Computes the power-set size of the current matrix. - // *

- // * The power-set does not contains the empty matrix. - // * - // * @return the power-set size - // * @see #powerSet() - // */ - // public int powerSetSize(); - // - // /** - // * Computes the power-set size of the current matrix, composed by those - // * sub-matrices that have cardinality between min and - // * max. - // *

- // * The returned power-set does not contain the empty matrix. - // * - // * @param min - // * minimum sub-matrix size (greater than zero) - // * @param max - // * maximum sub-matrix size - // * @return the power-set size - // * @see #powerSet(int, int) - // */ - // public int powerSetSize(int min, int max); - // - // /** - // * Computes the Jaccard similarity coefficient between this matrix and the - // * given matrix. - // *

- // * The coefficient is defined as - // * |A intersection B| / |A union B|. - // * - // * @param other - // * the other matrix - // * @return the Jaccard similarity coefficient - // * @see #jaccardDistance(BinaryMatrix) - // */ - // public double jaccardSimilarity(BinaryMatrix other); - // - // /** - // * Computes the Jaccard distance between this matrix and the given matrix. - // *

- // * The coefficient is defined as 1 - - // * {@link #jaccardSimilarity(BinaryMatrix)}. - // * - // * @param other - // * the other matrix - // * @return the Jaccard distance - // * @see #jaccardSimilarity(BinaryMatrix) - // */ - // public double jaccardDistance(BinaryMatrix other); - // - // /** - // * Computes the weighted version of the Jaccard similarity coefficient - // * between this matrix and the given matrix. - // *

- // * The coefficient is defined as - // * sum of min(A_i, B_i) / sum of max(A_i, B_i). - // * - // * @param other - // * the other matrix - // * @return the weighted Jaccard similarity coefficient - // * @see #weightedJaccardDistance(BinaryMatrix) - // */ - // public double weightedJaccardSimilarity(BinaryMatrix other); - // - // /** - // * Computes the weighted version of the Jaccard distance between this - // matrix - // * and the given matrix. - // *

- // * The coefficient is defined as 1 - - // * {@link #weightedJaccardSimilarity(BinaryMatrix)}. - // * - // * @param other - // * the other matrix - // * @return the weighted Jaccard distance - // * @see #weightedJaccardSimilarity(BinaryMatrix) - // */ - // public double weightedJaccardDistance(BinaryMatrix other); - - /** - * Gets a copy of the column with the given index - * - * @param col the column index - * - * @return the content of the column - */ - public IntSet getCol(int col) - { - if (col < 0) { - throw new IllegalArgumentException("negative column index: " + col); - } - IntSet res = template.empty(); - for (int row = 0; row < rows.size(); row++) { - final IntSet r = rows.get(row); - if (r != null && r.contains(col)) { - res.add(row); - } - } - return res; - } - - /** - * Generated a transposed matrix - * - * @return the transposed matrix - */ - public BinaryMatrix transposed() - { - BinaryMatrix res = empty(); - for (int row = 0; row < rows.size(); row++) { - IntSet r = rows.get(row); - if (r == null) { - continue; - } - IntSet.IntIterator itr = r.iterator(); - while (itr.hasNext()) { - res.add(itr.next(), row); - } - } - return res; - } - - /** - * Generates an ASCII-art matrix representation - */ - @Override - public String toString() - { - StringBuilder s = new StringBuilder(); - - final int maxCol = maxCol(); - - // initial line - s.append('+'); - for (int i = 0; i <= maxCol; i++) { - s.append('-'); - } - s.append("+\n"); - - // cells - for (IntSet row : rows) { - s.append('|'); - int col = 0; - if (row != null) { - IntSet.IntIterator itr = row.iterator(); - while (itr.hasNext()) { - int c = itr.next(); - while (col++ < c) { - s.append(' '); - } - s.append('*'); - } - } - while (col++ <= maxCol) { - s.append(' '); - } - s.append("|\n"); - } - - // final line - s.append('+'); - for (int i = 0; i <= maxCol; i++) { - s.append('-'); - } - s.append("+\n"); - - return s.toString(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean 
equals(Object obj) - { - if (this == obj) { - return true; - } - if (!(obj instanceof BinaryMatrix)) { - return false; - } - return rows.equals(((BinaryMatrix) obj).rows); - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - int h = 1; - for (IntSet s : rows) { - h = (h << 5) - h; - if (s != null) { - h += s.hashCode(); - } - } - return h; - } - - /** - * @return the greatest non-empty row - */ - public int maxRow() - { - return rows.size() - 1; - } - - /** - * @return the greatest non-empty column - */ - public int maxCol() - { - int res = 0; - for (IntSet row : rows) { - if (row != null) { - assert !row.isEmpty(); - res = Math.max(res, row.last()); - } - } - return res; - } - - /** - * @return the index set of non-empty rows - */ - public IntSet involvedRows() - { - IntSet res = template.empty(); - for (int i = 0; i < rows.size(); i++) { - if (rows.get(i) != null) { - res.add(i); - } - } - return res; - } - - /** - * @return the index set of non-empty columns - */ - public IntSet involvedCols() - { - IntSet res = template.empty(); - for (int i = 0; i < rows.size(); i++) { - res.addAll(rows.get(i)); - } - return res; - } - - /** - * An {@link Iterator}-like interface - */ - public interface CellIterator - { - /** - * @return true if the iterator has more cells. - */ - boolean hasNext(); - - /** - * Returns the next cell in the iteration. IMPORTANT: each - * iteration returns an array of two elements, where the first element - * is the row, while the second element is the column of the current - * cell. In order to reduce the produced heap garbage, there is only - * one array instantiated for each iterator, whose content is - * overridden at each iteration. - * - * @return the next cell in the iteration. - * - * @throws NoSuchElementException iteration has no more cells. - */ - int[] next(); - - /** - * Removes from the underlying matrix the last cell returned by the - * iterator (optional operation). 
This method can be called only once - * per call to next. The behavior of an iterator is unspecified - * if the underlying collection is modified while the iteration is in - * progress in any way other than by calling this method. - * - * @throws UnsupportedOperationException if the remove operation is not supported by - * this Iterator. - * @throws IllegalStateException if the next method has not yet been called, - * or the remove method has already been called - * after the last call to the next method. - */ - void remove(); - - /** - * Skips all the cells before the the specified cell, so that - * {@link #next()} gives the given cell or, if it does not exist, the - * cell immediately after according to the sorting provided by this set. - *

- * If cell is less than the next cell, it does nothing - * - * @param row row of the cell - * @param col column of the cell - */ - public void skipAllBefore(int row, int col); - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java deleted file mode 100755 index dcdb34205a11..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package io.druid.extendedset.wrappers.matrix; - -/** - * A class for representing a single transaction-item relationship. This class - * is mainly used in {@link PairSet} to iterate over the cells of a - * binary matrix. 
- * - * @param transaction type - * @param item type - * - * @author Alessandro Colantonio - * @version $Id: Pair.java 140 2011-02-07 21:30:29Z cocciasik $ - * @see PairSet - */ -public class Pair implements java.io.Serializable -{ - /** - * generated ID - */ - private static final long serialVersionUID = 328985131584539749L; - - /** - * the transaction - */ - public final T transaction; - - /** - * the item - */ - public final I item; - - /** - * Creates a new transaction-item pair - * - * @param transaction - * @param item - */ - public Pair(T transaction, I item) - { - this.transaction = transaction; - this.item = item; - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - // 524287 * i = (i << 19) - i, where 524287 is prime. - // This hash function avoids transactions and items to overlap, - // since "item" can often stay in 32 - 19 = 13 bits. Therefore, it is - // better than multiplying by 31. - final int hi = item.hashCode(); - final int ht = transaction.hashCode(); - return (hi << 19) - hi + ht; - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (obj == null) { - return false; - } - if (this == obj) { - return true; - } - if (!(obj instanceof Pair)) { - return false; - } - @SuppressWarnings("unchecked") - Pair other = (Pair) obj; - return transaction.equals(other.transaction) && item.equals(other.item); - } - - /** - * {@inheritDoc} - */ - @Override - public String toString() - { - return "(" + transaction + ", " + item + ")"; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java deleted file mode 100755 index c68ffd1607c6..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java +++ /dev/null @@ -1,448 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may 
not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset.wrappers.matrix; - -import java.io.Serializable; -import java.util.AbstractCollection; -import java.util.AbstractMap; -import java.util.AbstractSet; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Set; - -/** - * An class that associates a value to each pair within a {@link PairSet} instance. It is not as fast as {@link HashMap} , but requires much less memory. - * - * @param < T > transaction type - * @param < I > item type - * @param < V > type of the value to associate - * - * @author Alessandro Colantonio - * @version $Id: PairMap.java 153 2011-05-30 16:39:57Z cocciasik $ - * @see PairSet - */ -public class PairMap extends AbstractMap, V> implements Serializable, Cloneable -{ - /** - * generated serial ID - */ - private static final long serialVersionUID = 4699094886888004702L; - - /** - * all existing keys - * - * @uml.property name="keys" - * @uml.associationEnd - */ - private final PairSet keys; - - /** - * values related to existing keys, according to the ordering provided by {@link #keys} - */ - private final ArrayList values; - - /** - * Creates an empty map - * - * @param keys {@link PairSet} instance internally used to store indices. If - * not empty, {@link #get(Object)} will return null - * for each existing pair if we do not also put a value. 
- */ - public PairMap(PairSet keys) - { - this.keys = keys; - values = new ArrayList(keys.size()); - for (int i = 0; i < keys.size(); i++) { - values.add(null); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - keys.clear(); - values.clear(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsKey(Object key) - { - return keys.contains(key); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsValue(Object value) - { - return values.contains(value); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public V get(Object key) - { - if (key == null || !(key instanceof Pair)) { - return null; - } - int index = keys.indexOf((Pair) key); - if (index < 0) { - return null; - } - return values.get(index); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return keys.isEmpty(); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public V put(Pair key, V value) - { - boolean isNew = keys.add(key); - int index = keys.indexOf(key); - Object old = null; - if (isNew) { - values.add(index, value); - } else { - old = values.set(index, value); - } - return (V) old; - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public V remove(Object key) - { - if (key == null || !(key instanceof Pair)) { - return null; - } - int index = keys.indexOf((Pair) key); - if (index < 0) { - return null; - } - keys.remove(key); - return values.remove(index); - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return keys.size(); - } - - /** - * {@inheritDoc} - */ - @Override - public PairMap clone() - { - // NOTE: do not use super.clone() since it is 10 times slower! 
- PairMap cloned = new PairMap(keys.clone()); - cloned.values.clear(); - cloned.values.addAll(values); - return cloned; - } - - /** - * {@inheritDoc} - */ - @Override - public Set> keySet() - { - return new AbstractSet>() - { - @Override - public boolean add(Pair e) - { - throw new UnsupportedOperationException(); - } - - @Override - public void clear() - { - PairMap.this.clear(); - } - - @Override - public boolean contains(Object o) - { - return keys.contains(o); - } - - @Override - public boolean containsAll(Collection c) - { - return keys.containsAll(c); - } - - @Override - public boolean isEmpty() - { - return keys.isEmpty(); - } - - @Override - public Iterator> iterator() - { - return new Iterator>() - { - Iterator> itr = keys.iterator(); - - @Override - public boolean hasNext() - { - return itr.hasNext(); - } - - @Override - public Pair next() - { - return itr.next(); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - - @Override - public boolean remove(Object o) - { - throw new UnsupportedOperationException(); - } - - @Override - public int size() - { - return keys.size(); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public Collection values() - { - return new AbstractCollection() - { - - @Override - public boolean add(V e) - { - throw new UnsupportedOperationException(); - } - - @Override - public void clear() - { - PairMap.this.clear(); - } - - @Override - public boolean contains(Object o) - { - return values.contains(o); - } - - @Override - public boolean isEmpty() - { - return keys.isEmpty(); - } - - @Override - public Iterator iterator() - { - return new Iterator() - { - Iterator itr = values.iterator(); - - @Override - public boolean hasNext() - { - return itr.hasNext(); - } - - @Override - public V next() - { - return itr.next(); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - - @Override - public boolean remove(Object o) - { - 
throw new UnsupportedOperationException(); - } - - @Override - public int size() - { - return values.size(); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public Set, V>> entrySet() - { - return new AbstractSet, V>>() - { - @Override - public boolean add(Entry, V> e) - { - V res = PairMap.this.put(e.getKey(), e.getValue()); - return res != e.getValue(); - } - - @Override - public void clear() - { - PairMap.this.clear(); - } - - @Override - public boolean contains(Object o) - { - return o != null - && o instanceof Entry - && PairMap.this.containsKey(((Entry) o).getKey()) - && PairMap.this.containsValue(((Entry) o).getValue()); - } - - @Override - public boolean isEmpty() - { - return keys.isEmpty(); - } - - @Override - public Iterator, V>> iterator() - { - return new Iterator, V>>() - { - final Iterator> keyItr = keys.iterator(); - int valueIndex = -1; - - @Override - public boolean hasNext() - { - return keyItr.hasNext(); - } - - @Override - public Entry, V> next() - { - final Pair key = keyItr.next(); - valueIndex++; - - return new Entry, V>() - { - @Override - public Pair getKey() - { - return key; - } - - @Override - public V getValue() - { - return values.get(valueIndex); - } - - @Override - public V setValue(V value) - { - return values.set(valueIndex, value); - } - - @Override - public String toString() - { - return "{" + getKey() + "=" + getValue() + "}"; - } - }; - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - - @Override - public boolean remove(Object o) - { - throw new UnsupportedOperationException(); - } - - @Override - public int size() - { - return keys.size(); - } - }; - } -} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java deleted file mode 100755 index 41cf34b507e6..000000000000 --- a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java +++ 
/dev/null @@ -1,1403 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package io.druid.extendedset.wrappers.matrix; - -import io.druid.extendedset.AbstractExtendedSet; -import io.druid.extendedset.ExtendedSet; -import io.druid.extendedset.intset.IntSet; -import io.druid.extendedset.wrappers.IndexedSet; -import io.druid.extendedset.wrappers.IntegerSet; -import io.druid.extendedset.wrappers.matrix.BinaryMatrix.CellIterator; - -import java.io.Serializable; -import java.util.AbstractCollection; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * A set of pairs internally represented by a binary matrix.

This class can be used to represent a set of transactions, where each transaction is a set of items. Rows are transactions, columns are the items involved with each transaction. - * - * @param < T > transaction type - * @param < I > item type - * - * @author Alessandro Colantonio - * @version $Id: PairSet.java 153 2011-05-30 16:39:57Z cocciasik $ - * @see Pair - * @see IntSet - */ -public class PairSet extends AbstractExtendedSet> implements Serializable -{ - /** - * generated serial ID - */ - private static final long serialVersionUID = 7902458899512666217L; - - /** - * binary matrix - * - * @uml.property name="matrix" - * @uml.associationEnd - */ - private final BinaryMatrix matrix; - - /** - * all possible transactions - * - * @uml.property name="allTransactions" - * @uml.associationEnd - */ - private final IndexedSet allTransactions; - - /** - * all possible items - * - * @uml.property name="allItems" - * @uml.associationEnd - */ - private final IndexedSet allItems; - - /** - * Initializes the set by specifying all possible transactions and items. - * - * @param matrix {@link BinaryMatrix} instance used to internally represent the matrix - * @param transactions collection of all possible transactions. The specified - * order will be preserved within when iterating over the - * {@link PairSet} instance. - * @param items collection of all possible items. The specified order - * will be preserved within each transaction {@link PairSet}. 
- */ - public PairSet(BinaryMatrix matrix, Collection transactions, Collection items) - { - if (transactions == null || items == null) { - throw new NullPointerException(); - } - this.matrix = matrix; - - IntSet tmp = matrix.emptyRow(); - if (transactions instanceof IndexedSet) { - allTransactions = (IndexedSet) transactions; - } else { - allTransactions = new IndexedSet(tmp.empty(), transactions).universe(); //.unmodifiable(); - } - if (items instanceof IndexedSet) { - allItems = (IndexedSet) items; - } else { - allItems = new IndexedSet(tmp.empty(), items).universe(); //.unmodifiable(); - } - } - - /** - * Initializes the set by specifying all possible transactions and items. - * - * @param matrix {@link BinaryMatrix} instance used to internally represent the - * matrix - * @param pairs arrays n x 2 of pairs of transactions (first) and items (second). - */ - public PairSet(BinaryMatrix matrix, final Object[][] pairs) - { - this(matrix, new AbstractCollection>() - { - @Override - public Iterator> iterator() - { - return new Iterator>() - { - int i = 0; - - @SuppressWarnings("unchecked") - @Override - public Pair next() {return new Pair(pairs[i][0], pairs[i++][1]);} - - @Override - public boolean hasNext() {return i < pairs.length;} - - @Override - public void remove() {throw new UnsupportedOperationException();} - }; - } - - @Override - public int size() {return pairs.length;} - }); - } - - /** - * Converts a generic collection of transaction-item pairs to a - * {@link PairSet} instance. 
- * - * @param matrix {@link IntSet} instance used to internally represent the set - * @param pairs collection of {@link Pair} instances - */ - public PairSet(BinaryMatrix matrix, Collection> pairs) - { - if (pairs == null) { - throw new RuntimeException("null pair set"); - } - if (pairs.isEmpty()) { - throw new RuntimeException("empty pair set"); - } - - // identify all possible transactions and items and their frequencies - final Map ts = new HashMap(); - final Map is = new HashMap(); - for (Pair p : pairs) { - Integer f; - - f = ts.get(p.transaction); - f = f == null ? 1 : f + 1; - ts.put(p.transaction, f); - - f = is.get(p.item); - f = f == null ? 1 : f + 1; - is.put(p.item, f); - } - - // sort transactions and items by descending frequencies - List> sortedPairs = new ArrayList>(pairs); - Collections.sort(sortedPairs, new Comparator>() - { - @Override - public int compare(Pair o1, Pair o2) - { - int r = ts.get(o2.transaction).compareTo(ts.get(o1.transaction)); - if (r == 0) { - r = is.get(o2.item).compareTo(is.get(o1.item)); - } - return r; - } - }); - List sortedTransactions = new ArrayList(ts.keySet()); - Collections.sort(sortedTransactions, new Comparator() - { - @Override - public int compare(T o1, T o2) - { - return ts.get(o2).compareTo(ts.get(o1)); - } - }); - List sortedItems = new ArrayList(is.keySet()); - Collections.sort(sortedItems, new Comparator() - { - @Override - public int compare(I o1, I o2) - { - return is.get(o2).compareTo(is.get(o1)); - } - }); - - // identify all transactions and items - this.matrix = matrix; - matrix.add(0, 0); - allTransactions = new IndexedSet(matrix.getRow(0), sortedTransactions).universe(); // .unmodifiable(); - allItems = new IndexedSet(matrix.getRow(0), sortedItems).universe(); // .unmodifiable(); - matrix.clear(); - - // create the matrix - for (Pair p : sortedPairs) { - add(p); - } - } - - /** - * Wraps a {@link BinaryMatrix} instance with a {@link PairSet} instance. - *

- * NOTE: the maximum item and transaction IDs are those existing in - * the binary matrix when the wrapping take place - * - * @param b a {@link BinaryMatrix} instance to wrap - * - * @return a new {@link PairSet} instance, indexed by the given matrix - */ - public static PairSet createFromBinaryMatrix(BinaryMatrix b) - { - // TODO this is a little bit costly since PairSet will allocate an array - // and a HashMap of Integers to map elements of BinaryMatrix... - // Think about a IntegerPairSet class or to an "fake" IntegerIndexedSet - // just for this purpose. - - IntegerSet t = new IntegerSet(b.emptyRow()); - t.intSet().add(b.maxRow() + 1); - t.intSet().complement(); - - IntegerSet i = new IntegerSet(b.emptyRow()); - i.intSet().add(b.maxCol() + 1); - i.intSet().complement(); - - return new PairSet(b, t, i); - } - - /** - * maps a transaction to its index and returns -1 if not found - */ - private int transactionToIndex(T t) - { - Integer r = allTransactions.absoluteIndexOf(t); - return r == null ? -1 : r.intValue(); - } - - /** - * maps an item to its index and returns -1 if not found - */ - private int itemToIndex(I i) - { - Integer r = allItems.absoluteIndexOf(i); - return r == null ? 
-1 : r.intValue(); - } - - /** - * maps a pair of indices to the corresponding {@link Pair} - */ - private Pair indexToPair(int[] i) - { - return new Pair(allTransactions.absoluteGet(i[0]), allItems.absoluteGet(i[1])); - } - - /** - * A shortcut for new PairSet<T, I>(matrix, mapping) - * - * @param bm {@link BinaryMatrix} instance to link - * - * @return the new {@link PairSet} with the given {@link BinaryMatrix} - * instance and the same mapping of this - */ - private PairSet createFromIndices(BinaryMatrix bm) - { - return new PairSet(bm, allTransactions, allItems); - } - - /** - * {@inheritDoc} - */ - @Override - public PairSet clone() - { - return createFromIndices(matrix.clone()); - } - - /** - * Checks if the given collection is a instance of {@link PairSet} with - * the same index mappings - * - * @param c collection to check - * - * @return true if the given collection is a instance of - * {@link PairSet} with the same index mappings - */ - private boolean hasSameIndices(Collection c) - { - return c != null - && (c instanceof PairSet) - && (allTransactions == ((PairSet) c).allTransactions) - && (allItems == ((PairSet) c).allItems); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean add(Pair e) - { - return add(e.transaction, e.item); - } - - /** - * Adds a single transaction-item pair - * - * @param transaction the transaction of the pair - * @param item the item of the pair - * - * @return true if the set has been changed - */ - public boolean add(T transaction, I item) - { - return matrix.add(transactionToIndex(transaction), itemToIndex(item)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean addAll(Collection> c) - { - return matrix.addAll(convert(c).matrix); - } - - /** - * Add the pairs obtained from the Cartesian product of transactions - * and items - * - * @param trans collection of transactions - * @param items collection of items - * - * @return true if the set set has been changed - */ - public boolean addAll(Collection 
trans, Collection items) - { - if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { - return false; - } - return matrix.addAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); - } - - /** - * Add the pairs obtained from the Cartesian product of transactions - * and items - * - * @param trans the given transaction - * @param items collection of items - * - * @return true if the set set has been changed - */ - public boolean addAll(T trans, Collection items) - { - if (trans == null || items == null || items.isEmpty()) { - return false; - } - return matrix.addAll(transactionToIndex(trans), allItems.convert(items).indices()); - } - - /** - * Add the pairs obtained from the Cartesian product of transactions - * and items - * - * @param trans collection of transactions - * @param item the given item - * - * @return true if the set set has been changed - */ - public boolean addAll(Collection trans, I item) - { - if (trans == null || trans.isEmpty() || item == null) { - return false; - } - return matrix.addAll(allTransactions.convert(trans).indices(), itemToIndex(item)); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear() - { - matrix.clear(); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public boolean contains(Object o) - { - return o != null - && o instanceof Pair - && contains(((Pair) o).transaction, ((Pair) o).item); - } - - /** - * Checks if the given transaction-item pair is contained within the set - * - * @param transaction the transaction of the pair - * @param item the item of the pair - * - * @return true if the given transaction-item pair is contained - * within the set - */ - public boolean contains(T transaction, I item) - { - int t = transactionToIndex(transaction); - if (t < 0) { - return false; - } - int i = itemToIndex(item); - if (i < 0) { - return false; - } - return matrix.contains(t, i); - } - - /** - * {@inheritDoc} - */ - @Override - public 
boolean containsAll(Collection c) - { - return matrix.containsAll(convert(c).matrix); - } - - /** - * Checks if the pairs obtained from the Cartesian product of - * transactions and items are contained - * - * @param trans collection of transactions - * @param items collection of items - * - * @return true if the pairs set set has been changed - */ - public boolean containsAll(Collection trans, Collection items) - { - if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { - return true; - } - if (isEmpty()) { - return false; - } - return matrix.containsAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); - } - - /** - * Checks if the pairs obtained from the Cartesian product of - * transactions and items are contained - * - * @param trans the transaction - * @param items collection of items - * - * @return true if the pairs set set has been changed - */ - public boolean containsAll(T trans, Collection items) - { - if (trans == null || items == null || items.isEmpty()) { - return true; - } - if (isEmpty()) { - return false; - } - return matrix.containsAll(transactionToIndex(trans), allItems.convert(items).indices()); - } - - /** - * Checks if the pairs obtained from the Cartesian product of - * transactions and items are contained - * - * @param trans collection of transactions - * @param item the item - * - * @return true if the pairs set set has been changed - */ - public boolean containsAll(Collection trans, I item) - { - if (trans == null || trans.isEmpty() || item == null) { - return true; - } - if (isEmpty()) { - return false; - } - return matrix.containsAll(allTransactions.convert(trans).indices(), itemToIndex(item)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean isEmpty() - { - return matrix.isEmpty(); - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator> iterator() - { - return new ExtendedIterator>() - { - CellIterator itr = matrix.iterator(); - - @Override - public 
Pair next() {return indexToPair(itr.next());} - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public void remove() {itr.remove();} - - @Override - public void skipAllBefore(Pair element) - { - itr.skipAllBefore( - transactionToIndex(element.transaction), - itemToIndex(element.item) - ); - } - }; - } - - /** - * {@inheritDoc} - */ - @Override - public ExtendedIterator> descendingIterator() - { - return new ExtendedIterator>() - { - CellIterator itr = matrix.descendingIterator(); - - @Override - public Pair next() {return indexToPair(itr.next());} - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public void remove() {itr.remove();} - - @Override - public void skipAllBefore(Pair element) - { - itr.skipAllBefore( - transactionToIndex(element.transaction), - itemToIndex(element.item) - ); - } - }; - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public boolean remove(Object o) - { - return o instanceof Pair - && remove(((Pair) o).transaction, ((Pair) o).item); - } - - /** - * Removes a single transaction-item pair - * - * @param transaction the transaction of the pair - * @param item the item of the pair - * - * @return true if the pair set has been changed - */ - public boolean remove(T transaction, I item) - { - return matrix.remove(transactionToIndex(transaction), itemToIndex(item)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean removeAll(Collection c) - { - return matrix.removeAll(convert(c).matrix); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean retainAll(Collection c) - { - return matrix.retainAll(convert(c).matrix); - } - - /** - * Removes the pairs obtained from the Cartesian product of transactions and - * items - * - * @param trans collection of transactions - * @param items collection of items - * - * @return true if the set set has been changed - */ - public boolean removeAll(Collection trans, Collection items) - { - if (trans 
== null || trans.isEmpty() || items == null || items.isEmpty()) { - return false; - } - return matrix.removeAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); - } - - /** - * Removes the pairs obtained from the Cartesian product of transactions and - * items - * - * @param trans a transaction - * @param items collection of items - * - * @return true if the set set has been changed - */ - public boolean removeAll(T trans, Collection items) - { - if (trans == null || items == null || items.isEmpty()) { - return false; - } - return matrix.removeAll(transactionToIndex(trans), allItems.convert(items).indices()); - } - - /** - * Removes the pairs obtained from the Cartesian product of transactions and - * items - * - * @param trans collection of transactions - * @param item collection of items - * - * @return true if the set set has been changed - */ - public boolean removeAll(Collection trans, I item) - { - if (trans == null || trans.isEmpty() || item == null) { - return false; - } - return matrix.removeAll(allTransactions.convert(trans).indices(), itemToIndex(item)); - } - - /** - * Retains the pairs obtained from the Cartesian product of transactions and - * items - * - * @param trans collection of transactions - * @param items collection of items - * - * @return true if the set set has been changed - */ - public boolean retainAll(Collection trans, Collection items) - { - if (isEmpty()) { - return false; - } - if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { - clear(); - return true; - } - return matrix.retainAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); - } - - /** - * Retains the pairs obtained from the Cartesian product of transactions and - * items - * - * @param trans the transaction - * @param items collection of items - * - * @return true if the set set has been changed - */ - public boolean retainAll(T trans, Collection items) - { - if (isEmpty()) { - return false; - 
} - if (trans == null || items == null || items.isEmpty()) { - clear(); - return true; - } - return matrix.retainAll(transactionToIndex(trans), allItems.convert(items).indices()); - } - - /** - * Retains the pairs obtained from the Cartesian product of transactions and - * items - * - * @param trans collection of transactions - * @param item the item - * - * @return true if the set set has been changed - */ - public boolean retainAll(Collection trans, I item) - { - if (isEmpty()) { - return false; - } - if (trans == null || trans.isEmpty() || item == null) { - clear(); - return true; - } - return matrix.retainAll(allTransactions.convert(trans).indices(), itemToIndex(item)); - } - - /** - * {@inheritDoc} - */ - @Override - public int size() - { - return matrix.size(); - } - - /** - * Gets the set of all possible transactions that can be contained within - * the set - * - * @return the set of all possible transactions that can be contained within - * the set - */ - public IndexedSet allTransactions() - { - return allTransactions; - } - - /** - * Gets the set of all possible items that can be contained within each - * transaction - * - * @return the set of all possible items that can be contained within each - * transaction - */ - public IndexedSet allItems() - { - return allItems; - } - - /** - * {@inheritDoc} - */ - @Override - public int hashCode() - { - return matrix.hashCode(); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean equals(Object obj) - { - if (this == obj) { - return true; - } - if (!(obj instanceof PairSet)) { - return false; - } - final PairSet other = (PairSet) obj; - return hasSameIndices(other) && matrix.equals(other.matrix); - } - - /** - * Lists all items contained within a given transaction - * - * @param transaction the given transaction - * - * @return items contained within the given transaction - */ - public IndexedSet itemsOf(T transaction) - { - IndexedSet res = allItems.empty(); - 
res.indices().addAll(matrix.getRow(transactionToIndex(transaction))); - return res; - } - - /** - * Lists all transactions involved with a specified item - * - * @param item the given item - * - * @return transactions involved with a specified item - */ - public IndexedSet transactionsOf(I item) - { - IndexedSet res = allTransactions.empty(); - res.indices().addAll(matrix.getCol(itemToIndex(item))); - return res; - } - - /** - * Gets the set of transactions in {@link #allTransactions()} that contains - * at least one item - * - * @return the set of transactions in {@link #allTransactions()} that - * contains at least one item - */ - public IndexedSet involvedTransactions() - { - IndexedSet res = allTransactions.empty(); - res.indices().addAll(matrix.involvedRows()); - return res; - } - - /** - * Gets the set of items in {@link #allItems()} that are contained in at - * least one transaction - * - * @return the set of items in {@link #allItems()} that are contained in at - * least one transaction - */ - public IndexedSet involvedItems() - { - IndexedSet res = allItems.empty(); - res.indices().addAll(matrix.involvedCols()); - return res; - } - - /** - * Gets the ith element of the set - * - * @param index position of the element in the sorted set - * - * @return the ith element of the set - * - * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to - * {@link #size()} - */ - @Override - public Pair get(int index) - { - return indexToPair(matrix.get(index)); - } - - /** - * Provides position of element within the set. - *

- * It returns -1 if the element does not exist within the set. - * - * @param element element of the set - * - * @return the element position - */ - @Override - public int indexOf(Pair element) - { - return matrix.indexOf( - transactionToIndex(element.transaction), - itemToIndex(element.item) - ); - } - - /** - * {@inheritDoc} - */ - @Override - public String debugInfo() - { - StringBuilder s = new StringBuilder(); - - s.append("possible transactions: "); - s.append(allTransactions); - s.append('\n'); - s.append("possible items: "); - s.append(allItems); - s.append('\n'); - - s.append("pairs:\n"); - s.append(matrix.toString()); - s.append("info: " + matrix.debugInfo()); - - return s.toString(); - } - - /** - * {@inheritDoc} - */ - @Override - public double bitmapCompressionRatio() - { - return matrix.bitmapCompressionRatio(); - } - - /** - * {@inheritDoc} - */ - @Override - public double collectionCompressionRatio() - { - return matrix.collectionCompressionRatio(); - } - - /** - * Returns the set of indices. Modifications to this set are reflected to - * this {@link PairSet} instance. Trying to perform operation on - * out-of-bound indices will throw an {@link IllegalArgumentException} - * exception. - * - * @return the index set - */ - public BinaryMatrix matrix() - { - return matrix; - } - -// /** -// * Extracts a subset represented by a certain range of transactions and -// * items, according to the ordering provided by {@link #allTransactions()} -// * and {@link #allItems()}. 
-// * -// * @param fromTransaction -// * the first transaction of the range (if null it -// * represents the first one) -// * @param toTransaction -// * the last transaction of the range (if null it -// * represents the last one) -// * @param fromItem -// * the first item of the range (if null it -// * represents the first one) -// * @param toItem -// * the last item of the range (if null it represents -// * the last one) -// * @return the specified subset -// */ -// public PairSet subSet(T fromTransaction, T toTransaction, I fromItem, I toItem) { -// BinaryMatrix mask = matrix.empty(); -// mask.fill( -// transactionToIndex(fromTransaction), -// itemToIndex(fromItem), -// transactionToIndex(toTransaction), -// itemToIndex(toItem)); -// return new PairSet(matrix.intersection(mask), allTransactions, allItems); -// } -// -// /** -// * Extracts a subset represented by a collection of transactions and items -// * -// * @param involvedTransactions -// * involved transactions (if null, it represents all -// * transactions in {@link #allTransactions()}) -// * @param involvedItems -// * involved items (if null, it represents all items -// * in {@link #allItems()}) -// * @return all the transaction-item pairs that represent the specified -// * subset -// */ -// public PairSet subSet(Collection involvedTransactions, Collection involvedItems) { -// BinaryMatrix mask = matrix.empty(); -// mask.addAll( -// allTransactions.convert(involvedTransactions).indices(), -// allItems.convert(involvedItems).indices()); -// return new PairSet(matrix.intersection(mask), allTransactions, allItems); -// } - - /** - * {@inheritDoc} - */ - @Override - public PairSet empty() - { - return createFromIndices(matrix.empty()); - } - - /** - * {@inheritDoc} - */ - @Override - public void complement() - { - matrix.complement(); - } - - /** - * {@inheritDoc} - */ - @Override - public Comparator> comparator() - { - return new Comparator>() - { - @Override - public int compare(Pair o1, Pair o2) - { - int 
t1 = transactionToIndex(o1.transaction); - int t2 = transactionToIndex(o2.transaction); - int r = t1 < t2 ? -1 : (t1 == t2 ? 0 : 1); - if (r == 0) { - int i1 = itemToIndex(o1.item); - int i2 = itemToIndex(o2.item); - r = i1 < i2 ? -1 : (i1 == i2 ? 0 : 1); - } - return r; - } - }; - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public PairSet convert(Collection c) - { - if (c == null) { - return empty(); - } - - // useless to convert... - if (hasSameIndices(c)) { - return (PairSet) c; - } - - // convert - PairSet res = empty(); - for (Pair p : (Collection>) c) { - res.matrix.add(transactionToIndex(p.transaction), itemToIndex(p.item)); - } - return res; - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public PairSet convert(Object... e) - { - return (PairSet) super.convert(e); - } - - /** - * {@inheritDoc} - */ - @Override - public void clear(Pair from, Pair to) - { - matrix.clear( - transactionToIndex(from.transaction), - itemToIndex(from.item), - transactionToIndex(to.transaction), - itemToIndex(to.item) - ); - } - - /** - * {@inheritDoc} - */ - @Override - public int complementSize() - { - return matrix.complementSize(); - } - - /** - * {@inheritDoc} - */ - @Override - public PairSet complemented() - { - return createFromIndices(matrix.complemented()); - } - - /** - * {@inheritDoc} - */ - @Override - public PairSet difference(Collection> other) - { - return other == null ? 
clone() : createFromIndices(matrix.difference(convert(other).matrix)); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAny(Collection> other) - { - return other == null || matrix.containsAny(convert(other).matrix); - } - - /** - * {@inheritDoc} - */ - @Override - public boolean containsAtLeast(Collection> other, int minElements) - { - return other != null && !other.isEmpty() && matrix.containsAtLeast(convert(other).matrix, minElements); - } - - /** - * {@inheritDoc} - */ - @Override - public int differenceSize(Collection> other) - { - return other == null ? (int) size() : (int) matrix.differenceSize(convert(other).matrix); - } - - /** - * {@inheritDoc} - */ - @Override - public void fill(Pair from, Pair to) - { - matrix.fill( - transactionToIndex(from.transaction), - itemToIndex(from.item), - transactionToIndex(to.transaction), - itemToIndex(to.item) - ); - } - - /** - * {@inheritDoc} - */ - @Override - public void flip(Pair e) - { - matrix.flip( - transactionToIndex(e.transaction), - itemToIndex(e.item) - ); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public PairSet subSet(Pair fromElement, Pair toElement) - { - return (PairSet) super.subSet(fromElement, toElement); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public PairSet headSet(Pair toElement) - { - return (PairSet) super.headSet(toElement); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public PairSet tailSet(Pair fromElement) - { - return (PairSet) super.tailSet(fromElement); - } - - /** - * {@inheritDoc} - */ - @Override - public PairSet intersection(Collection> c) - { - return c == null ? 
empty() : createFromIndices(matrix.intersection(convert(c).matrix)); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public List> powerSet() - { - return (List>) super.powerSet(); - } - - /** - * {@inheritDoc} - */ - @SuppressWarnings("unchecked") - @Override - public List> powerSet(int min, int max) - { - return (List>) super.powerSet(min, max); - } - - /** - * {@inheritDoc} - */ - @Override - public PairSet symmetricDifference(Collection> other) - { - return other == null ? clone() : createFromIndices(matrix.symmetricDifference(convert(other).matrix)); - } - - /** - * {@inheritDoc} - */ - @Override - public int symmetricDifferenceSize(Collection> other) - { - return other == null ? (int) size() : (int) matrix.symmetricDifferenceSize(convert(other).matrix); - } - - /** - * {@inheritDoc} - */ - @Override - public PairSet union(Collection> other) - { - return other == null ? clone() : createFromIndices(matrix.union(convert(other).matrix)); - } - - /** - * {@inheritDoc} - */ - @Override - public int unionSize(Collection> other) - { - return other == null ? 
(int) size() : (int) matrix.unionSize(convert(other).matrix); - } - -// /** -// * {@inheritDoc} -// */ -// @Override -// public PairSet unmodifiable() { -// return new PairSet(allTransactions, allItems, maxTransactionCount, maxItemCount, indices.unmodifiable()); -// } - - /** - * {@inheritDoc} - */ - @Override - public Pair first() - { - return indexToPair(matrix.first()); - } - - /** - * {@inheritDoc} - */ - @Override - public Pair last() - { - return indexToPair(matrix.last()); - } - - /** - * {@inheritDoc} - */ - @Override - public int compareTo(ExtendedSet> o) - { - return matrix.compareTo(convert(o).matrix); - } - - /** - * @return a transposed {@link PairSet} instance - */ - public PairSet transposed() - { - return new PairSet(matrix.transposed(), allItems, allTransactions); - } - - /** - * Creates a new {@link PairSet} instance with the union of all possible - * transactions and items as result for {@link #allTransactions()} and - * {@link #allItems()}, respectively, and the union of pairs. - * - * @param other the other {@link PairSet} instance to merge - * - * @return the merged {@link PairSet} instance - */ - public PairSet merged(PairSet other) - { - if (other == null) { - return clone(); - } - - // compute the new universe - Set newAllTransactions = new LinkedHashSet(allTransactions); - Set newAllItems = new LinkedHashSet(allItems); - newAllTransactions.addAll(other.allTransactions); - newAllItems.addAll(other.allItems); - - // compute the union of pairs - PairSet res = new PairSet( - matrix.clone(), - newAllTransactions, - newAllItems - ); - if (!other.isEmpty()) { - res.addAll(other); - } - return res; - } - - /** - * Creates a new {@link PairSet} instance with only non-empty transactions - * and items. 
- * - * @return the compacted {@link PairSet} instance - */ - public PairSet compacted() - { - // trivial case - if (isEmpty()) { - return empty(); - } - - // compute the new universe - final Set newAllTransactions = new LinkedHashSet(involvedTransactions()); - final Set newAllItems = new LinkedHashSet(involvedItems()); - if (newAllTransactions.size() == allTransactions.size() - && newAllItems.size() == allItems.size()) { - return clone(); - } - - // compute the union of pairs - PairSet res = new PairSet( - matrix.empty(), - newAllTransactions, - newAllItems - ); - res.addAll(this); - return res; - } - - -// // -// // COMPRESSED OBJECT SERIALIZATION -// // -// -// private static class ZipObjectOutputStream extends ObjectOutputStream { -// private GZIPOutputStream out; -// ZipObjectOutputStream(ObjectOutputStream out) throws IOException {this(new GZIPOutputStream(out));} -// ZipObjectOutputStream(GZIPOutputStream out) throws IOException {super(out); this.out = out;} -// @Override public void close() throws IOException {out.flush(); out.finish();} -// } -// -// private static class ZipObjectInputStream extends ObjectInputStream { -// ZipObjectInputStream(ObjectInputStream in) throws IOException {super(new GZIPInputStream(in));} -// } -// -// private void writeObject(ObjectOutputStream out) throws IOException { -// if (out instanceof ZipObjectOutputStream) { -// out.defaultWriteObject(); -// } else { -// ObjectOutputStream oos = new ZipObjectOutputStream(out); -// oos.writeObject(this); -// oos.close(); -// } -// } -// -// private transient Object serialize; -// -// @SuppressWarnings("unused") -// private Object readResolve() throws ObjectStreamException { -// if (serialize == null) -// serialize = this; -// return serialize; -// } -// -// private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { -// if (in instanceof ZipObjectInputStream) { -// in.defaultReadObject(); -// } else { -// ObjectInputStream ois = new 
ZipObjectInputStream(in); -// serialize = ois.readObject(); -// } -// } -} diff --git a/extendedset/src/test/java/io/druid/extendedset/Debug.java b/extendedset/src/test/java/io/druid/extendedset/Debug.java deleted file mode 100755 index 7344af17e940..000000000000 --- a/extendedset/src/test/java/io/druid/extendedset/Debug.java +++ /dev/null @@ -1,1858 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package io.druid.extendedset; - -import io.druid.extendedset.ExtendedSet.ExtendedIterator; -import io.druid.extendedset.intset.AbstractIntSet; -import io.druid.extendedset.intset.ArraySet; -import io.druid.extendedset.intset.ConciseSet; -import io.druid.extendedset.intset.FastSet; -import io.druid.extendedset.intset.HashIntSet; -import io.druid.extendedset.intset.IntSet; -import io.druid.extendedset.utilities.IntSetStatistics; -import io.druid.extendedset.utilities.random.MersenneTwister; -import io.druid.extendedset.wrappers.GenericExtendedSet; -import io.druid.extendedset.wrappers.IndexedSet; -import io.druid.extendedset.wrappers.IntegerSet; -import io.druid.extendedset.wrappers.matrix.BinaryMatrix; -import io.druid.extendedset.wrappers.matrix.BinaryMatrix.CellIterator; - -import java.math.BigInteger; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Random; -import 
java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; - -//import it.uniroma3.mat.extendedset.intset.Concise2Set; - - -/** - * Test class for {@link ConciseSet}, {@link FastSet}, and {@link IndexedSet}. - * - * @author Alessandro Colantonio - * @version $Id: Debug.java 155 2011-05-30 22:27:00Z cocciasik $ - */ -public class Debug -{ - /** - * Checks if a {@link ExtendedSet} instance and a {@link TreeSet} instance - * contains the same elements. {@link TreeSet} is used because it is the - * most similar class to {@link ExtendedSet}. - * - * @param type of elements within the set - * @param bits bit-set to check - * @param items {@link TreeSet} instance that must contain the same elements - * of the bit-set - * - * @return true if the given {@link ConciseSet} and - * {@link TreeSet} are equals in terms of contained elements - */ - private static boolean checkContent(ExtendedSet bits, SortedSet items) - { - if (bits.size() != items.size()) { - return false; - } - if (bits.isEmpty() && items.isEmpty()) { - return true; - } - for (T i : bits) { - if (!items.contains(i)) { - return false; - } - } - for (T i : items) { - if (!bits.contains(i)) { - return false; - } - } - if (!bits.last().equals(items.last())) { - return false; - } - if (!bits.first().equals(items.first())) { - return false; - } - return true; - } - - /** - * Generates an empty set of the specified class - * - * @param c the given class - * - * @return the empty set - */ - private static > X empty(Class c) - { - try { - return c.newInstance(); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** - * Stress test for {@link ConciseSet#add(Integer)} - *

- * It starts from a very sparse set (most of the words will be 0's - * sequences) and progressively become very dense (words first - * become 0's sequences with 1 set bit and there will be almost one - * word per item, then words become literals, and finally they - * become 1's sequences and drastically reduce in number) - */ - private static void testForAdditionStress(Class> c) - { - ExtendedSet previousBits = empty(c); - ExtendedSet currentBits = empty(c); - TreeSet currentItems = new TreeSet(); - - Random rnd = new MersenneTwister(); - - // add 100000 random numbers - for (int i = 0; i < 100000; i++) { - // random number to add - int item = rnd.nextInt(10000 + 1); - - // keep the previous results - previousBits = currentBits; - currentBits = currentBits.clone(); - - // add the element - System.out.format("Adding %d...\n", item); - boolean itemExistsBefore = currentItems.contains(item); - boolean itemAdded = currentItems.add(item); - boolean itemExistsAfter = currentItems.contains(item); - boolean bitExistsBefore = currentBits.contains(item); - boolean bitAdded = currentBits.add(item); - boolean bitExistsAfter = currentBits.contains(item); - if (itemAdded ^ bitAdded) { - System.out.println("wrong add() result"); - return; - } - if (itemExistsBefore ^ bitExistsBefore) { - System.out.println("wrong contains() before"); - return; - } - if (itemExistsAfter ^ bitExistsAfter) { - System.out.println("wrong contains() after"); - return; - } - - // check the list of elements - if (!checkContent(currentBits, currentItems)) { - System.out.println("add() error"); - System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); - System.out.println("\tcorrect: " + currentItems.toString()); - System.out.println("\twrong: " + currentBits.toString()); - System.out.println("Original: " + currentItems); - System.out.println(currentBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - return; - } - - // check the representation - 
ExtendedSet otherBits = previousBits.convert(currentItems); - if (otherBits.hashCode() != currentBits.hashCode()) { - System.out.println("Representation error"); - System.out.println(currentBits.debugInfo()); - System.out.println(otherBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - return; - } - - // check the union size - ExtendedSet singleBitSet = empty(c); - singleBitSet.add(item); - if (currentItems.size() != currentBits.unionSize(singleBitSet)) { - System.out.println("Size error"); - System.out.println("Original: " + currentItems); - System.out.println(currentBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - return; - } - } - - System.out.println("Final"); - System.out.println(currentBits.debugInfo()); - - System.out.println(); - System.out.println(IntSetStatistics.summary()); - } - - /** - * Stress test for {@link ConciseSet#remove(Object)} - *

- * It starts from a very dense set (most of the words will be 1's - * sequences) and progressively become very sparse (words first - * become 1's sequences with 1 unset bit and there will be few - * words per item, then words become literals, and finally they - * become 0's sequences and drastically reduce in number) - * - * @param c class to test - */ - private static void testForRemovalStress(Class> c) - { - ExtendedSet previousBits = empty(c); - ExtendedSet currentBits = empty(c); - TreeSet currentItems = new TreeSet(); - - Random rnd = new MersenneTwister(); - - // create a 1-filled bitset - currentBits.add((1 << MatrixIntSet.COL_POW) * 5 - 1); - currentBits.complement(); - currentItems.addAll(currentBits); - if (currentItems.size() != (1 << MatrixIntSet.COL_POW) * 5 - 1) { - System.out.println("Unexpected error!"); - System.out.println(currentBits.size()); - System.out.println(currentItems.size()); - return; - } - - // remove 100000 random numbers - for (int i = 0; i < 100000 & !currentBits.isEmpty(); i++) { - // random number to remove - int item = rnd.nextInt(10000 + 1); - - // keep the previous results - previousBits = currentBits; - currentBits = currentBits.clone(); - - // remove the element - System.out.format("Removing %d...\n", item); - boolean itemExistsBefore = currentItems.contains(item); - boolean itemRemoved = currentItems.remove(item); - boolean itemExistsAfter = currentItems.contains(item); - boolean bitExistsBefore = currentBits.contains(item); - boolean bitRemoved = currentBits.remove(item); - boolean bitExistsAfter = currentBits.contains(item); - if (itemRemoved ^ bitRemoved) { - System.out.println("wrong remove() result"); - return; - } - if (itemExistsBefore ^ bitExistsBefore) { - System.out.println("wrong contains() before"); - return; - } - if (itemExistsAfter ^ bitExistsAfter) { - System.out.println("wrong contains() after"); - return; - } - - // check the list of elements - if (!checkContent(currentBits, currentItems)) { - 
System.out.println("remove() error"); - System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); - System.out.println("Original: " + currentItems); - System.out.println(currentBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - - return; - } - - // check the representation - ExtendedSet otherBits = empty(c); - otherBits.addAll(currentItems); - if (otherBits.hashCode() != currentBits.hashCode()) { - System.out.println("Representation error"); - System.out.println(currentBits.debugInfo()); - System.out.println(otherBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - - return; - } - - // check the union size - ExtendedSet singleBitSet = empty(c); - singleBitSet.add(item); - if (currentItems.size() != currentBits.differenceSize(singleBitSet)) { - System.out.println("Size error"); - System.out.println("Original: " + currentItems); - System.out.println(currentBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - - return; - } - } - - System.out.println("Final"); - System.out.println(currentBits.debugInfo()); - - System.out.println(); - System.out.println(IntSetStatistics.summary()); - } - - /** - * Random operations on random sets. - *

- * It randomly chooses among {@link ConciseSet#addAll(Collection)}, - * {@link ConciseSet#removeAll(Collection)}, and - * {@link ConciseSet#retainAll(Collection)}, and perform the operation over - * random sets - * - * @param c class to test - */ - private static void testForRandomOperationsStress(Class> c, boolean testFillAndClear) - { - ExtendedSet bitsLeft = empty(c); - ExtendedSet bitsRight = empty(c); - SortedSet itemsLeft = new TreeSet(); - SortedSet itemsRight = new TreeSet(); - - Random r = new MersenneTwister(); - final int maxCardinality = 1000; - - // random operation loop - for (int i = 0; i < 1000000; i++) { - System.out.format("Test %,d (%,d): ", i, System.currentTimeMillis()); - - RandomNumbers rn; - switch (r.nextInt(3)) { - case 0: - rn = new RandomNumbers.Uniform( - r.nextInt(maxCardinality), - r.nextDouble() * 0.999, - r.nextInt(maxCardinality / 10) - ); - break; - case 1: - rn = new RandomNumbers.Zipfian( - r.nextInt(maxCardinality), - r.nextDouble() * 0.9, - r.nextInt(maxCardinality / 10), - 2 - ); - break; - case 2: - rn = new RandomNumbers.Markovian( - r.nextInt(maxCardinality), - r.nextDouble() * 0.999, - r.nextInt(maxCardinality / 10) - ); - break; - default: - throw new RuntimeException("unexpected"); - } - - /* - * fill() and clear() - */ - if (testFillAndClear) { - bitsRight.clear(); - itemsRight.clear(); - Iterator itr1 = rn.generate().iterator(); - Iterator itr2 = rn.generate().iterator(); - while (itr1.hasNext() && itr2.hasNext()) { - ExtendedSet clone = bitsRight.clone(); - Integer from = itr1.next(); - Integer to = itr2.next(); - if (from.compareTo(to) > 0) { - Integer s = from; - from = to; - to = s; - } - - boolean fill = r.nextBoolean(); - if (fill) { - for (int j = from; j <= to; j++) { - itemsRight.add(j); - } - bitsRight.fill(from, to); - } else { - for (int j = from; j <= to; j++) { - itemsRight.remove(j); - } - bitsRight.clear(from, to); - } - - if (!checkContent(bitsLeft, itemsLeft)) { - System.out.println("FILL/CLEAR 
ERROR!"); - System.out.println("Same elements: " + (itemsLeft.toString().equals(bitsLeft.toString()))); - System.out.println("itemsLeft:"); - System.out.println(itemsLeft); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - - System.out.println("itemsLeft.size(): " + itemsLeft.size() + " ?= bitsLeft.size(): " + bitsLeft.size()); - for (Integer x : bitsLeft) { - if (!itemsLeft.contains(x)) { - System.out.println("itemsLeft does not contain " + x); - } - } - for (Integer x : itemsLeft) { - if (!bitsLeft.contains(x)) { - System.out.println("itemsLeft does not contain " + x); - } - } - System.out.println("bitsLeft.last(): " + bitsLeft.last() + " ?= itemsLeft.last(): " + itemsLeft.last()); - System.out.println("bitsLeft.first(): " + bitsLeft.first() + " ?= itemsLeft.first(): " + itemsLeft.first()); - - return; - } - ExtendedSet app = empty(c); - app.addAll(itemsRight); - if (bitsRight.hashCode() != app.hashCode()) { - System.out.println("FILL/CLEAR FORMAT ERROR!"); - System.out.println("fill: " + fill); - System.out.println("from " + from + " to " + to); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("Append:"); - System.out.println(app.debugInfo()); - System.out.println("Clone:"); - System.out.println(clone.debugInfo()); - return; - } - } - } - - - /* - * contains(), add(), and remove() - */ - bitsRight.clear(); - itemsRight.clear(); - for (Integer e : rn.generate()) { - if (itemsRight.contains(e) ^ bitsRight.contains(e)) { - System.out.println("CONTAINS ERROR!"); - System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); - System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - return; - } - ExtendedSet clone = 
bitsRight.clone(); - boolean resItems = itemsRight.add(e); - boolean resBits = bitsRight.add(e); - ExtendedSet app = empty(c); - app.addAll(itemsRight); - if (bitsRight.hashCode() != app.hashCode()) { - System.out.println("ADD ERROR!"); - System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); - System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("Append:"); - System.out.println(app.debugInfo()); - System.out.println("Clone:"); - System.out.println(clone.debugInfo()); - return; - } - if (resItems != resBits) { - System.out.println("ADD BOOLEAN ERROR!"); - System.out.println("itemsRight.add(" + e + "): " + resItems); - System.out.println("bitsRight.add(" + e + "): " + resBits); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - return; - } - } - for (Integer e : rn.generate()) { - ExtendedSet clone = bitsRight.clone(); - boolean resItems = itemsRight.remove(e); - boolean resBits = bitsRight.remove(e); - ExtendedSet app = empty(c); - app.addAll(itemsRight); - if (bitsRight.hashCode() != app.hashCode()) { - System.out.println("REMOVE ERROR!"); - System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); - System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("Append:"); - System.out.println(app.debugInfo()); - System.out.println("Clone:"); - System.out.println(clone.debugInfo()); - return; - } - if (resItems != resBits) { - System.out.println("REMOVE BOOLEAN ERROR!"); - System.out.println("itemsRight.remove(" + e + "): " + 
resItems); - System.out.println("bitsRight.remove(" + e + "): " + resBits); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("Clone:"); - System.out.println(clone.debugInfo()); - return; - } - } - for (Integer e : rn.generate()) { - ExtendedSet clone = bitsRight.clone(); - if (!itemsRight.remove(e)) { - itemsRight.add(e); - } - bitsRight.flip(e); - ExtendedSet app = empty(c); - app.addAll(itemsRight); - if (bitsRight.hashCode() != app.hashCode()) { - System.out.println("FLIP ERROR!"); - System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); - System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("Append:"); - System.out.println(app.debugInfo()); - System.out.println("Clone:"); - System.out.println(clone.debugInfo()); - return; - } - } - - // new right operand - itemsRight = rn.generate(); - bitsRight.clear(); - bitsRight.addAll(itemsRight); - - /* - * check for content correctness, first(), and last() - */ - if (!checkContent(bitsRight, itemsRight)) { - System.out.println("RIGHT OPERAND ERROR!"); - System.out.println("Same elements: " + (itemsRight.toString().equals(bitsRight.toString()))); - System.out.println("itemsRight:"); - System.out.println(itemsRight); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - - System.out.println("itemsRight.size(): " + itemsRight.size() + " ?= bitsRight.size(): " + bitsRight.size()); - for (Integer x : bitsRight) { - if (!itemsRight.contains(x)) { - System.out.println("itemsRight does not contain " + x); - } - } - for (Integer x : itemsRight) { - if (!bitsRight.contains(x)) { - System.out.println("itemsRight does not contain " + x); - } - } - 
System.out.println("bitsRight.last(): " + bitsRight.last() + " ?= itemsRight.last(): " + itemsRight.last()); - System.out.println("bitsRight.first(): " + bitsRight.first() + " ?= itemsRight.first(): " + itemsRight.first()); - - return; - } - - /* - * containsAll() - */ - boolean bitsRes = bitsLeft.containsAll(bitsRight); - boolean itemsRes = itemsLeft.containsAll(itemsRight); - if (bitsRes != itemsRes) { - System.out.println("CONTAINS_ALL ERROR!"); - System.out.println("bitsLeft.containsAll(bitsRight): " + bitsRes); - System.out.println("itemsLeft.containsAll(itemsRight): " + itemsRes); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("bitsLeft.intersection(bitsRight)"); - System.out.println(bitsLeft.intersection(bitsRight)); - System.out.println("itemsLeft.retainAll(itemsRight)"); - itemsLeft.retainAll(itemsRight); - System.out.println(itemsLeft); - return; - } - - /* - * containsAny() - */ - bitsRes = bitsLeft.containsAny(bitsRight); - itemsRes = true; - for (Integer x : itemsRight) { - itemsRes = itemsLeft.contains(x); - if (itemsRes) { - break; - } - } - if (bitsRes != itemsRes) { - System.out.println("bitsLeft.containsAny(bitsRight): " + bitsRes); - System.out.println("itemsLeft.containsAny(itemsRight): " + itemsRes); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("bitsLeft.intersection(bitsRight)"); - System.out.println(bitsLeft.intersection(bitsRight)); - System.out.println("itemsLeft.retainAll(itemsRight)"); - itemsLeft.retainAll(itemsRight); - System.out.println(itemsLeft); - return; - } - - /* - * containsAtLeast() - */ - int l = 1 + r.nextInt(bitsRight.size() + 1); - bitsRes = bitsLeft.containsAtLeast(bitsRight, l); - int itemsResCnt = 0; - for (Integer x : itemsRight) { - if 
(itemsLeft.contains(x)) { - itemsResCnt++; - } - if (itemsResCnt >= l) { - break; - } - } - if (bitsRes != (itemsResCnt >= l)) { - System.out.println("bitsLeft.containsAtLeast(bitsRight, " + l + "): " + bitsRes); - System.out.println("itemsLeft.containsAtLeast(itemsRight, " + l + "): " + (itemsResCnt >= l)); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - System.out.println("bitsRight:"); - System.out.println(bitsRight.debugInfo()); - System.out.println("bitsLeft.intersection(bitsRight)"); - System.out.println(bitsLeft.intersection(bitsRight)); - System.out.println("itemsLeft.retainAll(itemsRight)"); - itemsLeft.retainAll(itemsRight); - System.out.println(itemsLeft); - return; - } - - /* - * Perform a random operation with the previous set: - * addAll() and unionSize() - * removeAll() and differenceSize() - * retainAll() and intersectionSize() - * symmetricDifference() and symmetricDifferenceSize() - * complement() and complementSize() - */ - ExtendedSet alternative = null; - int operationSize = 0; - boolean resItems = true, resBits = true; - switch (1 + r.nextInt(5)) { - case 1: - System.out.format(" union of %d elements with %d elements... ", itemsLeft.size(), itemsRight.size()); - System.out.flush(); - operationSize = bitsLeft.unionSize(bitsRight); - resItems = itemsLeft.addAll(itemsRight); - alternative = bitsLeft.union(bitsRight); - resBits = bitsLeft.addAll(bitsRight); - break; - - case 2: - System.out.format(" difference of %d elements with %d elements... ", itemsLeft.size(), itemsRight.size()); - System.out.flush(); - operationSize = bitsLeft.differenceSize(bitsRight); - resItems = itemsLeft.removeAll(itemsRight); - alternative = bitsLeft.difference(bitsRight); - resBits = bitsLeft.removeAll(bitsRight); - break; - - case 3: - System.out.format(" intersection of %d elements with %d elements... 
", itemsLeft.size(), itemsRight.size()); - System.out.flush(); - operationSize = bitsLeft.intersectionSize(bitsRight); - resItems = itemsLeft.retainAll(itemsRight); - alternative = bitsLeft.intersection(bitsRight); - resBits = bitsLeft.retainAll(bitsRight); - break; - - case 4: - System.out.format( - " symmetric difference of %d elements with %d elements... ", - itemsLeft.size(), - itemsRight.size() - ); - System.out.flush(); - operationSize = bitsLeft.symmetricDifferenceSize(bitsRight); - TreeSet temp = new TreeSet(itemsRight); - temp.removeAll(itemsLeft); - itemsLeft.removeAll(itemsRight); - itemsLeft.addAll(temp); - bitsLeft = bitsLeft.symmetricDifference(bitsRight); - alternative = bitsLeft; - break; - - case 5: - System.out.format(" complement of %d elements... ", itemsLeft.size()); - System.out.flush(); - operationSize = bitsLeft.complementSize(); - if (!itemsLeft.isEmpty()) { - if ((bitsLeft instanceof IntegerSet) && (((IntegerSet) bitsLeft).intSet() instanceof MatrixIntSet)) { - BinaryMatrix m = ((MatrixIntSet) ((IntegerSet) bitsLeft).intSet()).matrix; - int x = m.maxCol(); - for (int rx = m.maxRow(); rx >= 0; rx--) { - for (int cx = x; cx >= 0; cx--) { - if (!itemsLeft.add(MatrixIntSet.toInt(rx, cx))) { - itemsLeft.remove(MatrixIntSet.toInt(rx, cx)); - } - } - } - } else { - for (int j = itemsLeft.last(); j >= 0; j--) { - if (!itemsLeft.add(j)) { - itemsLeft.remove(j); - } - } - } - } - bitsLeft.complement(); - alternative = bitsLeft; - break; - default: - throw new RuntimeException("Unexpected error!"); - } - - // check the list of elements - if (!checkContent(bitsLeft, itemsLeft)) { - System.out.println("OPERATION ERROR!"); - System.out.println("Same elements: " + (itemsLeft.toString().equals(bitsLeft.toString()))); - System.out.println("itemsLeft:"); - System.out.println(itemsLeft); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - - System.out.println("itemsLeft.size(): " + itemsLeft.size() + " ?= bitsLeft.size(): " + 
bitsLeft.size()); - for (Integer x : bitsLeft) { - if (!itemsLeft.contains(x)) { - System.out.println("itemsLeft does not contain " + x); - } - } - for (Integer x : itemsLeft) { - if (!bitsLeft.contains(x)) { - System.out.println("itemsLeft does not contain " + x); - } - } - System.out.println("bitsLeft.last(): " + bitsLeft.last() + " ?= itemsLeft.last(): " + itemsLeft.last()); - System.out.println("bitsLeft.first(): " + bitsLeft.first() + " ?= itemsLeft.first(): " + itemsLeft.first()); - - return; - } - - // check the size - if (itemsLeft.size() != operationSize) { - System.out.println("OPERATION SIZE ERROR"); - System.out.println("Wrong size: " + operationSize); - System.out.println("Correct size: " + itemsLeft.size()); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - return; - } - - // check the boolean result - if (resItems != resBits) { - System.out.println("OPERATION BOOLEAN ERROR!"); - System.out.println("resItems: " + resItems); - System.out.println("resBits: " + resBits); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - return; - } - - // check the internal representation of the result - ExtendedSet x = bitsLeft.empty(); - x.addAll(itemsLeft); - if (x.hashCode() != bitsLeft.hashCode()) { - System.out.println("Internal representation error!"); - System.out.println("FROM APPEND:"); - System.out.println(x.debugInfo()); - System.out.println("FROM OPERATION:"); - System.out.println(bitsLeft.debugInfo()); - return; - } - - // check similar results - if (!bitsLeft.equals(alternative)) { - System.out.println("ALTERNATIVE OPERATION ERROR!"); - System.out.println("bitsLeft:"); - System.out.println(bitsLeft.debugInfo()); - System.out.println("alternative:"); - System.out.println(alternative.debugInfo()); - return; - } - - System.out.println("done."); - } - } - - /** - * Stress test (addition) for {@link #subSet(Integer, Integer)} - */ - private static void testForSubSetAdditionStress() - { - IntegerSet 
previousBits = new IntegerSet(new ConciseSet()); - IntegerSet currentBits = new IntegerSet(new ConciseSet()); - TreeSet currentItems = new TreeSet(); - - Random rnd = new MersenneTwister(); - - for (int j = 0; j < 100000; j++) { - // keep the previous result - previousBits = currentBits; - currentBits = currentBits.clone(); - - // generate a new subview - int min = rnd.nextInt(10000); - int max = min + 1 + rnd.nextInt(10000 - (min + 1) + 1); - int item = min + rnd.nextInt((max - 1) - min + 1); - System.out.println("Adding " + item + " to the subview from " + min + " to " + max + " - 1"); - SortedSet subBits = currentBits.subSet(min, max); - SortedSet subItems = currentItems.subSet(min, max); - boolean subBitsResult = subBits.add(item); - boolean subItemsResult = subItems.add(item); - - if (subBitsResult != subItemsResult - || subBits.size() != subItems.size() - || !subBits.toString().equals(subItems.toString())) { - System.out.println("Subset error!"); - return; - } - - if (!checkContent(currentBits, currentItems)) { - System.out.println("Subview not correct!"); - System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); - System.out.println("Original: " + currentItems); - System.out.println(currentBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - return; - } - - // check the representation - IntegerSet otherBits = new IntegerSet(new ConciseSet()); - otherBits.addAll(currentItems); - if (otherBits.hashCode() != currentBits.hashCode()) { - System.out.println("Representation not correct!"); - System.out.println(currentBits.debugInfo()); - System.out.println(otherBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - return; - } - } - - System.out.println(currentBits.debugInfo()); - System.out.println(IntSetStatistics.summary()); - } - - /** - * Stress test (addition) for {@link ConciseSet#subSet(Integer, Integer)} - */ - private static void testForSubSetRemovalStress() - { - IntegerSet 
previousBits = new IntegerSet(new ConciseSet()); - IntegerSet currentBits = new IntegerSet(new ConciseSet()); - TreeSet currentItems = new TreeSet(); - - // create a 1-filled bitset - currentBits.add(10001); - currentBits.complement(); - currentItems.addAll(currentBits); - if (currentItems.size() != 10001) { - System.out.println("Unexpected error!"); - return; - } - - Random rnd = new MersenneTwister(); - - for (int j = 0; j < 100000; j++) { - // keep the previous result - previousBits = currentBits; - currentBits = currentBits.clone(); - - // generate a new subview - int min = rnd.nextInt(10000); - int max = min + 1 + rnd.nextInt(10000 - (min + 1) + 1); - int item = rnd.nextInt(10000 + 1); - System.out.println("Removing " + item + " from the subview from " + min + " to " + max + " - 1"); - SortedSet subBits = currentBits.subSet(min, max); - SortedSet subItems = currentItems.subSet(min, max); - boolean subBitsResult = subBits.remove(item); - boolean subItemsResult = subItems.remove(item); - - if (subBitsResult != subItemsResult - || subBits.size() != subItems.size() - || !subBits.toString().equals(subItems.toString())) { - System.out.println("Subset error!"); - return; - } - - if (!checkContent(currentBits, currentItems)) { - System.out.println("Subview not correct!"); - System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); - System.out.println("Original: " + currentItems); - System.out.println(currentBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - return; - } - - // check the representation - IntegerSet otherBits = new IntegerSet(new ConciseSet()); - otherBits.addAll(currentItems); - if (otherBits.hashCode() != currentBits.hashCode()) { - System.out.println("Representation not correct!"); - System.out.println(currentBits.debugInfo()); - System.out.println(otherBits.debugInfo()); - System.out.println(previousBits.debugInfo()); - return; - } - } - - System.out.println(currentBits.debugInfo()); - 
System.out.println(IntSetStatistics.summary()); - } - - /** - * Random operations on random sub sets. - *

- * It randomly chooses among all operations and performs the operation over - * random sets - */ - private static void testForSubSetRandomOperationsStress() - { - IntegerSet bits = new IntegerSet(new ConciseSet()); - IntegerSet bitsPrevious = new IntegerSet(new ConciseSet()); - TreeSet items = new TreeSet(); - - Random rnd = new MersenneTwister(); - - // random operation loop - for (int i = 0; i < 100000; i++) { - System.out.print("Test " + i + ": "); - - // new set - bitsPrevious = bits.clone(); - if (!bitsPrevious.toString().equals(bits.toString())) { - throw new RuntimeException("clone() error!"); - } - bits.clear(); - items.clear(); - final int size = 1 + rnd.nextInt(10000); - final int min = 1 + rnd.nextInt(10000 - 1); - final int max = min + rnd.nextInt(10000 - min + 1); - final int minSub = 1 + rnd.nextInt(10000 - 1); - final int maxSub = minSub + rnd.nextInt(10000 - minSub + 1); - for (int j = 0; j < size; j++) { - int item = min + rnd.nextInt(max - min + 1); - bits.add(item); - items.add(item); - } - - // perform base checks - SortedSet bitsSubSet = bits.subSet(minSub, maxSub); - SortedSet itemsSubSet = items.subSet(minSub, maxSub); - if (!bitsSubSet.toString().equals(itemsSubSet.toString())) { - System.out.println("toString() difference!"); - System.out.println("value: " + bitsSubSet.toString()); - System.out.println("actual: " + itemsSubSet.toString()); - return; - } - if (bitsSubSet.size() != itemsSubSet.size()) { - System.out.println("size() difference!"); - System.out.println("value: " + bitsSubSet.size()); - System.out.println("actual: " + itemsSubSet.size()); - System.out.println("bits: " + bits.toString()); - System.out.println("items: " + items.toString()); - System.out.println("bitsSubSet: " + bitsSubSet.toString()); - System.out.println("itemsSubSet: " + itemsSubSet.toString()); - return; - } - if (!itemsSubSet.isEmpty() && (!bitsSubSet.first().equals(itemsSubSet.first()))) { - System.out.println("first() difference!"); - 
System.out.println("value: " + bitsSubSet.first()); - System.out.println("actual: " + itemsSubSet.first()); - System.out.println("bits: " + bits.toString()); - System.out.println("items: " + items.toString()); - System.out.println("bitsSubSet: " + bitsSubSet.toString()); - System.out.println("itemsSubSet: " + itemsSubSet.toString()); - return; - } - if (!itemsSubSet.isEmpty() && (!bitsSubSet.last().equals(itemsSubSet.last()))) { - System.out.println("last() difference!"); - System.out.println("value: " + bitsSubSet.last()); - System.out.println("actual: " + itemsSubSet.last()); - System.out.println("bits: " + bits.toString()); - System.out.println("items: " + items.toString()); - System.out.println("bitsSubSet: " + bitsSubSet.toString()); - System.out.println("itemsSubSet: " + itemsSubSet.toString()); - return; - } - - // perform the random operation - boolean resBits = false; - boolean resItems = false; - boolean exceptionBits = false; - boolean exceptionItems = false; - switch (1 + rnd.nextInt(4)) { - case 1: - System.out.format(" addAll() of %d elements on %d elements... ", bitsPrevious.size(), bits.size()); - try { - resBits = bitsSubSet.addAll(bitsPrevious); - } - catch (Exception e) { - bits.clear(); - System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); - exceptionBits = true; - } - try { - resItems = itemsSubSet.addAll(bitsPrevious); - } - catch (Exception e) { - items.clear(); - System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); - exceptionItems = true; - } - break; - - case 2: - System.out.format(" removeAll() of %d elements on %d elements... 
", bitsPrevious.size(), bits.size()); - try { - resBits = bitsSubSet.removeAll(bitsPrevious); - } - catch (Exception e) { - bits.clear(); - System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); - exceptionBits = true; - } - try { - resItems = itemsSubSet.removeAll(bitsPrevious); - } - catch (Exception e) { - items.clear(); - System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); - exceptionItems = true; - } - break; - - case 3: - System.out.format(" retainAll() of %d elements on %d elements... ", bitsPrevious.size(), bits.size()); - try { - resBits = bitsSubSet.retainAll(bitsPrevious); - } - catch (Exception e) { - bits.clear(); - System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); - exceptionBits = true; - } - try { - resItems = itemsSubSet.retainAll(bitsPrevious); - } - catch (Exception e) { - items.clear(); - System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); - exceptionItems = true; - } - break; - - case 4: - System.out.format(" clear() of %d elements on %d elements... 
", bitsPrevious.size(), bits.size()); - try { - bitsSubSet.clear(); - } - catch (Exception e) { - bits.clear(); - System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); - exceptionBits = true; - } - try { - itemsSubSet.clear(); - } - catch (Exception e) { - items.clear(); - System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); - exceptionItems = true; - } - break; - } - - if (exceptionBits != exceptionItems) { - System.out.println("Incorrect exception!"); - return; - } - - if (resBits != resItems) { - System.out.println("Incorrect results!"); - System.out.println("resBits: " + resBits); - System.out.println("resItems: " + resItems); - return; - } - - if (!checkContent(bits, items)) { - System.out.println("Subview not correct!"); - System.out.format("min: %d, max: %d, minSub: %d, maxSub: %d\n", min, max, minSub, maxSub); - System.out.println("Same elements: " + (items.toString().equals(bits.toString()))); - System.out.println("Original: " + items); - System.out.println(bits.debugInfo()); - System.out.println(bitsPrevious.debugInfo()); - return; - } - - // check the representation - IntegerSet otherBits = new IntegerSet(new ConciseSet()); - otherBits.addAll(items); - if (otherBits.hashCode() != bits.hashCode()) { - System.out.println("Representation not correct!"); - System.out.format("min: %d, max: %d, minSub: %d, maxSub: %d\n", min, max, minSub, maxSub); - System.out.println(bits.debugInfo()); - System.out.println(otherBits.debugInfo()); - System.out.println(bitsPrevious.debugInfo()); - return; - } - - System.out.println("done."); - } - } - - /** - * Test the method {@link ExtendedSet#compareTo(ExtendedSet)} - * - * @param c class to test - */ - private static void testForComparatorSimple(Class> c) - { - ExtendedSet bitsLeft = empty(c); - ExtendedSet bitsRight = empty(c); - - bitsLeft.add(1); - bitsLeft.add(2); - bitsLeft.add(3); - bitsLeft.add(100); - bitsRight.add(1000000); - System.out.println("A: " + bitsLeft); - 
System.out.println("B: " + bitsRight); - System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); - System.out.println(); - - bitsLeft.add(1000000); - bitsRight.add(1); - bitsRight.add(2); - bitsRight.add(3); - System.out.println("A: " + bitsLeft); - System.out.println("B: " + bitsRight); - System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); - System.out.println(); - - bitsLeft.remove(100); - System.out.println("A: " + bitsLeft); - System.out.println("B: " + bitsRight); - System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); - System.out.println(); - - bitsRight.remove(1); - System.out.println("A: " + bitsLeft); - System.out.println("B: " + bitsRight); - System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); - System.out.println(); - - bitsLeft.remove(1); - bitsLeft.remove(2); - System.out.println("A: " + bitsLeft); - System.out.println("B: " + bitsRight); - System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); - System.out.println(); - } - - /** - * Another test for {@link ExtendedSet#compareTo(ExtendedSet)} - * - * @param c class to test - */ - private static void testForComparatorComplex(Class> c) - { - ExtendedSet bitsLeft = empty(c); - ExtendedSet bitsRight = empty(c); - - Random rnd = new MersenneTwister(); - for (int i = 0; i < 10000; i++) { - // empty numbers - BigInteger correctLeft = BigInteger.ZERO; - BigInteger correctRight = BigInteger.ZERO; - bitsLeft.clear(); - bitsRight.clear(); - - int size = 10 + rnd.nextInt(10000); - RandomNumbers rn; - if (rnd.nextBoolean()) { - rn = new RandomNumbers.Uniform(rnd.nextInt(size), rnd.nextDouble() * 0.999, rnd.nextInt(size / 10)); - } else { - rn = new RandomNumbers.Markovian(rnd.nextInt(size), rnd.nextDouble() * 0.999, rnd.nextInt(size / 10)); - } - bitsLeft.addAll(rn.generate()); - if (rnd.nextBoolean()) { - bitsRight.addAll(bitsLeft); - bitsRight.add(rnd.nextInt(size)); - } else { - bitsRight.addAll(rn.generate()); - } - for 
(int x : bitsLeft.descending()) { - correctLeft = correctLeft.setBit(x); - } - for (int x : bitsRight) { - correctRight = correctRight.setBit(x); - } - - // compare them! - boolean correct = bitsLeft.compareTo(bitsRight) == correctLeft.compareTo(correctRight); - System.out.println(i + ": " + correct); - if (!correct) { - System.out.println("ERROR!"); - System.out.println("bitsLeft: " + bitsLeft); - System.out.println(" " + bitsLeft.debugInfo()); - System.out.println("bitsRight: " + bitsRight); - System.out.println(" " + bitsRight.debugInfo()); - int maxLength = Math.max(correctLeft.bitLength(), correctRight.bitLength()); - System.out.format("correctLeft.toString(2): %" + maxLength + "s\n", correctLeft.toString(2)); - System.out.format("correctRight.toString(2): %" + maxLength + "s\n", correctRight.toString(2)); - System.out.println("correctLeft.compareTo(correctRight): " + correctLeft.compareTo(correctRight)); - System.out.println("bitsLeft.compareTo(bitsRight): " + bitsLeft.compareTo(bitsRight)); - - Iterator itrLeft = bitsLeft.descendingIterator(); - Iterator itrRight = bitsRight.descendingIterator(); - while (itrLeft.hasNext() && itrRight.hasNext()) { - int l = itrLeft.next(); - int r = itrRight.next(); - if (l != r) { - System.out.println("l != r --> " + l + ", " + r); - break; - } - } - return; - } - } - System.out.println("Done!"); - } - - /** - * Stress test for {@link ExtendedSet#descendingIterator()} - * - * @param c class to test - */ - private static void testForDescendingIterator(Class> c) - { - ExtendedSet bits = empty(c); - - Random rnd = new MersenneTwister(); - for (int i = 0; i < 100000; i++) { - int n = rnd.nextInt(10000); - System.out.print(i + ": add " + n); - bits.add(n); - - Set x = new HashSet(bits); - Set y = new HashSet(); - try { - for (Integer e : bits.descending()) { - y.add(e); - } - } - catch (Exception e) { - System.out.println("\nERROR!"); - System.out.println(e.getMessage()); - System.out.println(bits.debugInfo()); - break; - } - 
boolean correct = x.equals(y); - System.out.println(" --> " + correct); - if (!correct) { - System.out.println(bits.debugInfo()); - System.out.print("result: "); - for (Integer e : bits.descending()) { - System.out.print(e + ", "); - } - System.out.println(); - break; - } - } - - System.out.println("Done!"); - } - - /** - * Stress test for {@link ConciseSet#get(int)} - * - * @param c class to test - */ - private static void testForPosition(Class> c) - { - ExtendedSet bits = empty(c); - - Random rnd = new MersenneTwister(31); - for (int i = 0; i < 1000; i++) { - // new set - bits.clear(); - final int size = 1 + rnd.nextInt(10000); - final int min = 1 + rnd.nextInt(10000 - 1); - final int max = min + rnd.nextInt(10000 - min + 1); - for (int j = 0; j < size; j++) { - int item = min + rnd.nextInt(max - min + 1); - bits.add(item); - } - - // check correctness - String good = bits.toString(); - StringBuilder other = new StringBuilder(); - int s = bits.size(); - other.append('['); - for (int j = 0; j < s; j++) { - other.append(bits.get(j)); - if (j < s - 1) { - other.append(", "); - } - } - other.append(']'); - - if (good.equals(other.toString())) { - System.out.println(i + ") OK"); - } else { - System.out.println("ERROR"); - System.out.println(bits.debugInfo()); - System.out.println(bits); - System.out.println(other); - return; - } - - int pos = 0; - for (Integer x : bits) { - if (bits.indexOf(x) != pos) { - System.out.println("ERROR! 
" + pos + " != " + bits.indexOf(x) + " for element " + x); - System.out.println(bits.debugInfo()); - return; - } - pos++; - } - } - } - - /** - * Test for {@link ExtendedIterator#skipAllBefore(Object)} - * - * @param c class to test - */ - private static void testForSkip(Class> c) - { - ExtendedSet bits = empty(c); - - Random rnd = new MersenneTwister(31); - for (int i = 0; i < 10000; i++) { - int max = rnd.nextInt(10000); - bits = bits.convert(new RandomNumbers.Uniform( - rnd.nextInt(1000), - rnd.nextDouble() * 0.999, - rnd.nextInt(100) - ).generate()); - - for (int j = 0; j < 100; j++) { - int skip = rnd.nextInt(max + 1); - boolean reverse = rnd.nextBoolean(); - System.out.format("%d) size=%d, skip=%d, reverse=%b ---> ", (i * 100) + j + 1, bits.size(), skip, reverse); - - ExtendedIterator itr1, itr2; - if (!reverse) { - itr1 = bits.iterator(); - itr2 = bits.iterator(); - while (itr1.hasNext() && itr1.next() < skip) {/* nothing */} - } else { - itr1 = bits.descendingIterator(); - itr2 = bits.descendingIterator(); - while (itr1.hasNext() && itr1.next() > skip) {/* nothing */} - } - if (!itr1.hasNext()) { - System.out.println("Skipped!"); - continue; - } - itr2.skipAllBefore(skip); - itr2.next(); - Integer i1, i2; - if (!(i1 = itr1.next()).equals(i2 = itr2.next())) { - System.out.println("Error!"); - System.out.println("i1 = " + i1); - System.out.println("i2 = " + i2); - System.out.println(bits.debugInfo()); - return; - } - System.out.println("OK!"); - } - } - System.out.println("Done!"); - } - - /** - * Test launcher - * - * @param args ID of the test to execute - */ - public static void main(String[] args) - { - // NOTE: the most complete test is TestCase.RANDOM_OPERATION_STRESS -// TestCase testCase = TestCase.ADDITION_STRESS; -// TestCase testCase = TestCase.REMOVAL_STRESS; -// TestCase testCase = TestCase.RANDOM_OPERATION_STRESS; -// TestCase testCase = TestCase.FILL_CLEAR_STRESS; -// TestCase testCase = TestCase.SKIP; - TestCase testCase = TestCase.POSITION; 
-// TestCase testCase = TestCase.COMPARATOR_COMPLEX; -// TestCase testCase = TestCase.DESCENDING_ITERATOR; - -// Class> classToTest = IntegerHashSet.class; -// Class> classToTest = IntegerFastSet.class; -// Class> classToTest = IntegerConciseSet.class; -// Class> classToTest = IntegerConcise2Set.class; -// Class> classToTest = IntegerConcisePlusSet.class; -// Class> classToTest = IntegerWAHSet.class; -// Class> classToTest = ListSet.class; -// Class> classToTest = LinkedSet.class; - Class> classToTest = MatrixSet.class; - - if (args != null && args.length > 0) { - try { - testCase = TestCase.values()[Integer.parseInt(args[0])]; - } - catch (NumberFormatException ignore) { - // nothing to do - } - } - - switch (testCase) { - case ADDITION_STRESS: - testForAdditionStress(classToTest); - break; - case REMOVAL_STRESS: - testForRemovalStress(classToTest); - break; - case RANDOM_OPERATION_STRESS: - testForRandomOperationsStress(classToTest, false); - break; - case FILL_CLEAR_STRESS: - testForRandomOperationsStress(classToTest, true); - break; - case SUBSET_ADDITION_STRESS_CONCISESET: - testForSubSetAdditionStress(); - break; - case SUBSET_REMOVAL_STRESS_CONCISESET: - testForSubSetRemovalStress(); - break; - case SUBSET_RANDOM_OPERATION_STRESS_CONCISESET: - testForSubSetRandomOperationsStress(); - break; - case COMPARATOR_SIMPLE: - testForComparatorSimple(classToTest); - break; - case COMPARATOR_COMPLEX: - testForComparatorComplex(classToTest); - break; - case DESCENDING_ITERATOR: - testForDescendingIterator(classToTest); - break; - case POSITION: - testForPosition(classToTest); - break; - case SKIP: - testForSkip(classToTest); - } - } - - /** - * @author alessandrocolantonio - */ - private enum TestCase - { - /** - * @uml.property name="aDDITION_STRESS" - * @uml.associationEnd - */ - ADDITION_STRESS, - /** - * @uml.property name="rEMOVAL_STRESS" - * @uml.associationEnd - */ - REMOVAL_STRESS, - /** - * @uml.property name="rANDOM_OPERATION_STRESS" - * @uml.associationEnd - 
*/ - RANDOM_OPERATION_STRESS, - /** - * @uml.property name="fILL_CLEAR_STRESS" - * @uml.associationEnd - */ - FILL_CLEAR_STRESS, - /** - * @uml.property name="sUBSET_ADDITION_STRESS_CONCISESET" - * @uml.associationEnd - */ - SUBSET_ADDITION_STRESS_CONCISESET, - /** - * @uml.property name="sUBSET_REMOVAL_STRESS_CONCISESET" - * @uml.associationEnd - */ - SUBSET_REMOVAL_STRESS_CONCISESET, - /** - * @uml.property name="sUBSET_RANDOM_OPERATION_STRESS_CONCISESET" - * @uml.associationEnd - */ - SUBSET_RANDOM_OPERATION_STRESS_CONCISESET, - /** - * @uml.property name="cOMPARATOR_SIMPLE" - * @uml.associationEnd - */ - COMPARATOR_SIMPLE, - /** - * @uml.property name="cOMPARATOR_COMPLEX" - * @uml.associationEnd - */ - COMPARATOR_COMPLEX, - /** - * @uml.property name="dESCENDING_ITERATOR" - * @uml.associationEnd - */ - DESCENDING_ITERATOR, - /** - * @uml.property name="pOSITION" - * @uml.associationEnd - */ - POSITION, - /** - * @uml.property name="sKIP" - * @uml.associationEnd - */ - SKIP,; - } - - @SuppressWarnings("unused") - private static class ListSet extends GenericExtendedSet - { - ListSet() - { - super(ArrayList.class); - } - } - - @SuppressWarnings("unused") - private static class LinkedSet extends GenericExtendedSet - { - LinkedSet() - { - super(LinkedList.class); - } - } - - @SuppressWarnings("unused") - private static class IntegerHashSet extends IntegerSet - { - IntegerHashSet() {super(new IntSetStatistics(new HashIntSet()));} - } - - @SuppressWarnings("unused") - private static class IntegerFastSet extends IntegerSet - { - IntegerFastSet() {super(new IntSetStatistics(new FastSet()));} - } - - @SuppressWarnings("unused") - private static class IntegerConciseSet extends IntegerSet - { - IntegerConciseSet() {super(new IntSetStatistics(new ConciseSet()));} - } - - // @SuppressWarnings("unused") -// private static class IntegerConcise2Set extends IntegerSet {IntegerConcise2Set() {super(new IntSetStatistics(new Concise2Set()));}} - @SuppressWarnings("unused") - private 
static class IntegerWAHSet extends IntegerSet - { - IntegerWAHSet() {super(new IntSetStatistics(new ConciseSet(true)));} - } - - @SuppressWarnings("unused") - private static class IntegerArraySet extends IntegerSet - { - IntegerArraySet() {super(new IntSetStatistics(new ArraySet()));} - } - - // @SuppressWarnings("unused") - private static class MatrixSet extends IntegerSet - { - MatrixSet() {super(new MatrixIntSet());} - } - - /** - * @author alessandrocolantonio - */ - final static class MatrixIntSet extends AbstractIntSet - { - final static int COL_POW = 10; - /** - * @uml.property name="matrix" - * @uml.associationEnd - */ - BinaryMatrix matrix = new BinaryMatrix(new FastSet()); - - final static int toInt(int row, int col) {return (row << COL_POW) + col;} - - final static int toRow(int index) {return index >>> COL_POW;} - - final static int toCol(int index) {return index & (0xFFFFFFFF >>> -COL_POW);} - - IntSet convert(BinaryMatrix m) - { - MatrixIntSet res = new MatrixIntSet(); - res.matrix = m; - return res; - } - - BinaryMatrix convert(IntSet s) - { - return ((MatrixIntSet) s).matrix; - } - - @Override - public IntSet convert(int... 
a) - { - MatrixIntSet res = new MatrixIntSet(); - for (int i : a) { - res.add(i); - } - return res; - } - - @Override - public IntSet convert(Collection c) - { - MatrixIntSet res = new MatrixIntSet(); - for (int i : c) { - res.add(i); - } - return res; - } - - @Override - public boolean add(int i) {return matrix.add(toRow(i), toCol(i));} - - @Override - public boolean addAll(IntSet c) {return matrix.addAll(convert(c));} - - @Override - public double bitmapCompressionRatio() {return matrix.bitmapCompressionRatio();} - - @Override - public void clear(int from, int to) {matrix.clear(toRow(from), toCol(from), toRow(to), toCol(to));} - - @Override - public void clear() {matrix.clear();} - - @Override - public double collectionCompressionRatio() {return matrix.collectionCompressionRatio();} - - @Override - public void complement() {matrix.complement();} - - @Override - public int complementSize() {return matrix.complementSize();} - - @Override - public IntSet complemented() {return convert(matrix.complemented());} - - @Override - public boolean contains(int i) {return matrix.contains(toRow(i), toCol(i));} - - @Override - public boolean containsAll(IntSet c) {return matrix.containsAll(convert(c));} - - @Override - public boolean containsAny(IntSet other) {return matrix.containsAny(convert(other));} - - @Override - public boolean containsAtLeast(IntSet other, int minElements) - { - return matrix.containsAtLeast( - convert(other), - minElements - ); - } - - @Override - public IntSet difference(IntSet other) {return convert(matrix.difference(convert(other)));} - - @Override - public int differenceSize(IntSet other) {return matrix.differenceSize(convert(other));} - - @Override - public IntSet empty() {return new MatrixIntSet();} - - @Override - public void fill(int from, int to) {matrix.fill(toRow(from), toCol(from), toRow(to), toCol(to));} - - @Override - public int first() {return toInt(matrix.first()[0], matrix.first()[1]);} - - @Override - public void flip(int e) 
{matrix.flip(toRow(e), toCol(e));} - - @Override - public int get(int i) {return toInt(matrix.get(i)[0], matrix.get(i)[1]);} - - @Override - public int indexOf(int e) {return matrix.indexOf(toRow(e), toCol(e));} - - @Override - public IntSet intersection(IntSet other) {return convert(matrix.intersection(convert(other)));} - - @Override - public int intersectionSize(IntSet other) {return matrix.intersectionSize(convert(other));} - - @Override - public boolean isEmpty() {return matrix.isEmpty();} - - @Override - public int last() {return toInt(matrix.last()[0], matrix.last()[1]);} - - @Override - public boolean remove(int i) {return matrix.remove(toRow(i), toCol(i));} - - @Override - public boolean removeAll(IntSet c) {return matrix.removeAll(convert(c));} - - @Override - public boolean retainAll(IntSet c) {return matrix.retainAll(convert(c));} - - @Override - public int size() {return matrix.size();} - - @Override - public IntSet symmetricDifference(IntSet other) {return convert(matrix.symmetricDifference(convert(other)));} - - @Override - public int symmetricDifferenceSize(IntSet other) {return matrix.symmetricDifferenceSize(convert(other));} - - @Override - public IntSet union(IntSet other) {return convert(matrix.union(convert(other)));} - - @Override - public int unionSize(IntSet other) {return matrix.unionSize(convert(other));} - - @Override - public int compareTo(IntSet o) {return matrix.compareTo(convert(o));} - - @Override - public double jaccardDistance(IntSet other) {return 0;} - - @Override - public double jaccardSimilarity(IntSet other) {return 0;} - - @Override - public double weightedJaccardDistance(IntSet other) {return 0;} - - @Override - public double weightedJaccardSimilarity(IntSet other) {return 0;} - - @Override - public List powerSet() {return null;} - - @Override - public List powerSet(int min, int max) {return null;} - - @Override - public int powerSetSize() {return 0;} - - @Override - public int powerSetSize(int min, int max) {return 0;} - - 
@Override - public IntIterator iterator() - { - return new IntIterator() - { - CellIterator itr = matrix.iterator(); - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public int next() - { - int[] c = itr.next(); - return toInt(c[0], c[1]); - } - - @Override - public void skipAllBefore(int element) {itr.skipAllBefore(toRow(element), toCol(element));} - - @Override - public void remove() {itr.remove();} - - @Override - public IntIterator clone() {throw new UnsupportedOperationException();} - }; - } - - @Override - public IntIterator descendingIterator() - { - return new IntIterator() - { - CellIterator itr = matrix.descendingIterator(); - - @Override - public boolean hasNext() {return itr.hasNext();} - - @Override - public int next() - { - int[] c = itr.next(); - return toInt(c[0], c[1]); - } - - @Override - public void skipAllBefore(int element) {itr.skipAllBefore(toRow(element), toCol(element));} - - @Override - public void remove() {itr.remove();} - - @Override - public IntIterator clone() {throw new UnsupportedOperationException();} - }; - } - - @Override - public IntSet clone() - { - MatrixIntSet res = new MatrixIntSet(); - res.matrix = matrix.clone(); - return res; - } - - @Override - public int hashCode() {return matrix.hashCode();} - - @Override - public boolean equals(Object obj) {return matrix.equals(((MatrixIntSet) obj).matrix);} - - @Override - public String debugInfo() - { - return super.toString() + "\n" + matrix.debugInfo(); - } - } -} - diff --git a/extendedset/src/test/java/io/druid/extendedset/Performance.java b/extendedset/src/test/java/io/druid/extendedset/Performance.java deleted file mode 100755 index 9aa99c40da17..000000000000 --- a/extendedset/src/test/java/io/druid/extendedset/Performance.java +++ /dev/null @@ -1,496 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package io.druid.extendedset; - -import io.druid.extendedset.intset.ArraySet; -import io.druid.extendedset.intset.ConciseSet; -import io.druid.extendedset.intset.FastSet; -import io.druid.extendedset.wrappers.GenericExtendedSet; -import io.druid.extendedset.wrappers.IntegerSet; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.Locale; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TreeMap; - -//import it.uniroma3.mat.extendedset.intset.Concise2Set; - -/** - * Class for performance evaluation. 
- * - * @author Alessandro Colantonio - * @version $Id: Performance.java 155 2011-05-30 22:27:00Z cocciasik $ - */ -public class Performance -{ - /** - * number of times to repeat each test - */ - private final static int REPETITIONS = 5; - /** - * minimum element - */ - private final static int SHIFT = 1000; - /** - * test results - */ - private final static Map, Double>> TIME_VALUES = new TreeMap, Double>>(); - /** - * time measurement, in nanoseconds - */ - private static long lastExecTime = -1; -// private static class IntegerConcise2Set extends IntegerSet {IntegerConcise2Set() {super(new Concise2Set());}} -// private static class IntegerWAHSet extends IntegerSet {IntegerWAHSet() {super(new WAHSet());}} - - /** - * Start time measurement - */ - private static void startTimer() - { - lastExecTime = System.nanoTime(); - } - - /** - * Stop time measurement - * - * @param c class being tested - * @param name method name - * @param div division factor (elapsed time and allocated memory will be - * divided by this number) - */ - private static void endTimer(Class c, String name, long div) - { - // final time - double t = ((double) (System.nanoTime() - lastExecTime)) / div; - Map, Double> measure = TIME_VALUES.get(name); - if (measure == null) { - TIME_VALUES.put(name, measure = new HashMap, Double>()); - } - - Double old = measure.get(c); - if (old == null || old > t) { - measure.put(c, t); - } - } - - /** - * Perform the time test - * - * @param classToTest class of the {@link Collection} instance to test - * @param leftOperand collection of integers representing the left operand - * {@link Collection} - * @param rightOperand collection of integers representing the right operand - * {@link Collection} - */ - @SuppressWarnings("unchecked") - private static void testClass( - Class classToTest, - Collection leftOperand, - Collection rightOperand - ) - { - // collections used for the test cases - Collection[] cAddAndRemove = new Collection[REPETITIONS]; - Collection[] 
cAddAll = new Collection[REPETITIONS]; - Collection[] cRemoveAll = new Collection[REPETITIONS]; - Collection[] cRetainAll = new Collection[REPETITIONS]; - Collection[] cRighOperand = new Collection[REPETITIONS]; - IntegerSet[] cLeftOperand = new IntegerSet[REPETITIONS]; - IntegerSet[] cUnionResults = new IntegerSet[REPETITIONS]; - IntegerSet[] cDifferenceResults = new IntegerSet[REPETITIONS]; - IntegerSet[] cIntersectionResults = new IntegerSet[REPETITIONS]; - - // CREATION - for (int i = 0; i < REPETITIONS; i++) { - try { - cAddAndRemove[i] = (Collection) classToTest.newInstance(); - cAddAll[i] = (Collection) classToTest.newInstance(); - cRemoveAll[i] = (Collection) classToTest.newInstance(); - cRetainAll[i] = (Collection) classToTest.newInstance(); - cRighOperand[i] = (Collection) classToTest.newInstance(); - cLeftOperand[i] = (IntegerSet) classToTest.newInstance(); - } - catch (Exception e) { - throw new RuntimeException(e); - } - } - - // APPEND - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - for (Integer x : rightOperand) { - cRighOperand[i].add(x); - } - for (Integer x : leftOperand) { - cAddAndRemove[i].add(x); - cLeftOperand[i].add(x); - cAddAll[i].add(x); - cRetainAll[i].add(x); - cRemoveAll[i].add(x); - } - endTimer(classToTest, "00) append()", (5 * leftOperand.size() + rightOperand.size())); - } - -// List xxx = new ArrayList(rightOperand); -// List yyy = new ArrayList(leftOperand); -// Collections.shuffle(xxx); -// Collections.shuffle(yyy); -// for (int i = 0; i < REPETITIONS; i++) { -// cRighOperand[i].clear(); -// cAddAndRemove[i].clear(); -// cLeftOperand[i].clear(); -// cAddAll[i].clear(); -// cRetainAll[i].clear(); -// cRemoveAll[i].clear(); -// } -// -// // ADDITION -// for (int i = 0; i < REPETITIONS; i++) { -// startTimer(); -// for (Integer x : xxx) -// cRighOperand[i].add(x); -// for (Integer x : yyy) { -// cAddAndRemove[i].add(x); -// cLeftOperand[i].add(x); -// cAddAll[i].add(x); -// cRetainAll[i].add(x); -// 
cRemoveAll[i].add(x); -// } -// endTimer(classToTest, "01) add()", (5 * leftOperand.size() + rightOperand.size())); -// } - - // REMOVAL - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - for (Integer x : rightOperand) { - cAddAndRemove[i].remove(x); - } - endTimer(classToTest, "02) remove()", rightOperand.size()); - } - - // CONTAINS - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - for (Integer x : rightOperand) { - cAddAll[i].contains(x); - } - endTimer(classToTest, "03) contains()", rightOperand.size()); - } - - // CONTAINS ALL - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - cAddAll[i].containsAll(cRighOperand[i]); - endTimer(classToTest, "04) containsAll()", 1); - } - - // UNION - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - cAddAll[i].addAll(cRighOperand[i]); - endTimer(classToTest, "05) addAll()", 1); - } - - // DIFFERENCE - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - cRemoveAll[i].removeAll(cRighOperand[i]); - endTimer(classToTest, "06) removeAll()", 1); - } - - // INTERSECTION - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - cRetainAll[i].retainAll(cRighOperand[i]); - endTimer(classToTest, "07) retainAll()", 1); - } - - // UNION - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - cUnionResults[i] = cLeftOperand[i].union(cRighOperand[i]); - endTimer(classToTest, "08) union()", 1); - } - - // DIFFERENCE - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - cDifferenceResults[i] = cLeftOperand[i].difference(cRighOperand[i]); - endTimer(classToTest, "09) difference()", 1); - } - - // INTERSECTION - for (int i = 0; i < REPETITIONS; i++) { - startTimer(); - cIntersectionResults[i] = cLeftOperand[i].intersection(cRighOperand[i]); - endTimer(classToTest, "10) intersection()", 1); - } - } - - /** - * Summary information - */ - private static void printSummary(int cardinality, double density, Class[] classes) - { - for (Entry, Double>> e : TIME_VALUES.entrySet()) { - // method name - 
System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density); - System.out.print(e.getKey()); - for (Class c : classes) { - Double op = e.getValue().get(c); - System.out.format("\t%12d", (op == null ? 0 : op.intValue())); - } - System.out.println(); - } - } - - /** - * TEST - * - * @param args - */ - public static void main(String[] args) - { - boolean calcMemory = false; - boolean calcTime = true; - - boolean calcUniform = true; - boolean calcMarkovian = false; - boolean calcZipfian = false; - - int minCardinality = 10000; - int maxCardinality = 10000; - - /* - * MEMORY - */ - for (int i = 0; calcMemory && i < 3; i++) { - System.out.println(); - switch (i) { - case 0: - if (!calcUniform) { - continue; - } - System.out.println("#MEMORY UNIFORM"); - break; - case 1: - if (!calcMarkovian) { - continue; - } - System.out.println("#MEMORY MARKOVIAN"); - break; - case 2: - if (!calcZipfian) { - continue; - } - System.out.println("#MEMORY ZIPFIAN"); - break; - default: - throw new RuntimeException("unexpected"); - } - System.out.println("#cardinality\tdensity\tFastSet\tConciseSet\tWAHSet\tConcise2Set"); - for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) { - for (double density = .0001; density < 1D; density *= 1.7) { - System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density); - - Collection integers; - switch (i) { - case 0: - integers = new RandomNumbers.Uniform(cardinality, density, SHIFT).generate(); - break; - case 1: - integers = new RandomNumbers.Markovian(cardinality, density, SHIFT).generate(); - break; - case 2: - integers = new RandomNumbers.Zipfian(cardinality, density, SHIFT, 2).generate(); - break; - default: - throw new RuntimeException("unexpected"); - } - - IntegerSet s0 = new IntegerSet(new FastSet()); - s0.addAll(integers); - System.out.format("%7d\t", (int) (s0.collectionCompressionRatio() * cardinality)); - - IntegerSet s1 = new IntegerSet(new ConciseSet()); - s1.addAll(integers); - 
System.out.format("%7d\t", (int) (s1.collectionCompressionRatio() * cardinality)); - - IntegerSet s2 = new IntegerSet(new WAHSet()); - s2.addAll(integers); - System.out.format("%7d\t", (int) (s2.collectionCompressionRatio() * cardinality)); - -// IntegerSet s3 = new IntegerSet(new Concise2Set()); -// s3.addAll(integers); -// System.out.format("%7d\n", (int) (s3.collectionCompressionRatio() * cardinality)); - } - } - } - - Class[] classes = new Class[]{ -// ArrayList.class, -// LinkedList.class, -// ArrayListSet.class, -// LinkedListSet.class, -// HashSet.class, -// TreeSet.class, -IntegerArraySet.class, -IntegerFastSet.class, -// IntegerHashSet.class, -// IntegerWAHSet.class, -IntegerConciseSet.class, -// IntegerConcise2Set.class, - }; - - /* - * TIME - */ - for (int i = 0; calcTime && i < 3; i++) { - System.out.println(); - switch (i) { - case 0: - if (!calcUniform) { - continue; - } - System.out.println("#TIME UNIFORM"); - break; - case 1: - if (!calcMarkovian) { - continue; - } - System.out.println("#TIME MARKOVIAN"); - break; - case 2: - if (!calcZipfian) { - continue; - } - System.out.println("#TIME ZIPFIAN"); - break; - default: - throw new RuntimeException("unexpected"); - } - System.out.print("#cardinality\tdensity\toperation"); - for (Class c : classes) { - System.out.print("\t" + c.getSimpleName()); - } - System.out.println(); - for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) { - RandomNumbers r; - switch (i) { - case 0: - r = new RandomNumbers.Uniform(cardinality, 0.5, SHIFT); - break; - case 1: - r = new RandomNumbers.Markovian(cardinality, 0.5, SHIFT); - break; - case 2: - r = new RandomNumbers.Zipfian(cardinality, 0.5, SHIFT, 2); - break; - default: - throw new RuntimeException("unexpected"); - } - Collection x = r.generate(), y = r.generate(); - for (Class c : classes) { - testClass(c, x, y); - testClass(c, x, y); - } - for (double density = .0001; density < 1D; density *= 1.2) { -// for (double density = 
.0001; density < 1D; density *= 1.7) { -// for (double density = .0041; density < 1D; density *= 1.7) { -// for (double density = 0.8272; density > 0.00005; density /= 1.7) { - switch (i) { - case 0: - r = new RandomNumbers.Uniform(cardinality, density, SHIFT); - break; - case 1: - r = new RandomNumbers.Markovian(cardinality, density, SHIFT); - break; - case 2: - r = new RandomNumbers.Zipfian(cardinality, density, SHIFT, 2); - break; - default: - throw new RuntimeException("unexpected"); - } - x = r.generate(); - y = r.generate(); - for (Class c : classes) { - testClass(c, x, y); - } - printSummary(cardinality, density, classes); - TIME_VALUES.clear(); - } - } - } - - System.out.println("\nDone!"); - } - - /* test classes */ - private static class WAHSet extends ConciseSet - { - private static final long serialVersionUID = -5048707825606872979L; - - WAHSet() {super(true);} - } - - private static class IntegerArraySet extends IntegerSet - { - IntegerArraySet() {super(new ArraySet());} - } - - // private static class IntegerHashSet extends IntegerSet {IntegerHashSet() {super(new HashIntSet());}} - private static class IntegerFastSet extends IntegerSet - { - IntegerFastSet() {super(new FastSet());} - } - - private static class IntegerConciseSet extends IntegerSet - { - IntegerConciseSet() {super(new ConciseSet());} - } - - /** - * Class to test the sorted array - */ - @SuppressWarnings("unused") - private static class ArrayListSet extends GenericExtendedSet - { - ArrayListSet() - { - super(ArrayList.class); - } - } - - /** - * Class to test the sorted linked lists - */ - @SuppressWarnings("unused") - private static class LinkedListSet extends GenericExtendedSet - { - LinkedListSet() - { - super(LinkedList.class); - } - } -} diff --git a/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java b/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java deleted file mode 100755 index d4b85f1a43ab..000000000000 --- 
a/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java +++ /dev/null @@ -1,242 +0,0 @@ -/* - * (c) 2010 Alessandro Colantonio - * - * - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package io.druid.extendedset; - - -import io.druid.extendedset.utilities.random.MersenneTwister; - -import java.util.Collection; -import java.util.Random; -import java.util.SortedSet; -import java.util.TreeSet; - -/** - * Generation of random integer sets - * - * @author Alessandro Colantonio - * @version $Id: RandomNumbers.java 142 2011-02-15 23:12:28Z cocciasik $ - */ -public abstract class RandomNumbers -{ - /** - * pseudo-random number generator - */ - final private static Random RND = new MersenneTwister(); - - /** - * the smallest integer - */ - protected final int min; - - /** - * number of elements within the set - */ - protected final int cardinality; - - /** - * cardinality to range (i.e., {@link #max} - {@link #min} + 1) ratio - */ - protected final double density; - - /** - * Initializes internal data - * - * @param cardinality number of elements of the set (i.e., result of - * {@link Collection#size()} ) - * @param density cardinality to range ratio - * @param min the smallest integer - */ - private RandomNumbers(int cardinality, double density, int min) - { - // parameter check - if (cardinality < 0) { - throw new IllegalArgumentException("cardinality < 0: " + cardinality); - } - if (density < 0D) { - throw new 
IllegalArgumentException("density < 0: " + density); - } - if (density > 1D) { - throw new IllegalArgumentException("density > 1: " + density); - } - - this.cardinality = cardinality; - this.density = density; - this.min = min; - } - - /** - * Test - * - * @param args - */ - public static void main(String[] args) - { - int size = 100; - System.out.println(new Uniform(size, 0.1, 0).generate()); - System.out.println(new Uniform(size, 0.9, 0).generate()); - System.out.println(new Zipfian(size, 0.1, 0, 2).generate()); - System.out.println(new Zipfian(size, 0.9, 0, 2).generate()); - System.out.println(new Markovian(size, 0.1, 0).generate()); - System.out.println(new Markovian(size, 0.9, 0).generate()); - } - - /** - * Next integer, according to the given probability distribution - * - * @return next pseudo-random integer - */ - protected abstract int next(); - - /** - * Generates the integer set of pseudo-random numbers - * - * @return the integer set - */ - public SortedSet generate() - { - SortedSet res = new TreeSet(); - while (res.size() < cardinality) { - res.add(next()); - } - return res; - } - - /** - * Integral numbers with uniform distribution. - *

- * The maximum number will be (cardinality / density) - 1, - * while the average gap between two consecutive numbers will be - * density * cardinality. - */ - public static class Uniform extends RandomNumbers - { - /** - * the greatest integer - */ - private final int max; - - /** - * Initializes internal data - * - * @param cardinality number of elements of the set (i.e., result of - * {@link Collection#size()} ) - * @param density cardinality to range ratio - * @param min the smallest integer - */ - public Uniform(int cardinality, double density, int min) - { - super(cardinality, density, min); - max = min + (int) (Math.round(cardinality / density)) - 1; - } - - /** - * {@inheritDoc} - */ - @Override - public int next() - { - return min + RND.nextInt(max - min + 1); - } - } - - /** - * Integral numbers with Zipfian (power-law) distribution. - *

- * The maximum number will be (cardinality / density) - 1, - * while the average gap between two consecutive numbers will be - * density * cardinality. However, integers will be - * concentrated around the minimum value. - */ - public static class Zipfian extends RandomNumbers - { - /** - * the greatest integer - */ - private final int max; - - /** - * power-law exponent - */ - private final int k; - - /** - * Initializes internal data - * - * @param cardinality number of elements of the set (i.e., result of - * {@link Collection#size()} ) - * @param density cardinality to range ratio - * @param min the smallest integer - * @param k power-law exponent - */ - public Zipfian(int cardinality, double density, int min, int k) - { - super(cardinality, density, min); - this.k = k; - max = min + (int) (Math.round(cardinality / density)) - 1; - } - - /** - * {@inheritDoc} - */ - @Override - public int next() - { - return min + (int) ((max - min + 1) * Math.pow(RND.nextDouble(), k)); - } - } - - /** - * Integral numbers with Markovian distribution. The data will present - * sequences of subsequent integers followed by "gaps". In this case, - * cardinality indicates the probability of switching from a - * sequence to a gap, and vice-versa. For example, density = 0 - * means a set made up of one long sequence of numbers, while - * density = 1 means a set made up of all odd (or even) - * integers. 
- */ - public static class Markovian extends RandomNumbers - { - private boolean skip = false; - private int next = min; - - /** - * @param cardinality number of elements of the set (i.e., result of - * {@link Collection#size()} ) - * @param density cardinality to range ratio - * @param min the smallest integer - */ - public Markovian(int cardinality, double density, int min) - { - super(cardinality, density, min); - } - - /** - * {@inheritDoc} - */ - @Override - public int next() - { - while (skip ^= RND.nextDouble() < density) { - next++; - } - return min + next++; - } - } -} diff --git a/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java b/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java deleted file mode 100755 index f2542c1067d2..000000000000 --- a/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java +++ /dev/null @@ -1,1972 +0,0 @@ -/* -* Copyright 2012 Metamarkets Group Inc. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package io.druid.extendedset.intset; - -import com.google.common.collect.Lists; -import junit.framework.Assert; -import org.junit.Test; - -import java.nio.IntBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.Random; -import java.util.Set; - -/** - */ -public class ImmutableConciseSetTest -{ - public static final int NO_COMPLEMENT_LENGTH = -1; - - @Test - public void testWordIteratorNext1() - { - final int[] ints = {1, 2, 3, 4, 5}; - ConciseSet set = new ConciseSet(); - for (int i : ints) { - set.add(i); - } - ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); - - ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); - Assert.assertEquals(new Integer(0x8000003E), itr.next()); - - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testWordIteratorNext2() - { - ConciseSet set = new ConciseSet(); - for (int i = 0; i < 100000; i++) { - set.add(i); - - } - ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); - - ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); - Assert.assertEquals(new Integer(0x40000C98), itr.next()); - Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - /** - * Advance to middle of a fill - */ - @Test - public void testWordIteratorAdvanceTo1() - { - ConciseSet set = new ConciseSet(); - for (int i = 0; i < 100000; i++) { - set.add(i); - - } - ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); - - ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); - itr.advanceTo(50); - Assert.assertEquals(new Integer(1073744998), itr.next()); - Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - /** - * Advance past a fill directly to a new literal - */ - @Test - public void 
testWordIteratorAdvanceTo2() - { - ConciseSet set = new ConciseSet(); - for (int i = 0; i < 100000; i++) { - set.add(i); - - } - ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); - - ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); - itr.advanceTo(3225); - Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactOneLitOneLit() - { - int[] words = {-1, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x40000001), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactOneLitPureOneFill() - { - int[] words = {-1, 0x40000004}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x40000005), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactOneLitDirtyOneFill() - { - int[] words = {-1, 0x42000004}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(new Integer(0x42000004), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactOneFillOneLit() - { - int[] words = {0x40000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x40000005), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactOneFillPureOneFill() 
- { - int[] words = {0x40000004, 0x40000004}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x40000009), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactOneFillDirtyOneFill() - { - int[] words = {0x40000004, 0x42000004}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x40000004), itr.next()); - Assert.assertEquals(new Integer(0x42000004), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactZeroLitZeroLit() - { - int[] words = {0x80000000, 0x80000000, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x00000001), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactZeroLitPureZeroFill() - { - int[] words = {0x80000000, 0x00000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x00000005), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactZeroLitDirtyZeroFill() - { - int[] words = {0x80000000, 0x02000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x80000000), itr.next()); - 
Assert.assertEquals(new Integer(0x02000004), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactZeroFillZeroLit() - { - int[] words = {0x00000004, 0x80000000, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x00000005), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactZeroFillPureZeroFill() - { - int[] words = {0x00000004, 0x00000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x00000009), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactZeroFillDirtyZeroFill() - { - int[] words = {0x00000004, 0x02000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x00000004), itr.next()); - Assert.assertEquals(new Integer(0x02000004), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactSingleOneBitLitZeroLit() - { - int[] words = {0x80000001, 0x80000000, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x02000001), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - 
public void testCompactDoubleOneBitLitZeroLit() - { - int[] words = {0x80000003, 0x80000000, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x80000003), itr.next()); - Assert.assertEquals(new Integer(0x80000000), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactSingleOneBitLitPureZeroFill() - { - int[] words = {0x80000001, 0x00000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x02000005), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactDoubleOneBitLitPureZeroFill() - { - int[] words = {0x80000003, 0x00000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x80000003), itr.next()); - Assert.assertEquals(new Integer(0x00000004), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactSingleOneBitLitDirtyZeroFill() - { - int[] words = {0x80000001, 0x02000004, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x80000001), itr.next()); - Assert.assertEquals(new Integer(0x02000004), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void 
testCompactSingleZeroBitLitOneLit() - { - int[] words = {0xFFFFFFFE, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x42000001), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactDoubleZeroBitLitOneLit() - { - int[] words = {0xFFFFFFEE, -1}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0xFFFFFFEE), itr.next()); - Assert.assertEquals(new Integer(-1), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactSingleZeroBitLitPureOneFill() - { - int[] words = {0xFFFFFFFE, 0x40000004}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0x42000005), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactDoubleZeroBitLitPureOneFill() - { - int[] words = {0xFFFFFFFC, 0x40000004}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0xFFFFFFFC), itr.next()); - Assert.assertEquals(new Integer(0x40000004), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactSingleZeroBitLitDirtyOneFill() - { - int[] words = {0xFFFFFFFE, 0x42000004}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0xFFFFFFFE), itr.next()); - 
Assert.assertEquals(new Integer(0x42000004), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - @Test - public void testCompactTwoLiterals() - { - int[] words = {0xFFFFFFFE, 0xFFEFFEFF}; - - ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); - ImmutableConciseSet.WordIterator itr = res.newWordIterator(); - - Assert.assertEquals(new Integer(0xFFFFFFFE), itr.next()); - Assert.assertEquals(new Integer(0xFFEFFEFF), itr.next()); - Assert.assertEquals(itr.hasNext(), false); - } - - /** - * Set 1: zero literal, zero fill with flipped bit 33, literal - * Set 2: zero literal, zero fill with flipped bit 34, literal - *

- * Testing merge - */ - @Test - public void testUnion1() - { - final int[] ints1 = {33, 100000}; - final int[] ints2 = {34, 100000}; - List expected = Arrays.asList(33, 34, 100000); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyUnion(expected, sets); - } - - /** - * Set 1: zero literal, zero fill with flipped bit 33, literal - * Set 2: zero literal, zero fill with flipped bit 34, literal - *

- * Testing merge - */ - @Test - public void testUnion2() - { - final int[] ints1 = {33, 100000}; - final int[] ints2 = {34, 200000}; - List expected = Arrays.asList(33, 34, 100000, 200000); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyUnion(expected, sets); - } - - /** - * Set 1: zero fill, one fill - * Set 2: zero fill, one fill with flipped bit 62 - *

- * Testing merge - */ - @Test - public void testUnion3() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 62; i < 10001; i++) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i = 63; i < 10002; i++) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 62; i < 10002; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: zero literal, one fill with flipped bit 62 - * Set 2: zero literal, literal, one fill, literal - *

- * Testing merge - */ - @Test - public void testUnion4() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 63; i < 1001; i++) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i = 64; i < 1002; i++) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 63; i < 1002; i++) { - expected.add(i); - } - - - ConciseSet blah = new ConciseSet(); - for (int i : expected) { - blah.add(i); - } - verifyUnion(expected, sets); - } - - /** - * Set 1: literal - * Set 2: zero fill, zero literal, zero fill with flipped 33 bit, zero fill with flipped 1000000 bit, literal - * Set3: literal, zero fill with flipped 34th bit, literal - *

- * Testing merge - */ - @Test - public void testUnion5() - { - final int[] ints1 = {1, 2, 3, 4, 5}; - final int[] ints2 = {100000, 2405983, 33}; - final int[] ints3 = {0, 4, 5, 34, 333333}; - final List expected = Arrays.asList(0, 1, 2, 3, 4, 5, 33, 34, 100000, 333333, 2405983); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - ConciseSet set3 = new ConciseSet(); - for (int i : ints3) { - set3.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2), - ImmutableConciseSet.newImmutableFromMutable(set3) - ); - - verifyUnion(expected, sets); - } - - /** - * Set 1: literal - * Set 2: literal - *

- * Testing merge - */ - @Test - public void testUnion6() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 30; i++) { - if (i != 28) { - set1.add(i); - } - } - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 30; i++) { - if (i != 27) { - set2.add(i); - } - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 30; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: zero literal, literal, one fill with flipped bit - * Set 2: zero literal, one fill with flipped bit - *

- * Testing merge - */ - @Test - public void testUnion7() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 64; i < 1005; i++) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i = 63; i < 99; i++) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 63; i < 1005; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: One fill with flipped 27th bit - * Set 2: One fill with flipped 28th bit - *

- * Testing creation of one fill with no flipped bits - */ - @Test - public void testUnion8() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - if (i != 27) { - set1.add(i); - } - } - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - if (i != 28) { - set2.add(i); - } - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 1000; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: Literal and one fill - * Set 2: One fill with flipped 28th bit - *

- * Testing creation of one fill with correct flipped bit - */ - @Test - public void testUnion9() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - if (!(i == 27 || i == 28)) { - set1.add(i); - } - } - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - if (i != 28) { - set2.add(i); - } - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 1000; i++) { - if (i != 28) { - expected.add(i); - } - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: Multiple literals - * Set 2: Multiple literals - *

- * Testing merge of pure sequences of literals - */ - @Test - public void testUnion10() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 1000; i += 2) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i = 1; i < 1000; i += 2) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 1000; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: Multiple literals - * Set 2: Zero fill and literal - *

- * Testing skipping of zero fills - */ - @Test - public void testUnion11() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 1000; i += 2) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - set2.add(10000); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 1000; i += 2) { - expected.add(i); - } - expected.add(10000); - - verifyUnion(expected, sets); - } - - /** - * Set 1: Literal with 4 bits marked - * Set 2: Zero fill with flipped bit 5 - *

- * Testing merge of literal and zero fill with flipped bit - */ - @Test - public void testUnion12() - { - final int[] ints1 = {1, 2, 3, 4}; - final int[] ints2 = {5, 1000}; - final List expected = Arrays.asList(1, 2, 3, 4, 5, 1000); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyUnion(expected, sets); - } - - /** - * Set 1: Literal with bit 0 - * Set 2: One fill with flipped bit 0 - *

- * Testing merge of literal and one fill with flipped bit - */ - @Test - public void testUnion13() - { - List expected = Lists.newArrayList(); - final int[] ints1 = {0}; - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i = 1; i < 100; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 100; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: Zero fill with flipped bit 0 - * Set 2: One fill with flipped bit 0 - *

- * Testing merge of flipped bits in zero and one fills - */ - @Test - public void testUnion14() - { - List expected = Lists.newArrayList(); - final int[] ints1 = {0, 100}; - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i = 1; i < 100; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i <= 100; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: Zero fill with flipped bit 1 - * Set 2: Literal with 0th bit marked - * Set 3: One Fill from 1 to 100 with flipped bit 0 - *

- * Testing merge of flipped bits in zero and one fills with a literal - */ - @Test - public void testUnion15() - { - List expected = Lists.newArrayList(); - final int[] ints1 = {1, 100}; - final int[] ints2 = {0}; - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - ConciseSet set3 = new ConciseSet(); - for (int i = 1; i < 100; i++) { - set3.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2), - ImmutableConciseSet.newImmutableFromMutable(set3) - ); - - for (int i = 0; i <= 100; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Testing merge of offset elements - */ - @Test - public void testUnion16() - { - final int[] ints1 = {1001, 1002, 1003}; - final int[] ints2 = {1034, 1035, 1036}; - List expected = Arrays.asList(1001, 1002, 1003, 1034, 1035, 1036); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyUnion(expected, sets); - } - - /** - * Testing merge of same elements - */ - @Test - public void testUnion17() - { - final int[] ints1 = {1, 2, 3, 4, 5}; - final int[] ints2 = {1, 2, 3, 4, 5}; - List expected = Arrays.asList(1, 2, 3, 4, 5); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyUnion(expected, sets); - } - - @Test - public void testUnion18() - { - List expected = Lists.newArrayList(); - ConciseSet 
set1 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - set2.add(1000); - set2.add(10000); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 1001; i++) { - expected.add(i); - } - expected.add(10000); - - verifyUnion(expected, sets); - } - - /** - * Set 1: one fill, all ones literal - * Set 2: zero fill, one fill, literal - */ - @Test - public void testUnion19() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 93; i++) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i = 62; i < 1000; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 1000; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - /** - * Set 1: literal, one fill, literal - * Set 2: zero fill, literal that falls within the one fill above, one fill that falls in one fill above, one fill - */ - @Test - public void testUnion20() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 5; i++) { - set1.add(i); - } - for (int i = 31; i < 1000; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - for (int i = 62; i < 68; i++) { - set2.add(i); - } - for (int i = 800; i < 1000; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 0; i < 5; i++) { - expected.add(i); - } - for (int i = 31; i < 1000; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - @Test - public void testUnion21() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 32; i < 93; i++) { - set1.add(i); - } - - ConciseSet set2 = new 
ConciseSet(); - for (int i = 0; i < 62; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - for (int i = 0; i < 93; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - @Test - public void testUnion22() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 93; i < 1000; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 32; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - for (int i = 0; i < 32; i++) { - expected.add(i); - } - for (int i = 93; i < 1000; i++) { - expected.add(i); - } - - verifyUnion(expected, sets); - } - - private void verifyUnion(List expected, List sets) - { - List actual = Lists.newArrayList(); - ImmutableConciseSet set = ImmutableConciseSet.union(sets); - IntSet.IntIterator itr = set.iterator(); - while (itr.hasNext()) { - actual.add(itr.next()); - } - Assert.assertEquals(expected, actual); - } - - /** - * Testing basic intersection of similar sets - */ - @Test - public void testIntersection1() - { - final int[] ints1 = {33, 100000}; - final int[] ints2 = {33, 100000}; - List expected = Arrays.asList(33, 100000); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyIntersection(expected, sets); - } - - /** - * Set1: literal, zero fill with flip bit, literal - * Set2: literal, zero fill with different flip bit, literal - */ - @Test - public void testIntersection2() - { - final int[] ints1 = {33, 100000}; - final int[] 
ints2 = {34, 100000}; - List expected = Arrays.asList(100000); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyIntersection(expected, sets); - } - - /** - * Testing intersection of one fills - */ - @Test - public void testIntersection3() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - set1.add(i); - set2.add(i); - expected.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyIntersection(expected, sets); - } - - /** - * Similar to previous test with one bit in the sequence set to zero - */ - @Test - public void testIntersection4() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - set1.add(i); - if (i != 500) { - set2.add(i); - expected.add(i); - } - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyIntersection(expected, sets); - } - - /** - * Testing with disjoint sets - */ - @Test - public void testIntersection5() - { - final int[] ints1 = {33, 100000}; - final int[] ints2 = {34, 200000}; - List expected = Lists.newArrayList(); - - ConciseSet set1 = new ConciseSet(); - for (int i : ints1) { - set1.add(i); - } - ConciseSet set2 = new ConciseSet(); - for (int i : ints2) { - set2.add(i); - } - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - verifyIntersection(expected, sets); - } - - /** - * 
Set 1: literal, zero fill, literal - * Set 2: one fill, literal that falls within the zero fill above, one fill - */ - @Test - public void testIntersection6() - { - List expected = Lists.newArrayList(); - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 5; i++) { - set1.add(i); - } - for (int i = 1000; i < 1005; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - for (int i = 800; i < 805; i++) { - set2.add(i); - } - for (int i = 806; i < 1005; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - for (int i = 1000; i < 1005; i++) { - expected.add(i); - } - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection7() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 3100; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - set2.add(100); - set2.add(500); - for (int i = 600; i < 700; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(100); - expected.add(500); - for (int i = 600; i < 700; i++) { - expected.add(i); - } - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection8() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 3100; i++) { - set1.add(i); - } - set1.add(4001); - - ConciseSet set2 = new ConciseSet(); - set2.add(100); - set2.add(500); - for (int i = 600; i < 700; i++) { - set2.add(i); - } - set2.add(4001); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(100); - expected.add(500); - for (int i = 600; i < 700; i++) { - expected.add(i); - } - expected.add(4001); - - verifyIntersection(expected, sets); - } 
- - @Test - public void testIntersection9() - { - ConciseSet set1 = new ConciseSet(); - set1.add(2005); - set1.add(3005); - set1.add(3008); - - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 3007; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(2005); - expected.add(3005); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection10() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 3100; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - - set2.add(500); - set2.add(600); - set2.add(4001); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(500); - expected.add(600); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection11() - { - ConciseSet set1 = new ConciseSet(); - set1.add(2005); - for (int i = 2800; i < 3500; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 3007; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(2005); - for (int i = 2800; i < 3007; i++) { - expected.add(i); - } - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection12() - { - ConciseSet set1 = new ConciseSet(); - set1.add(2005); - for (int i = 2800; i < 3500; i++) { - set1.add(i); - } - set1.add(10005); - - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 3007; i++) { - set2.add(i); - } - set2.add(10005); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - 
ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(2005); - for (int i = 2800; i < 3007; i++) { - expected.add(i); - } - expected.add(10005); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection13() - { - ConciseSet set1 = new ConciseSet(); - set1.add(2005); - - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 100; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection14() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - set2.add(0); - set2.add(3); - set2.add(5); - set2.add(100); - set2.add(101); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(0); - expected.add(3); - expected.add(5); - expected.add(100); - expected.add(101); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection15() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 1000; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - set2.add(0); - set2.add(3); - set2.add(5); - for (int i = 100; i < 500; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(0); - expected.add(3); - expected.add(5); - for (int i = 100; i < 500; i++) { - expected.add(i); - } - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection16() - { - ConciseSet set1 = new ConciseSet(); - set1.add(2005); - - 
ConciseSet set2 = new ConciseSet(); - set2.add(0); - set2.add(3); - set2.add(5); - set2.add(100); - set2.add(101); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection17() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 0; i < 4002; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - set2.add(4001); - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(4001); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection18() - { - ConciseSet set1 = new ConciseSet(); - for (int i = 32; i < 93; i++) { - set1.add(i); - } - - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 62; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - for (int i = 32; i < 62; i++) { - expected.add(i); - } - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersection19() - { - ConciseSet set1 = new ConciseSet(); - set1.add(2005); - - ConciseSet set2 = new ConciseSet(); - for (int i = 0; i < 10000; i++) { - set2.add(i); - } - - List sets = Arrays.asList( - ImmutableConciseSet.newImmutableFromMutable(set1), - ImmutableConciseSet.newImmutableFromMutable(set2) - ); - - List expected = Lists.newArrayList(); - expected.add(2005); - - verifyIntersection(expected, sets); - } - - @Test - public void testIntersectionTerminates() throws Exception - { - verifyIntersection(Arrays.asList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet())); - } - - private void verifyIntersection(List expected, List 
sets) - { - List actual = Lists.newArrayList(); - ImmutableConciseSet set = ImmutableConciseSet.intersection(sets); - IntSet.IntIterator itr = set.iterator(); - while (itr.hasNext()) { - actual.add(itr.next()); - } - Assert.assertEquals(expected, actual); - } - - /** - * Basic complement with no length - */ - @Test - public void testComplement1() - { - final int[] ints = {1, 100}; - List expected = Lists.newArrayList(); - - ConciseSet set = new ConciseSet(); - for (int i : ints) { - set.add(i); - } - - for (int i = 0; i <= 100; i++) { - if (i != 1 && i != 100) { - expected.add(i); - } - } - - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, NO_COMPLEMENT_LENGTH); - } - - /** - * Complement of a single partial word - */ - @Test - public void testComplement2() - { - List expected = Lists.newArrayList(); - - ConciseSet set = new ConciseSet(); - for (int i = 0; i < 15; i++) { - set.add(i); - } - - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, NO_COMPLEMENT_LENGTH); - } - - /** - * Complement of a single partial word with a length set in the same word - */ - @Test - public void testComplement3() - { - List expected = Lists.newArrayList(); - final int length = 21; - - ConciseSet set = new ConciseSet(); - for (int i = 0; i < 15; i++) { - set.add(i); - } - for (int i = 15; i < length; i++) { - expected.add(i); - } - - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement of a single partial word with a length set in a different word - */ - @Test - public void testComplement4() - { - List expected = Lists.newArrayList(); - final int length = 41; - - ConciseSet set = new ConciseSet(); - for (int i = 0; i < 15; i++) { - set.add(i); - } - for (int i = 15; i < length; i++) { - expected.add(i); - } - - ImmutableConciseSet testSet = 
ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement of a single partial word with a length set to create a one fill - */ - @Test - public void testComplement5() - { - List expected = Lists.newArrayList(); - final int length = 1001; - - ConciseSet set = new ConciseSet(); - for (int i = 0; i < 15; i++) { - set.add(i); - } - for (int i = 15; i < length; i++) { - expected.add(i); - } - - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement of words with a length set to create a one fill - */ - @Test - public void testComplement6() - { - List expected = Lists.newArrayList(); - final int length = 1001; - - ConciseSet set = new ConciseSet(); - for (int i = 65; i <= 100; i++) { - set.add(i); - } - for (int i = 0; i < length; i++) { - if (i < 65 || i > 100) { - expected.add(i); - } - } - - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement of 2 words with a length in the second word - */ - @Test - public void testComplement7() - { - List expected = Lists.newArrayList(); - final int length = 37; - - ConciseSet set = new ConciseSet(); - for (int i = 0; i <= 35; i++) { - set.add(i); - } - expected.add(36); - - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement of a one literal with a length set to complement the next bit in the next word - */ - @Test - public void testComplement8() - { - List expected = Lists.newArrayList(); - final int length = 32; - - ConciseSet set = new ConciseSet(); - for (int i = 0; i <= 30; i++) { - set.add(i); - } - expected.add(31); - - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, length); - } - - 
/** - * Complement of a null set with a length - */ - @Test - public void testComplement9() - { - final List lengths = new ArrayList(); - lengths.addAll( - Arrays.asList( - 35, - 31, - 32, - 1, - 0, - 31 * 3, - 1024, - ConciseSetUtils.MAX_ALLOWED_INTEGER - ) - ); - final Random random = new Random(701534702L); - for (int i = 0; i < 10; ++i) { - lengths.add(random.nextInt(ConciseSetUtils.MAX_ALLOWED_INTEGER + 1)); - } - final ImmutableConciseSet emptySet = new ImmutableConciseSet(); - for (final int length : lengths) { - final ImmutableConciseSet complement = ImmutableConciseSet.complement(emptySet, length); - final IntSet.IntIterator intIterator = complement.iterator(); - for (int i = 0; i < length; i++) { - final int n = intIterator.next(); - if (i != n) { - Assert.assertEquals(String.format("Failure at bit [%d] on length [%d]", i, length), i, n); - } - } - NoSuchElementException ex = null; - try { - intIterator.next(); - } - catch (NoSuchElementException e) { - ex = e; - } - Assert.assertNotNull(ex); - } - } - - /** - * Complement of a null set to create a one fill - */ - @Test - public void testComplement10() - { - List expected = Lists.newArrayList(); - final int length = 93; - - for (int i = 0; i < length; i++) { - expected.add(i); - } - - ImmutableConciseSet testSet = new ImmutableConciseSet(); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement with correct last index - */ - @Test - public void testComplement11() - { - List expected = Lists.newArrayList(); - int length = 18930; - for (int i = 0; i < 500; i++) { - expected.add(i); - } - for (int i = 18881; i < length; i++) { - expected.add(i); - } - - ConciseSet set = new ConciseSet(); - for (int i = 500; i <= 18880; i++) { - set.add(i); - } - ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement with empty set and length in first block - */ - @Test - public void testComplement12() - { 
- List expected = Lists.newArrayList(); - int length = 10; - for (int i = 0; i < 10; i++) { - expected.add(i); - } - - ImmutableConciseSet testSet = new ImmutableConciseSet(); - - verifyComplement(expected, testSet, length); - } - - /** - * Complement with empty list of some length - */ - @Test - public void testComplement13() - { - List expected = Lists.newArrayList(); - int length = 10; - for (int i = 0; i < length; i++) { - expected.add(i); - } - ImmutableConciseSet testSet = new ImmutableConciseSet(); - - verifyComplement(expected, testSet, length); - } - - private void verifyComplement(List expected, ImmutableConciseSet set, int endIndex) - { - List actual = Lists.newArrayList(); - - ImmutableConciseSet res; - if (endIndex == NO_COMPLEMENT_LENGTH) { - res = ImmutableConciseSet.complement(set); - } else { - res = ImmutableConciseSet.complement(set, endIndex); - } - - IntSet.IntIterator itr = res.iterator(); - while (itr.hasNext()) { - actual.add(itr.next()); - } - Assert.assertEquals(expected, actual); - } - - @Test - public void testContains() - { - final ConciseSet conciseSet = new ConciseSet(); - final Random random = new Random(543167436715430L); - final Set integerSet = new HashSet<>(); - int max = -1; - for (int i = 0; i < 100; ++i) { - final int j = random.nextInt(1 << 20); - integerSet.add(j); - conciseSet.add(j); - if (j > max) { - max = j; - } - } - final ImmutableConciseSet immutableConciseSet = ImmutableConciseSet.newImmutableFromMutable(conciseSet); - for (int i = 0; i < max + 10; ++i) { - final String s = Integer.toString(i); - Assert.assertEquals(s, integerSet.contains(i), conciseSet.contains(i)); - Assert.assertEquals(s, integerSet.contains(i), immutableConciseSet.contains(i)); - } - } -} diff --git a/pom.xml b/pom.xml index eaa2df9d7e2e..ad2814b77097 100644 --- a/pom.xml +++ b/pom.xml @@ -85,7 +85,6 @@ aws-common java-util bytebuffer-collections - extendedset extensions-core/avro-extensions extensions-core/datasketches diff --git 
a/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java b/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java index 42b6285500ea..c5ef97f594d2 100644 --- a/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java +++ b/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java @@ -19,14 +19,15 @@ package io.druid.segment.data; +import java.nio.ByteBuffer; + import com.google.common.collect.Ordering; + import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.ConciseBitmapFactory; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.collections.bitmap.WrappedImmutableConciseBitmap; -import io.druid.extendedset.intset.ImmutableConciseSet; - -import java.nio.ByteBuffer; +import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; /** */ From 023e26a4f8e3093c55130bfd8134086e3363306c Mon Sep 17 00:00:00 2001 From: dwivedi Date: Wed, 2 Nov 2016 12:29:44 -0700 Subject: [PATCH 4/5] Remove extendedset from NOTICE. --- NOTICE | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NOTICE b/NOTICE index 867fd9ee586a..249f99e12901 100644 --- a/NOTICE +++ b/NOTICE @@ -31,6 +31,7 @@ This product contains a modified version of Metamarkets java-util library * COMMIT TAG: * https://github.com/metamx/java-util/commit/826021f +<<<<<<< Upstream, based on origin/master This product contains a modified version of TestNG 6.8.7 * LICENSE: * http://testng.org/license/ (Apache License, Version 2.0) @@ -45,6 +46,8 @@ This product contains a modified version of Metamarkets extendedset library * COMMIT TAG: * https://github.com/metamx/extendedset/commit/c9d647d +======= +>>>>>>> db11650 Remove extendedset from NOTICE. 
This product contains a modified version of Metamarkets bytebuffer-collections library * LICENSE: * https://github.com/metamx/bytebuffer-collections/blob/master/LICENSE (Apache License, Version 2.0) From 904088857ef6eae40e8eedcf8f894ad0bb16226e Mon Sep 17 00:00:00 2001 From: dwivedi Date: Tue, 8 Nov 2016 12:59:43 -0800 Subject: [PATCH 5/5] resolving code conflicts and removing

from bytebuffer-collections. --- NOTICE | 11 ------- .../druid/benchmark/LikeFilterBenchmark.java | 10 +++---- .../collections/bitmap/BitmapFactory.java | 2 +- .../collections/bitmap/ImmutableBitmap.java | 6 ++-- .../collections/bitmap/MutableBitmap.java | 10 +++---- .../collections/spatial/ImmutableNode.java | 2 +- .../io/druid/collections/spatial/RTree.java | 30 +++++++++---------- .../spatial/search/PolygonBound.java | 2 +- .../spatial/split/GutmanSplitStrategy.java | 8 ++--- .../split/LinearGutmanSplitStrategy.java | 10 +++---- .../io/druid/segment/filter/LikeFilter.java | 3 +- 11 files changed, 42 insertions(+), 52 deletions(-) diff --git a/NOTICE b/NOTICE index 249f99e12901..1f91649e5559 100644 --- a/NOTICE +++ b/NOTICE @@ -31,23 +31,12 @@ This product contains a modified version of Metamarkets java-util library * COMMIT TAG: * https://github.com/metamx/java-util/commit/826021f -<<<<<<< Upstream, based on origin/master This product contains a modified version of TestNG 6.8.7 * LICENSE: * http://testng.org/license/ (Apache License, Version 2.0) * HOMEPAGE: * http://testng.org/ - -This product contains a modified version of Metamarkets extendedset library - * LICENSE: - * https://github.com/metamx/extendedset/blob/master/LICENSE (Apache License, Version 2.0) - * HOMEPAGE: - * https://github.com/metamx/extendedset - * COMMIT TAG: - * https://github.com/metamx/extendedset/commit/c9d647d -======= ->>>>>>> db11650 Remove extendedset from NOTICE. 
This product contains a modified version of Metamarkets bytebuffer-collections library * LICENSE: * https://github.com/metamx/bytebuffer-collections/blob/master/LICENSE (Apache License, Version 2.0) diff --git a/benchmarks/src/main/java/io/druid/benchmark/LikeFilterBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/LikeFilterBenchmark.java index def4f51f50ef..a5d0e54b2101 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/LikeFilterBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/LikeFilterBenchmark.java @@ -21,11 +21,11 @@ import com.google.common.base.Function; import com.google.common.collect.FluentIterable; -import com.metamx.collections.bitmap.BitmapFactory; -import com.metamx.collections.bitmap.ImmutableBitmap; -import com.metamx.collections.bitmap.MutableBitmap; -import com.metamx.collections.bitmap.RoaringBitmapFactory; -import com.metamx.collections.spatial.ImmutableRTree; +import io.druid.collections.bitmap.BitmapFactory; +import io.druid.collections.bitmap.ImmutableBitmap; +import io.druid.collections.bitmap.MutableBitmap; +import io.druid.collections.bitmap.RoaringBitmapFactory; +import io.druid.collections.spatial.ImmutableRTree; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.Filter; diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java index 4abcdaf5d4ca..21c034a133a9 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/BitmapFactory.java @@ -37,7 +37,7 @@ public interface BitmapFactory /** * Given a ByteBuffer pointing at a serialized version of a bitmap, * instantiate an immutable mapped bitmap. - *

+ * * When using RoaringBitmap (with the RoaringBitmapFactory class), it is not * necessary for b.limit() to indicate the end of the serialized content * whereas it is critical to set b.limit() appropriately with ConciseSet (with diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java index 11cfe057f57c..7438aafff121 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ImmutableBitmap.java @@ -57,7 +57,7 @@ public interface ImmutableBitmap /** * Compute the bitwise-or of this bitmap with another bitmap. A new bitmap is generated. - *

+ * * Note that the other bitmap should be of the same class instance. * * @param otherBitmap other bitmap @@ -66,7 +66,7 @@ public interface ImmutableBitmap /** * Compute the bitwise-and of this bitmap with another bitmap. A new bitmap is generated. - *

+ * * Note that the other bitmap should be of the same class instance. * * @param otherBitmap other bitmap @@ -75,7 +75,7 @@ public interface ImmutableBitmap /** * Compute the bitwise-andNot of this bitmap with another bitmap. A new bitmap is generated. - *

+ * * Note that the other bitmap should be of the same class instance. * * @param otherBitmap other bitmap diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java index bb5e8054cebe..5f39cddeebe1 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/MutableBitmap.java @@ -34,7 +34,7 @@ public interface MutableBitmap extends ImmutableBitmap /** * Compute the bitwise-or of this bitmap with another bitmap. The current * bitmap is modified whereas the other bitmap is left intact. - *

+ * * Note that the other bitmap should be of the same class instance. * * @param mutableBitmap other bitmap @@ -44,7 +44,7 @@ public interface MutableBitmap extends ImmutableBitmap /** * Compute the bitwise-and of this bitmap with another bitmap. The current * bitmap is modified whereas the other bitmap is left intact. - *

+ * * Note that the other bitmap should be of the same class instance. * * @param mutableBitmap other bitmap @@ -55,7 +55,7 @@ public interface MutableBitmap extends ImmutableBitmap /** * Compute the bitwise-xor of this bitmap with another bitmap. The current * bitmap is modified whereas the other bitmap is left intact. - *

+ * * Note that the other bitmap should be of the same class instance. * * @param mutableBitmap other bitmap @@ -65,7 +65,7 @@ public interface MutableBitmap extends ImmutableBitmap /** * Compute the bitwise-andNot of this bitmap with another bitmap. The current * bitmap is modified whereas the other bitmap is left intact. - *

+ * * Note that the other bitmap should be of the same class instance. * * @param mutableBitmap other bitmap @@ -100,7 +100,7 @@ public interface MutableBitmap extends ImmutableBitmap * Write out a serialized (Immutable) version of the bitmap to the ByteBuffer. We preprend * the serialized bitmap with a 4-byte int indicating the size in bytes. Thus * getSizeInBytes() + 4 bytes are written. - *

+ * * (These 4 bytes are required by ConciseSet but not by RoaringBitmap. * Nevertheless, we always write them for the sake of simplicity, even if it * wastes 4 bytes in some instances.) diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java index a11a8c90e7b0..6f5cf938fff0 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/ImmutableNode.java @@ -36,7 +36,7 @@ * 2 + numDims * Floats.BYTES to 2 + 2 * numDims * Floats.BYTES : maxCoordinates * concise set * rest (children) : Every 4 bytes is storing an offset representing the position of a child. - *

+ * * The child offset is an offset from the initialOffset */ public class ImmutableNode diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java index e3d9b08032ee..68398aa69bec 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/RTree.java @@ -29,7 +29,7 @@ /** * This RTree has been optimized to work with bitmap inverted indexes. - *

+ * * This code will probably make a lot more sense if you read: * http://www.sai.msu.su/~megera/postgres/gist/papers/gutman-rtree.pdf */ @@ -61,18 +61,18 @@ public BitmapFactory getBitmapFactory() /** * This description is from the original paper. - *

+ * * Algorithm Insert: Insert a new index entry E into an R-tree. - *

+ * * I1. [Find position for new record]. Invoke {@link #chooseLeaf(Node, Point)} to select * a leaf node L in which to place E. - *

+ * * I2. [Add records to leaf node]. If L has room for another entry, install E. Otherwise invoke * {@link SplitStrategy} split methods to obtain L and LL containing E and all the old entries of L. - *

+ * * I3. [Propagate changes upward]. Invoke {@link #adjustTree(Node, Node)} on L, also passing LL if a split was * performed. - *

+ * * I4. [Grow tree taller]. If node split propagation caused the root to split, create a new record whose * children are the two resulting nodes. * @@ -152,17 +152,17 @@ private void insertInner(Point point) /** * This description is from the original paper. - *

+ * * Algorithm ChooseLeaf. Select a leaf node in which to place a new index entry E. - *

+ * * CL1. [Initialize]. Set N to be the root node. - *

+ * * CL2. [Leaf check]. If N is a leaf, return N. - *

+ * * CL3. [Choose subtree]. If N is not a leaf, let F be the entry in N whose rectangle * FI needs least enlargement to include EI. Resolve ties by choosing the entry with the rectangle * of smallest area. - *

+ * * CL4. [Descend until a leaf is reached]. Set N to be the child node pointed to by Fp and repeated from CL2. * * @param node - current node to evaluate @@ -198,14 +198,14 @@ private Node chooseLeaf(Node node, Point point) /** * This description is from the original paper. - *

+ * * AT1. [Initialize]. Set N=L. If L was split previously, set NN to be the resulting second node. - *

+ * * AT2. [Check if done]. If N is the root, stop. - *

+ * * AT3. [Adjust covering rectangle in parent entry]. Let P be the parent node of N, and let Ev(N)I be N's entry in P. * Adjust Ev(N)I so that it tightly encloses all entry rectangles in N. - *

+ * * AT4. [Propagate node split upward]. If N has a partner NN resulting from an earlier split, create a new entry * Ev(NN) with Ev(NN)p pointing to NN and Ev(NN)I enclosing all rectangles in NN. Add Ev(NN) to p is there is room. * Otherwise, invoke {@link SplitStrategy} split to product p and pp containing Ev(NN) and all p's old entries. diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java index ec870b9f0936..6bc9bb26eedd 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/search/PolygonBound.java @@ -84,7 +84,7 @@ private static float[] getMaxCoords(float[] abscissa, float[] ordinate) * abscissa[i] is the horizontal coordinate for the i'th corner of the polygon, * and ordinate[i] is the vertical coordinate for the i'th corner. * The polygon must have more than 2 corners, so the length of abscissa or ordinate must be equal or greater than 3. - *

+ * * if the polygon is a rectangular, which corners are {0.0, 0.0}, {0.0, 1.0}, {1.0, 1.0}, {1.0, 0.0}, * the abscissa should be {0.0, 0.0, 1.0, 1.0} and ordinate should be {0.0, 1.0, 1.0, 0.0} */ diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java index ba221ac017ee..fb425f46e100 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/GutmanSplitStrategy.java @@ -50,15 +50,15 @@ public boolean needToSplit(Node node) /** * This algorithm is from the original paper. - *

+ * * Algorithm Split. Divide a set of M+1 index entries into two groups. - *

+ * * S1. [Pick first entry for each group]. Apply Algorithm {@link #pickSeeds(java.util.List)} to choose * two entries to be the first elements of the groups. Assign each to a group. - *

+ * * S2. [Check if done]. If all entries have been assigned, stop. If one group has so few entries that all the rest * must be assigned to it in order for it to have the minimum number m, assign them and stop. - *

+ * * S3. [Select entry to assign]. Invoke Algorithm {@link #pickNext(java.util.List, Node[])} * to choose the next entry to assign. Add it to the group whose covering rectangle will have to be enlarged least to * accommodate it. Resolve ties by adding the entry to the group smaller area, then to the one with fewer entries, then diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java index df61f01060f9..a193f466cff5 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/spatial/split/LinearGutmanSplitStrategy.java @@ -35,15 +35,15 @@ public LinearGutmanSplitStrategy(int minNumChildren, int maxNumChildren, BitmapF /** * This algorithm is from the original paper. - *

+ * * Algorithm LinearPickSeeds. Select two entries to be the first elements of the groups. - *

+ * * LPS1. [Find extreme rectangles along all dimensions]. Along each dimension, find the entry whose rectangle has * the highest low side, and the one with the lowest high side. Record the separation. - *

+ * * LPS2. [Adjust for shape of the rectangle cluster]. Normalize the separations by dividing by the width of the * entire set along the corresponding dimension. - *

+ * * LPS3. [Select the most extreme pair]. Choose the pair with the greatest normalized separation along any dimension. * * @param nodes - nodes to choose from @@ -103,7 +103,7 @@ public Node[] pickSeeds(List nodes) /** * This algorithm is from the original paper. - *

+ * * Algorithm LinearPickNext. PickNext simply choose any of the remaining entries. * * @param nodes - remaining nodes diff --git a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java index 7814eb72453a..00ba94728549 100644 --- a/processing/src/main/java/io/druid/segment/filter/LikeFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/LikeFilter.java @@ -20,7 +20,8 @@ package io.druid.segment.filter; import com.google.common.base.Strings; -import com.metamx.collections.bitmap.ImmutableBitmap; + +import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter;