diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java deleted file mode 100644 index bfa696cad..000000000 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.bloomfilter; - -import static org.apache.datasketches.common.Util.LS; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Buffer; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; - -/** - * This class holds an array of bits suitable for use in a Bloom Filter - * - *
Rounds the number of bits up to the smallest multiple of 64 (one long) - * that is not smaller than the specified number. - */ -abstract class BitArray { - // MAX_BITS using longs, based on array indices being capped at Integer.MAX_VALUE - protected static final long MAX_BITS = Integer.MAX_VALUE * (long) Long.SIZE; - - protected BitArray() {} - - static BitArray heapify(final Buffer mem, final boolean isEmpty) { - return HeapBitArray.heapify(mem, isEmpty); - } - - static BitArray wrap(final Memory mem, final boolean isEmpty) { - return DirectBitArrayR.wrap(mem, isEmpty); - } - - static BitArray writableWrap(final WritableMemory wmem, final boolean isEmpty) { - return DirectBitArray.writableWrap(wmem, isEmpty); - } - - boolean isEmpty() { - return !isDirty() && getNumBitsSet() == 0; - } - - abstract boolean hasMemory(); - - abstract boolean isDirect(); - - abstract boolean isReadOnly(); - - abstract boolean getBit(final long index); - - abstract boolean getAndSetBit(final long index); - - abstract void setBit(final long index); - - abstract long getNumBitsSet(); - - abstract void reset(); - - abstract long getCapacity(); - - abstract int getArrayLength(); - - abstract void union(final BitArray other); - - abstract void intersect(final BitArray other); - - abstract void invert(); - - // prints the raw BitArray as 0s and 1s, one long per row - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < getArrayLength(); ++i) { - sb.append(i + ": ") - .append(printLong(getLong(i))) - .append(LS); - } - return sb.toString(); - } - - long getSerializedSizeBytes() { - // We only really need an int for array length but this will keep everything - // aligned to 8 bytes. - // Always write array length, but write numBitsSet only if empty - return Long.BYTES * (isEmpty() ? 
1L : (2L + getArrayLength())); - } - - // returns the number of bytes needed for a non-empty BitArray of the requested size - static long getSerializedSizeBytes(final long numBits) { - if (numBits <= 0) { - throw new SketchesArgumentException("Requested number of bits must be strictly positive"); - } - if (numBits > MAX_BITS) { - throw new SketchesArgumentException("Requested number of bits exceeds maximum allowed. " - + "Requested: " + numBits + ", maximum: " + MAX_BITS); - } - final int numLongs = (int) Math.ceil(numBits / 64.0); - return Long.BYTES * (numLongs + 2L); - } - - abstract protected boolean isDirty(); - - // used to get a long from the array regardless of underlying storage - // NOT used to query individual bits - abstract protected long getLong(final int arrayIndex); - - // used to set a long in the array regardless of underlying storage - // NOT used to set individual bits - abstract protected void setLong(final int arrayIndex, final long value); - - // prints a long as a series of 0s and 1s as little endian - protected static String printLong(final long val) { - final StringBuilder sb = new StringBuilder(); - for (int j = 0; j < Long.SIZE; ++j) { - sb.append((val & (1L << j)) != 0 ? 
"1" : "0"); - if (j % 8 == 7) { sb.append(" "); } - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index 3ea73b9bd..10829d7b7 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -26,6 +26,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.filters.common.BitArray; +import org.apache.datasketches.filters.common.DirectBitArray; +import org.apache.datasketches.filters.common.HeapBitArray; import org.apache.datasketches.memory.Buffer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableBuffer; diff --git a/src/main/java/org/apache/datasketches/filters/common/BitArray.java b/src/main/java/org/apache/datasketches/filters/common/BitArray.java new file mode 100644 index 000000000..8320a369f --- /dev/null +++ b/src/main/java/org/apache/datasketches/filters/common/BitArray.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.filters.common; + +import static org.apache.datasketches.common.Util.LS; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Buffer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class holds an array of bits and should be suitable for use in + * the various membership filters. The representation is not compressed and + * is designed to fit in a single array, meaning that the maximum number + * of bits is limited by the maximum size of an array of longs in Java. + * + *
Rounds the number of bits up to the smallest multiple of 64 (one long) + * that is not smaller than the specified number. + */ +public abstract class BitArray { + + /** + * The maximum number of bits that can be represented using longs, + * based on array indices being capped at Integer.MAX_VALUE + * and allowing room for encoding both the size and the number of bits set. + */ + protected static final long MAX_BITS = (Integer.MAX_VALUE - 1) * (long) Long.SIZE; + + /** + * Constructs a new BitArray. + */ + BitArray() {} + + /** + * Creates a BitArray from a given Buffer. + * + * @param mem The Buffer to heapify. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The heapified BitArray. + */ + public static BitArray heapify(final Buffer mem, final boolean isEmpty) { + return HeapBitArray.heapify(mem, isEmpty); + } + + /** + * Creates a BitArray from a given Memory. + * + * @param mem The Memory to wrap. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The wrapped BitArray. + */ + public static BitArray wrap(final Memory mem, final boolean isEmpty) { + return DirectBitArrayR.wrap(mem, isEmpty); + } + + /** + * Creates a writable BitArray from a given WritableMemory. + * + * @param wmem The WritableMemory to wrap. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The writable wrapped BitArray. + */ + public static BitArray writableWrap(final WritableMemory wmem, final boolean isEmpty) { + return DirectBitArray.writableWrap(wmem, isEmpty); + } + + /** + * Checks if the BitArray is empty. + * + * @return True if the BitArray is empty, false otherwise. + */ + public boolean isEmpty() { + return !isDirty() && getNumBitsSet() == 0; + } + + /** + * Checks if the BitArray has a backing Memory. + * + * @return True if the BitArray has a backing Memory, false otherwise. + */ + public abstract boolean hasMemory(); + + /** + * Checks if the BitArray is direct. 
+ * + * @return True if the BitArray is direct, false otherwise. + */ + public abstract boolean isDirect(); + + /** + * Checks if the BitArray is read-only. + * + * @return True if the BitArray is read-only, false otherwise. + */ + public abstract boolean isReadOnly(); + + /** + * Gets the value of a bit at the specified index. + * + * @param index The index of the bit. + * @return The value of the bit at the specified index. + */ + public abstract boolean getBit(final long index); + + /** + * Gets a specified number of bits starting at the given index. Limited + * to a single long (64 bits). + * + * @param index The starting index. + * @param numBits The number of bits to return. + * @return The value of the requested bits, starting at bit 0 of the result. + */ + public abstract long getBits(final long index, final int numBits); + + /** + * Gets the value of a bit at the specified index and sets it to true. + * + * @param index The index of the bit. + * @return The previous value of the bit at the specified index. + */ + public abstract boolean getAndSetBit(final long index); + + /** + * Assigns the value of a bit at the specified index to true. + * + * @param index The index of the bit. + */ + public abstract void setBit(final long index); + + /** + * Assigns the value of a bit at the specified index to false. + * + * @param index The index of the bit. + */ + public abstract void clearBit(final long index); + + /** + * Assigns the given value of a bit at the specified index. + * + * @param index The index of the bit. + * @param value The value to set the bit to. + */ + public abstract void assignBit(final long index, final boolean value); + + /** + * + * Sets {@code numBits} starting from {@code index} to the specified value. + * Limited to a single long (64 bits). 
+ * + * @param index the starting index of the range (inclusive) + * @param numBits the number of bits to write + * @param bits the value to set the bits to, starting with bit 0 + */ + public abstract void setBits(final long index, final int numBits, final long bits); + + /** + * Gets the number of bits that are set to true in the BitArray. + * + * @return The number of bits set to true. + */ + public abstract long getNumBitsSet(); + + /** + * Resets the BitArray, setting all bits to false. + */ + public abstract void reset(); + + /** + * Gets the capacity of the BitArray in bits. + * + * @return The capacity of the BitArray in bits + */ + public abstract long getCapacity(); + + /** + * Gets the length of the underlying array in longs. + * + * @return The length of the underlying array in longs. + */ + public abstract int getArrayLength(); + + /** + * Performs a union operation with another BitArray. + * + * @param other The other BitArray to perform the union with. + */ + public abstract void union(final BitArray other); + + /** + * Performs an intersection operation with another BitArray. + * + * @param other The other BitArray to perform the intersection with. + */ + public abstract void intersect(final BitArray other); + + /** + * Inverts the BitArray, flipping all bits. + */ + public abstract void invert(); + + /** + * Returns a string representation of the BitArray. + * + * @return A string representation of the BitArray. + */ + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < getArrayLength(); ++i) { + sb.append(i + ": ") + .append(printLong(getLong(i))) + .append(LS); + } + return sb.toString(); + } + + /** + * Gets the serialized size of the BitArray in bytes. + * + * @return The serialized size of the BitArray in bytes. + */ + public long getSerializedSizeBytes() { + // We only really need an int for array length but this will keep everything + // aligned to 8 bytes. 
+ // Always write array length, but write numBitsSet only if not empty + return Long.BYTES * (isEmpty() ? 1L : (2L + getArrayLength())); + } + + /** + * Gets the serialized size of a non-empty BitArray of the specified size in bytes. + * + * @param numBits The number of bits in the BitArray. + * @return The serialized size of the BitArray in bytes. + * @throws SketchesArgumentException If the requested number of bits is not strictly positive + * or exceeds the maximum allowed. + */ + public static long getSerializedSizeBytes(final long numBits) { + if (numBits <= 0) { + throw new SketchesArgumentException("Requested number of bits must be strictly positive"); + } + if (numBits > MAX_BITS) { + throw new SketchesArgumentException("Requested number of bits exceeds maximum allowed. " + + "Requested: " + numBits + ", maximum: " + MAX_BITS); + } + final int numLongs = (int) Math.ceil(numBits / 64.0); + return Long.BYTES * (numLongs + 2L); + } + + /** + * Checks if the BitArray has changes not reflected in state variables. + * + * @return True if the BitArray is dirty, false otherwise. + */ + abstract boolean isDirty(); + + /** + * Gets the long value at the specified array index. + * + * @param arrayIndex The index of the long value in the array. + * @return The long value at the specified array index. + */ + abstract long getLong(final int arrayIndex); + + /** + * Sets the long value at the specified array index. + * + * @param arrayIndex The index of the long value in the array. + * @param value The value to set the long to. + */ + abstract void setLong(final int arrayIndex, final long value); + + /** + * Returns a string representation of a long value as a series of 0s and 1s (little endian). + * + * @param val The long value to print. + * @return A string representation of the long value. + */ + public static String printLong(final long val) { + final StringBuilder sb = new StringBuilder(); + for (int j = 0; j < Long.SIZE; ++j) { + sb.append((val & (1L << j)) != 0 ? 
"1" : "0"); + if (j % 8 == 7) { sb.append(" "); } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java b/src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java similarity index 62% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java rename to src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java index 77c24f027..25521672e 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java +++ b/src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java @@ -17,21 +17,21 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; -final class DirectBitArray extends DirectBitArrayR { +public final class DirectBitArray extends DirectBitArrayR { - DirectBitArray(final int dataLength, final long storedNumBitsSet, final WritableMemory wmem) { + public DirectBitArray(final int dataLength, final long storedNumBitsSet, final WritableMemory wmem) { super(dataLength, 0, wmem); // we'll set numBitsSet_ ourselves so pass 0 // can recompute later if needed numBitsSet_ = storedNumBitsSet; } - DirectBitArray(final int dataLength, final WritableMemory wmem) { + public DirectBitArray(final int dataLength, final WritableMemory wmem) { super(dataLength, 0, wmem); wmem_.putInt(0, dataLength_); @@ -39,7 +39,7 @@ final class DirectBitArray extends DirectBitArrayR { wmem_.clear(DATA_OFFSET, (long) dataLength_ * Long.BYTES); } - static DirectBitArray initialize(final long numBits, final WritableMemory wmem) { + public static DirectBitArray initialize(final long numBits, final WritableMemory wmem) { if (numBits <= 0) { throw new SketchesArgumentException("Number of bits must be strictly positive. 
Found: " + numBits); } @@ -58,7 +58,7 @@ static DirectBitArray initialize(final long numBits, final WritableMemory wmem) return new DirectBitArray(arrayLength, wmem); } - static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmpty) { + public static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmpty) { final int arrayLength = mem.getInt(0); final long storedNumBitsSet = isEmpty ? 0L : mem.getLong(NUM_BITS_OFFSET); @@ -81,7 +81,7 @@ static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmp } @Override - long getNumBitsSet() { + public long getNumBitsSet() { // update numBitsSet and store in array if (isDirty()) { numBitsSet_ = 0; @@ -95,17 +95,17 @@ long getNumBitsSet() { } @Override - protected boolean isDirty() { + public boolean isDirty() { return numBitsSet_ == -1; } @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { return (wmem_.getByte(DATA_OFFSET + ((int) index >>> 3)) & (1 << (index & 0x7))) != 0; } @Override - protected long getLong(final int arrayIndex) { + public long getLong(final int arrayIndex) { return wmem_.getLong(DATA_OFFSET + (arrayIndex << 3)); } @@ -115,21 +115,83 @@ public boolean isReadOnly() { } @Override - void reset() { + public void reset() { setNumBitsSet(0); wmem_.clear(DATA_OFFSET, (long) dataLength_ * Long.BYTES); } @Override - void setBit(final long index) { + public void setBit(final long index) { final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); final byte val = wmem_.getByte(memoryOffset); - wmem_.setBits(memoryOffset, (byte) (val | (1 << (index & 0x07)))); + wmem_.putByte(memoryOffset, (byte) (val | (1 << (index & 0x07)))); setNumBitsSet(-1); // mark dirty } @Override - boolean getAndSetBit(final long index) { + public void clearBit(final long index) { + final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); + final byte val = wmem_.getByte(memoryOffset); + wmem_.putByte(memoryOffset, (byte) (val & ~(1 
<< (index & 0x07)))); + setNumBitsSet(-1); // mark dirty + } + + @Override + public void assignBit(final long index, final boolean value) { + if (value) { + setBit(index); + } else { + clearBit(index); + } + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (numBits == 0) { return; } // empty range: nothing to write, and endBit below would be index - 1 + // TODO: since Memory provides byte offsets even when reading a long, we can be sure + // that the result always fits in a single long. We can potentially optimize this, but + // need to handle cases where a long would read beyond the end of the Memory. + + final long endBit = index + numBits - 1; + + // these are indices into a long[] array, need to adjust to byte offsets + // when calling wmem_.getLong() + final int fromIndex = (int) (index >>> 6); // shift first, then narrow: a cast binds tighter than >>> + final int toIndex = (int) (endBit >>> 6); + + setNumBitsSet(-1); // mark dirty + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? 
-1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + final long mask = toMask - fromMask; + final long maskedVal = wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & ~mask; + wmem_.putLong(DATA_OFFSET + (fromIndex << 3), maskedVal | ((bits << fromOffset) & mask)); + return; + } + + // spans longs, need to set bits in two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = (1L << fromOffset) - 1; // inverse mask in this case + final long toMask = (1L << (toOffset + 1)) - 1; + + final long maskedFromVal = wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & fromMask; + final long maskedToVal = wmem_.getLong(DATA_OFFSET + (toIndex << 3)) & ~toMask; + + wmem_.putLong(DATA_OFFSET + (fromIndex << 3), maskedFromVal | ((bits << fromOffset) & ~fromMask)); + wmem_.putLong(DATA_OFFSET + (toIndex << 3), maskedToVal | ((bits >>> splitBit) & toMask)); + } + + @Override + public boolean getAndSetBit(final long index) { final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); final byte mask = (byte) (1 << (index & 0x07)); final byte val = wmem_.getByte(memoryOffset); @@ -143,7 +205,7 @@ boolean getAndSetBit(final long index) { } @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -158,7 +220,7 @@ void intersect(final BitArray other) { } @Override - void union(final BitArray other) { + public void union(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -173,7 +235,7 @@ void union(final BitArray other) { } @Override - void invert() { + public void invert() { if (isDirty()) { numBitsSet_ = 0; for (int i = 0; i < dataLength_; ++i) { @@ -191,7 +253,7 @@ void invert() { } @Override - protected void setLong(final int arrayIndex, final long 
value) { + void setLong(final int arrayIndex, final long value) { wmem_.putLong(DATA_OFFSET + (arrayIndex << 3), value); } diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java b/src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java similarity index 58% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java rename to src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java index 8acc36be2..6d0d4bad3 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java +++ b/src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; @@ -35,7 +35,7 @@ public class DirectBitArrayR extends BitArray { final protected WritableMemory wmem_; // for inheritance; we won't write to it protected long numBitsSet_; // could be final here but writable direct will update it - protected DirectBitArrayR(final int dataLength, final long storedNumBitsSet, final Memory mem) { + public DirectBitArrayR(final int dataLength, final long storedNumBitsSet, final Memory mem) { super(); dataLength_ = dataLength; @@ -53,7 +53,7 @@ protected DirectBitArrayR(final int dataLength, final long storedNumBitsSet, fin // assumes we have a region with only the portion of Memory // the BitArray cares about - static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { + public static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { final int arrayLength = mem.getInt(0); final long storedNumBitsSet = isEmpty ? 
0L : mem.getLong(NUM_BITS_OFFSET); @@ -71,34 +71,73 @@ static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { } @Override - long getCapacity() { + public long getCapacity() { return (long) dataLength_ * Long.SIZE; } @Override - long getNumBitsSet() { + public long getNumBitsSet() { return numBitsSet_; } @Override - protected boolean isDirty() { + public boolean isDirty() { // read-only so necessarily false return false; } @Override - int getArrayLength() { + public int getArrayLength() { return dataLength_; } @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { if (isEmpty()) { return false; } return (wmem_.getByte(DATA_OFFSET + ((int) index >>> 3)) & (1 << (index & 0x7))) != 0; } @Override - protected long getLong(final int arrayIndex) { + public long getBits(final long index, final int numBits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (isEmpty()) { return 0L; } + if (numBits == 0) { return 0L; } // empty range: nothing to read, and endBit below would be index - 1 + // TODO: since Memory provides byte offsets even when reading a long, we can be sure + // that the result always fits in a single long. We can potentially optimize this, but + // need to handle cases where a long would read beyond the end of the Memory. + + final long endBit = index + numBits - 1; + + // these are indices into a long[] array, need to adjust to byte offsets + // when calling wmem_.getLong() + final int fromIndex = (int) (index >>> 6); // shift first, then narrow: a cast binds tighter than >>> + final int toIndex = (int) (endBit >>> 6); + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? 
-1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + return (wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & (toMask - fromMask)) >>> fromOffset; + } + + // spans longs, need to combine bits from two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = ~((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + long result = (wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & fromMask) >>> fromOffset; + result |= (wmem_.getLong(DATA_OFFSET + (toIndex << 3)) & toMask) << splitBit; + return result; + } + + @Override + long getLong(final int arrayIndex) { if (isEmpty()) { return 0L; } return wmem_.getLong(DATA_OFFSET + (arrayIndex << 3)); } @@ -119,37 +158,52 @@ public boolean isReadOnly() { } @Override - void reset() { + public void reset() { throw new SketchesReadOnlyException("Attempt to call reset() on read-only memory"); } @Override - void setBit(final long index) { + public void setBit(final long index) { + throw new SketchesReadOnlyException("Attempt to call setBit() on read-only memory"); + } + + @Override + public void clearBit(final long index) { + throw new SketchesReadOnlyException("Attempt to call clearBit() on read-only memory"); + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + throw new SketchesReadOnlyException("Attempt to call setBits() on read-only memory"); + } + + @Override + public void assignBit(final long index, final boolean value) { - throw new SketchesReadOnlyException("Attempt to call setBit() on read-only memory"); + throw new SketchesReadOnlyException("Attempt to call assignBit() on read-only memory"); } @Override - boolean getAndSetBit(final long index) { + public boolean getAndSetBit(final long index) { throw new SketchesReadOnlyException("Attempt to call getAndSetBit() on read-only memory"); } @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { throw new SketchesReadOnlyException("Attempt to call intersect() on read-only memory"); } @Override - void 
union(final BitArray other) { + public void union(final BitArray other) { throw new SketchesReadOnlyException("Attempt to call union() on read-only memory"); } @Override - void invert() { + public void invert() { throw new SketchesReadOnlyException("Attempt to call invert() on read-only memory"); } @Override - protected void setLong(final int arrayIndex, final long value) { + void setLong(final int arrayIndex, final long value) { throw new SketchesReadOnlyException("Attempt to call setLong() on read-only memory"); } } diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java b/src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java similarity index 57% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java rename to src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java index 4048b6775..ca81ae073 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java +++ b/src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import java.util.Arrays; @@ -31,13 +31,13 @@ *
Rounds the number of bits up to the smallest multiple of 64 (one long) * that is not smaller than the specified number. */ -final class HeapBitArray extends BitArray { +public final class HeapBitArray extends BitArray { private long numBitsSet_; // if -1, need to recompute value private boolean isDirty_; final private long[] data_; // creates an array of a given size - HeapBitArray(final long numBits) { + public HeapBitArray(final long numBits) { super(); if (numBits <= 0) { @@ -54,7 +54,7 @@ final class HeapBitArray extends BitArray { } // uses the provided array - HeapBitArray(final long numBitsSet, final long[] data) { + public HeapBitArray(final long numBitsSet, final long[] data) { super(); data_ = data; @@ -64,7 +64,7 @@ final class HeapBitArray extends BitArray { // reads a serialized image, but the BitArray is not fully self-describing so requires // a flag to indicate whether the array is empty - static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { + public static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { final int numLongs = buffer.getInt(); if (numLongs < 0) { throw new SketchesArgumentException("Possible corruption: Must have strictly positive array size. Found: " + numLongs); @@ -85,40 +85,124 @@ static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { } @Override - protected boolean isDirty() { + public boolean isDirty() { return isDirty_; } @Override - boolean hasMemory() { + public boolean hasMemory() { return false; } @Override - boolean isDirect() { + public boolean isDirect() { return false; } @Override - boolean isReadOnly() { return false; } + public boolean isReadOnly() { return false; } // queries a single bit in the array @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { return (data_[(int) index >>> 6] & (1L << index)) != 0 ? 
true : false; } + @Override + public long getBits(final long index, final int numBits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (numBits == 0) { return 0; } + + final long endBit = index + numBits - 1; + + final int fromIndex = (int) (index >>> 6); // shift first, then narrow: a cast binds tighter than >>> + final int toIndex = (int) (endBit >>> 6); + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + return (data_[fromIndex] & (toMask - fromMask)) >>> fromOffset; + } + + // spans longs, need to combine bits from two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = ~((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + long result = (data_[fromIndex] & fromMask) >>> fromOffset; + result |= (data_[toIndex] & toMask) << splitBit; + return result; + } + // sets a single bit in the array without querying, meaning the method // cannot properly track the number of bits set so set isDirty = true @Override - void setBit(final long index) { + public void setBit(final long index) { data_[(int) index >>> 6] |= 1L << index; isDirty_ = true; } + @Override + public void clearBit(final long index) { + data_[(int) (index >>> 6)] &= ~(1L << index); + isDirty_ = true; + } + + // assigns a single bit in the array without querying + @Override + public void assignBit(final long index, final boolean value) { + if (value) { + setBit(index); + } else { + clearBit(index); + } + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 
(inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (numBits == 0) { return; } + + isDirty_ = true; + final long endBit = index + numBits - 1; + + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + final long mask = toMask - fromMask; + data_[fromIndex] = (data_[fromIndex] & ~mask) | ((bits << fromOffset) & mask); + return; + } + + // spans longs, need to set bits in two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = (1L << fromOffset) - 1; // inverse mask in this case + final long toMask = (1L << (toOffset + 1)) - 1; + + data_[fromIndex] = (data_[fromIndex] & fromMask) | ((bits << fromOffset) & ~fromMask); + data_[toIndex] = (data_[toIndex] & ~toMask) | ((bits >>> splitBit) & toMask); + } + // returns existing value of bit @Override - boolean getAndSetBit(final long index) { + public boolean getAndSetBit(final long index) { final int offset = (int) index >>> 6; final long mask = 1L << index; if ((data_[offset] & mask) != 0) { @@ -134,7 +218,7 @@ boolean getAndSetBit(final long index) { // O(1) if only getAndSetBit() has been used // O(data_.length) if setBit() has ever been used @Override - long getNumBitsSet() { + public long getNumBitsSet() { if (isDirty_) { numBitsSet_ = 0; for (final long val : data_) { @@ -145,14 +229,14 @@ long getNumBitsSet() { } @Override - long getCapacity() { return (long) data_.length * Long.SIZE; } + public long getCapacity() { return (long) data_.length * Long.SIZE; } @Override - int getArrayLength() { return data_.length; } + public int getArrayLength() { return data_.length; } // applies logical OR @Override - void 
union(final BitArray other) { + public void union(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot union bit arrays with unequal lengths"); } @@ -168,7 +252,7 @@ void union(final BitArray other) { // applies logical AND @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -184,7 +268,7 @@ void intersect(final BitArray other) { // applies bitwise inversion @Override - void invert() { + public void invert() { if (isDirty_) { numBitsSet_ = 0; for (int i = 0; i < data_.length; ++i) { @@ -200,7 +284,7 @@ void invert() { } } - void writeToBuffer(final WritableBuffer wbuf) { + public void writeToBuffer(final WritableBuffer wbuf) { wbuf.putInt(data_.length); wbuf.putInt(0); // unused @@ -211,18 +295,18 @@ void writeToBuffer(final WritableBuffer wbuf) { } @Override - protected long getLong(final int arrayIndex) { + public long getLong(final int arrayIndex) { return data_[arrayIndex]; } @Override - protected void setLong(final int arrayIndex, final long value) { + public void setLong(final int arrayIndex, final long value) { data_[arrayIndex] = value; } // clears the array @Override - void reset() { + public void reset() { Arrays.fill(data_, 0); numBitsSet_ = 0; isDirty_ = false; diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java deleted file mode 100644 index 658e15f0d..000000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -public abstract class Bitmap { - - public abstract long size(); - public abstract void set(long bit_index, boolean value); - public abstract void setFromTo(long from, long to, long value); - public abstract boolean get(long bit_index); - public abstract long getFromTo(long from, long to); - - public static boolean get_fingerprint_bit(long index, long fingerprint) { - long mask = 1 << index; - long and = fingerprint & mask; - return and != 0; - } -} diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java deleted file mode 100644 index ca387ebc9..000000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -/* -Copyright � 1999 CERN - European Organization for Nuclear Research. -Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose -is hereby granted without fee, provided that the above copyright notice appear in all copies and -that both that copyright notice and this permission notice appear in supporting documentation. -CERN makes no representations about the suitability of this software for any purpose. -It is provided "as is" without expressed or implied warranty. -*/ - -/** - * Implements quick non polymorphic non bounds checking low level bitvector operations. - * Includes some operations that interpret sub-bitstrings as long integers. - *
- * WARNING: Methods of this class do not check preconditions. - * Provided with invalid parameters these method may return (or set) invalid values without throwing any exception. - * You should only use this class when performance is critical and you are absolutely sure that indexes are within bounds. - *
- * A bitvector is modelled as a long array, i.e. long[] bits holds bits of a bitvector.
- * Each long value holds 64 bits.
- * The i-th bit is stored in bits[i/64] at
- * bit position i % 64 (where bit position 0 refers to the least
- * significant bit and 63 refers to the most significant bit).
- *
- * @author wolfgang.hoschek@cern.ch
- * @version 1.0, 09/24/99
- * @see java.util.BitSet
- */
-//package bitmap_implementations;
-
-public class QuickBitVector extends Object {
- protected final static int ADDRESS_BITS_PER_UNIT = 6; // 64=2^6
- protected final static int BITS_PER_UNIT = 64; // = 1 << ADDRESS_BITS_PER_UNIT
- protected final static int BIT_INDEX_MASK = 63; // = BITS_PER_UNIT - 1;
-
- private static final long[] pows = precomputePows(); //precompute bitmasks for speed
- /**
- * Makes this class non instantiable, but still inheritable.
- */
- protected QuickBitVector() {
- }
- /**
- * Returns a bit mask with bits in the specified range set to 1, all the rest set to 0.
- * In other words, returns a bit mask having 0,1,2,3,...,64 bits set.
- * If to-from+1==0 then returns zero (0L).
- * Precondition (not checked): to-from+1 ≥ 0 AND to-from+1 ≤ 64.
- *
- * @param from index of start bit (inclusive)
- * @param to index of end bit (inclusive).
- * @return the bit mask having all bits between from and to set to 1.
- */
- public static final long bitMaskWithBitsSetFromTo(long from, long to) {
- return pows[(int)(to-from+1)] << from;
-
- // This turned out to be slower:
- // 0xffffffffffffffffL == ~0L == -1L == all 64 bits set.
- // int width;
- // return (width=to-from+1) == 0 ? 0L : (0xffffffffffffffffL >>> (BITS_PER_UNIT-width)) << from;
- }
- /**
- * Changes the bit with index bitIndex in the bitvector bits to the "clear" (false) state.
- *
- * @param bits the bitvector.
- * @param bitIndex the index of the bit to be cleared.
- */
- public static void clear(long[] bits, long bitIndex) {
- bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] &= ~(1L << (bitIndex & BIT_INDEX_MASK));
- }
- /**
- * Returns from the bitvector the value of the bit with the specified index.
- * The value is true if the bit with the index bitIndex
- * is currently set; otherwise, returns false.
- *
- * @param bits the bitvector.
- * @param bitIndex the bit index.
- * @return the value of the bit with the specified index.
- */
- public static boolean get(long[] bits, long bitIndex) {
- return ((bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] & (1L << (bitIndex & BIT_INDEX_MASK))) != 0);
- }
- /**
- * Returns a long value representing bits of a bitvector from index from to index to.
- * Bits are returned as a long value with the return value having bit 0 set to bit from, ..., bit to-from set to bit to.
- * All other bits of return value are set to 0.
- * If from > to then returns zero (0L).
- * Precondition (not checked): to-from+1 ≤ 64.
- * @param bits the bitvector.
- * @param from index of start bit (inclusive).
- * @param to index of end bit (inclusive).
- * @return the specified bits as long value.
- */
- public static long getLongFromTo(long[] bits, long from, long to) {
- if (from>to) return 0L;
-
- final int fromIndex = (int)(from >> ADDRESS_BITS_PER_UNIT); //equivalent to from/64
- final int toIndex = (int)(to >> ADDRESS_BITS_PER_UNIT);
- final int fromOffset = (int)(from & BIT_INDEX_MASK); //equivalent to from%64
- final int toOffset = (int)(to & BIT_INDEX_MASK);
- //this is equivalent to the above, but slower:
- //final int fromIndex=from/BITS_PER_UNIT;
- //final int toIndex=to/BITS_PER_UNIT;
- //final int fromOffset=from%BITS_PER_UNIT;
- //final int toOffset=to%BITS_PER_UNIT;
-
-
- long mask;
- if (fromIndex==toIndex) { //range does not cross unit boundaries; value to retrieve is contained in one single long value.
- mask=bitMaskWithBitsSetFromTo(fromOffset, toOffset);
- return (bits[fromIndex] & mask) >>> fromOffset;
-
- }
-
- //range crosses unit boundaries; value to retrieve is spread over two long values.
- //get part from first long value
- mask=bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK);
- final long x1=(bits[fromIndex] & mask) >>> fromOffset;
-
- //get part from second long value
- mask=bitMaskWithBitsSetFromTo(0, toOffset);
- final long x2=(bits[toIndex] & mask) << (BITS_PER_UNIT-fromOffset);
-
- //combine
- return x1|x2;
- }
-
- /**
- * Returns the index of the least significant bit in state "true".
- * Returns 32 if no bit is in state "true".
- *
- * Examples:
- *
- * 0x80000000 : 31 - * 0x7fffffff : 0 - * 0x00000001 : 0 - * 0x00000000 : 32 - *- * - * @param value The integer value for which the least significant bit index is to be found. - * @return The index of the least significant bit in state "true". Returns 32 if no bit is in state "true". - */ - static public int leastSignificantBit(int value) { - int i=-1; - while (++i < 32 && (((1<> ADDRESS_BITS_PER_UNIT) ; // This line basically does (nBits-1) / 2^ADDRESS... - long safe_right_shift = ((nBits-1) >>> ADDRESS_BITS_PER_UNIT) ; // This line basically does (nBits-1) / 2^ADDRESS... - // System.out.println("Right shift " + right_shift); - //System.out.println("Safe Right shift " + safe_right_shift); - int unitIndex = (int)((nBits-1) >> ADDRESS_BITS_PER_UNIT); // How many multiples of 64 bits do we need to store nBits bits? - //System.out.println(ADDRESS_BITS_PER_UNIT); - long[] bitVector = new long[unitIndex + 1]; - //System.out.println("length " + bitVector.length); - //System.out.println("Total bits: " + (bitVector.length * 64)); - //System.out.println("Num slots available: " + (bitVector.length * 64) / bitsPerElement); - return bitVector; - } - - /** - * Returns the index of the most significant bit in state "true". - * Returns -1 if no bit is in state "true". - * - * Examples: - *
- * 0x80000000 : 31 - * 0x7fffffff : 30 - * 0x00000001 : 0 - * 0x00000000 : -1 - *- * - * @param value The integer value for which the most significant bit index is to be found. - * @return The index of the most significant bit in state "true". Returns -1 if no bit is in state "true". - */ - static public int mostSignificantBit(int value) { - int i=32; - while (--i >=0 && (((1<= 1; ) { - pows[i]=value >>> (BITS_PER_UNIT-i); - } - pows[0]=0L; - return pows; - } - - /** - * Sets the bit with index bitIndex in the bitvector bits to the state specified by value. - * - * @param bits the bitvector. - * @param bitIndex the index of the bit to be changed. - * @param value the value to be stored in the bit. - */ - public static void put(long[] bits, long bitIndex, boolean value) { - if (value) - set(bits, bitIndex); - else - clear(bits, bitIndex); - } - - /** - * Sets bits of a bitvector from index
from to index to to the bits of value.
- * Bit from is set to bit 0 of value, ..., bit to is set to bit to-from of value.
- * All other bits stay unaffected.
- * If from > to then does nothing.
- * Precondition (not checked): to-from+1 ≤ 64.
- *
- * this function is equivalent to the slower code below:
- * int fromIndex=from/BITS_PER_UNIT;
- * int toIndex=to/BITS_PER_UNIT;
- * int fromOffset=from%BITS_PER_UNIT;
- * int toOffset=to%BITS_PER_UNIT;
- *
- * @param bits the bitvector.
- * @param value the value to be copied into the bitvector.
- * @param from index of start bit (inclusive).
- * @param to index of end bit (inclusive).
- */
- public static void putLongFromTo(long[] bits, long value, long from, long to) {
- if (from>to) return;
-
- final int fromIndex=(int)(from >> ADDRESS_BITS_PER_UNIT); //equivalent to from/64
- final int toIndex=(int)(to >> ADDRESS_BITS_PER_UNIT);
- final int fromOffset=(int)(from & BIT_INDEX_MASK); //equivalent to from % 64
- final int toOffset=(int)(to & BIT_INDEX_MASK);
-
- //make sure all unused bits to the left are cleared.
- long mask;
- mask=bitMaskWithBitsSetFromTo(to-from+1, BIT_INDEX_MASK);
- long cleanValue=value & (~mask);
-
- long shiftedValue;
-
- if (fromIndex==toIndex) { //range does not cross unit boundaries; should go into one single long value.
- shiftedValue=cleanValue << fromOffset;
- mask=bitMaskWithBitsSetFromTo(fromOffset, toOffset);
- bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue;
- return;
-
- }
-
- //range crosses unit boundaries; value should go into two long values.
- //copy into first long value.
- shiftedValue=cleanValue << fromOffset;
- mask=bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK);
- bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue;
-
- //copy into second long value.
- shiftedValue=cleanValue >>> (BITS_PER_UNIT - fromOffset);
- mask=bitMaskWithBitsSetFromTo(0, toOffset);
- bits[toIndex] = (bits[toIndex] & (~mask)) | shiftedValue;
- }
-
- /**
- * Changes the bit with index bitIndex in the bitvector bits to the "set" (true) state.
- *
- * @param bits the bitvector.
- * @param bitIndex the index of the bit to be set.
- */
- public static void set(long[] bits, long bitIndex) {
- bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] |= 1L << (bitIndex & BIT_INDEX_MASK);
- }
-
- /**
- * Returns the index of the unit that contains the given bitIndex.
- *
- * @param bitIndex The index of the bit to be checked.
- * @return The index of the unit that contains the given bitIndex.
- */
- protected static long unit(long bitIndex) {
- return bitIndex >> ADDRESS_BITS_PER_UNIT; // equivalent to bitIndex/64
- }
-}
diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java
deleted file mode 100644
index a4c24a3ff..000000000
--- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.filters.quotientfilter;
-
-public class QuickBitVectorWrapper extends Bitmap {
-
- long[] bs;
-
- public QuickBitVectorWrapper(int bits_per_entry, long num_entries) {
- bs = QuickBitVector.makeBitVector(num_entries, bits_per_entry);
- }
-
- @Override
- public long size() {
- return (long)bs.length * Long.BYTES * 8L;
- }
-
- @Override
- public void set(long bit_index, boolean value) {
- if (value) {
- QuickBitVector.set(bs, bit_index);
- }
- else {
- QuickBitVector.clear(bs, bit_index);
- }
- }
-
- @Override
- public void setFromTo(long from, long to, long value) {
- QuickBitVector.putLongFromTo(bs, value, from, to - 1);
- }
-
- @Override
- public boolean get(long bit_index) {
- return QuickBitVector.get(bs, bit_index);
- }
-
- @Override
- public long getFromTo(long from, long to) {
- return QuickBitVector.getLongFromTo(bs, from, to - 1);
- }
-
-
-}
-
diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
index 93a6761c1..8671dd18f 100644
--- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
+++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
@@ -23,13 +23,16 @@
import java.util.HashSet;
import java.util.Set;
+import org.apache.datasketches.filters.common.BitArray;
+import org.apache.datasketches.filters.common.HeapBitArray;
+
public class QuotientFilter extends Filter {
int bitPerEntry;
int fingerprintLength;
int power_of_two_size;
int num_entries;
- Bitmap filter;
+ BitArray filter;
double expansion_threshold;
long max_entries_before_expansion;
@@ -83,18 +86,19 @@ public void set_expand_autonomously(boolean val) {
expand_autonomously = val;
}
- Bitmap make_filter(long init_size, int bits_per_entry) {
+ BitArray make_filter(long init_size, int bits_per_entry) {
// System.out.println(init_size ) ;
// System.out.println(num_extension_slots);
// System.out.println("Making BitVector with: " + (init_size + num_extension_slots) + "SLOTS");
- return new QuickBitVectorWrapper(bits_per_entry, init_size);
+ //return new QuickBitVectorWrapper(bits_per_entry, init_size);
+ return new HeapBitArray(init_size * bits_per_entry);
}
public int get_fingerprint_length() {
return fingerprintLength;
}
- QuotientFilter(int power_of_two, int bits_per_entry, Bitmap bitmap) {
+ QuotientFilter(int power_of_two, int bits_per_entry, BitArray bitmap) {
power_of_two_size = power_of_two;
bitPerEntry = bits_per_entry;
fingerprintLength = bits_per_entry - 3;
@@ -152,7 +156,7 @@ public long get_num_slots() {
long getMask() {
return get_num_slots() - 1;
}
-
+
// sets the metadata flag bits for a given slot index
void modify_slot(boolean is_occupied, boolean is_continuation, boolean is_shifted,
long index) {
@@ -163,7 +167,7 @@ void modify_slot(boolean is_occupied, boolean is_continuation, boolean is_shifte
// sets the fingerprint for a given slot index
void set_fingerprint(long index, long fingerprint) {
- filter.setFromTo(index * bitPerEntry + 3, (long)index * bitPerEntry + 3 + fingerprintLength, fingerprint);
+ filter.setBits(index * bitPerEntry + 3, fingerprintLength, fingerprint);
}
// print a nice representation of the filter that can be understood.
@@ -185,7 +189,7 @@ public String get_pretty_str(boolean vertical) {
if (remainder == 3) {
sbr.append(" ");
}
- sbr.append(filter.get(i) ? "1" : "0");
+ sbr.append(filter.getBit(i) ? "1" : "0");
}
sbr.append("\n");
return sbr.toString();
@@ -198,12 +202,12 @@ public void pretty_print() {
// return a fingerprint in a given slot index
long get_fingerprint(long index) {
- return filter.getFromTo(index * bitPerEntry + 3, index * bitPerEntry + 3 + fingerprintLength);
+ return filter.getBits(index * bitPerEntry + 3, fingerprintLength);
}
// return an entire slot representation, including metadata flags and fingerprint
long get_slot(long index) {
- return filter.getFromTo(index * bitPerEntry, (index + 1) * bitPerEntry);
+ return filter.getBits(index * bitPerEntry, bitPerEntry);
}
// compare a fingerprint input to the fingerprint in some slot index
@@ -251,27 +255,27 @@ public int get_bits_per_entry() {
}
boolean is_occupied(long index) {
- return filter.get(index * bitPerEntry);
+ return filter.getBit(index * bitPerEntry);
}
boolean is_continuation(long index) {
- return filter.get(index * bitPerEntry + 1);
+ return filter.getBit(index * bitPerEntry + 1);
}
boolean is_shifted(long index) {
- return filter.get(index * bitPerEntry + 2);
+ return filter.getBit(index * bitPerEntry + 2);
}
void set_occupied(long index, boolean val) {
- filter.set(index * bitPerEntry, val);
+ filter.assignBit(index * bitPerEntry, val);
}
void set_continuation(long index, boolean val) {
- filter.set(index * bitPerEntry + 1, val);
+ filter.assignBit(index * bitPerEntry + 1, val);
}
void set_shifted(long index, boolean val) {
- filter.set(index * bitPerEntry + 2, val);
+ filter.assignBit(index * bitPerEntry + 2, val);
}
boolean is_slot_empty(long index) {
@@ -689,7 +693,7 @@ protected boolean _search(long large_hash) {
}
public boolean get_bit_at_offset(int offset) {
- return filter.get(offset);
+ return filter.getBit(offset);
}
public void compute_statistics() {
diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java
index 1f98c82f2..a39712195 100644
--- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java
+++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java
@@ -18,7 +18,7 @@
*/
package org.apache.datasketches.filters.quotientfilter;
-import java.util.concurrent.ThreadLocalRandom;
+//import java.util.concurrent.ThreadLocalRandom;
import org.apache.datasketches.common.SketchesArgumentException;
diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java
index 079c9f282..52b1371a9 100644
--- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java
+++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java
@@ -26,6 +26,7 @@
import java.util.ArrayList;
import java.util.List;
+import org.apache.datasketches.common.Family;
import org.apache.datasketches.memory.Buffer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableBuffer;
@@ -126,7 +127,7 @@ class ReqSerDe {
enum SerDeFormat { EMPTY, RAWITEMS, EXACT, ESTIMATION }
private static final byte SER_VER = 1;
- private static final byte FAMILY_ID = 17;
+ private static final byte FAMILY_ID = (byte) Family.REQ.getID();
static ReqSketch heapify(final Memory mem) {
final Buffer buff = mem.asBuffer();
diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
index 8397c130b..a1ac53c6d 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
@@ -169,7 +169,7 @@ private DirectQuickSelectSketch(
//clear hash table area
dstMem.clear(preambleLongs << 3, 8 << lgArrLongs);
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
memReqSvr_ = memReqSvr;
}
@@ -210,7 +210,7 @@ static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final l
final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
- dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}
@@ -228,7 +228,7 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin
final DirectQuickSelectSketch dqss =
new DirectQuickSelectSketch(seed, srcMem);
- dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}
@@ -310,7 +310,7 @@ UpdateReturnState hashUpdate(final long hash) {
if (actLgRF > 0) { //Expand in current Memory
//lgArrLongs will change; thetaLong, curCount will not
resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs);
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Expand in current memory, exit.
@@ -330,7 +330,7 @@ UpdateReturnState hashUpdate(final long hash) {
memReqSvr_.requestClose(wmem_, newDstMem);
wmem_ = newDstMem;
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs);
+ hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs);
return InsertedCountIncrementedResized;
} //end of Request more memory to resize
} //end of resize
diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
index c593f52e3..a3ffebc14 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
@@ -86,7 +86,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long see
final DirectQuickSelectSketchR dqssr =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
- dqssr.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqssr;
}
@@ -104,7 +104,7 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long
final DirectQuickSelectSketchR dqss =
new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem);
- dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
return dqss;
}
@@ -276,11 +276,11 @@ UpdateReturnState hashUpdate(final long hash) {
* @return the hash table threshold
*/
@SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments")
- static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+ protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
//SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
//but this allows us to tune these constants for different sketches.
final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
- return (int) Math.floor(fraction * (1 << lgArrLongs));
+ return (int) (fraction * (1 << lgArrLongs));
}
}
diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
index 37b615456..b9d4dc9e1 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
@@ -92,7 +92,7 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float
}
lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs_);
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_);
curCount_ = 0;
thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE);
empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
@@ -128,7 +128,7 @@ static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long see
final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF,
preambleLongs, family);
hqss.lgArrLongs_ = lgArrLongs;
- hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
+ hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs);
hqss.curCount_ = extractCurCount(srcMem);
hqss.thetaLong_ = extractThetaLong(srcMem);
hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem);
@@ -197,7 +197,7 @@ public void reset() {
cache_ = new long[1 << lgArrLongsSM];
lgArrLongs_ = lgArrLongsSM;
}
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
empty_ = true;
curCount_ = 0;
thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE);
@@ -293,7 +293,7 @@ private final void resizeCache() {
curCount_ = newCount;
cache_ = tgtArr;
- hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_);
+ hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
}
//array stays the same size. Changes theta and thus count
@@ -318,9 +318,9 @@ private final void quickSelectAndRebuild() {
* @param lgArrLongs See lgArrLongs.
* @return the hash table threshold
*/
- static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+ private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
- return (int) Math.floor(fraction * (1 << lgArrLongs));
+ return (int) (fraction * (1 << lgArrLongs));
}
}
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index 888116512..cc1fd4d23 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -297,13 +297,27 @@ public double getLowerBound(final int numStdDev) {
* @param numberOfEntries the actual number of entries stored with the CompactSketch.
* @return the maximum number of storage bytes required for a CompactSketch with the given number
* of entries.
+ * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
*/
+ @Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
if (numberOfEntries == 0) { return 8; }
if (numberOfEntries == 1) { return 16; }
return (numberOfEntries << 3) + 24;
}
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch given the configured
+ * log_base2 of the number of nominal entries. The retained entries and the preamble longs are
+ * summed first and then scaled by Long.BYTES, because each of them occupies one 8-byte long.
+ * @param lgNomEntries log_base2 of the configured Nominal Entries
+ * @return the maximum number of storage bytes required for a CompactSketch
+ */
+ public static int getCompactSketchMaxBytes(final int lgNomEntries) {
+ return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD
+ + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES;
+ }
+
/**
* Returns the maximum number of storage bytes required for an UpdateSketch with the given
* number of nominal entries (power of 2).
diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java
index a5862e4a4..c204751f2 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketches.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketches.java
@@ -79,15 +79,32 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) {
}
/**
- * Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
- * @param numberOfEntries Ref: {@link Sketch#getMaxCompactSketchBytes(int)},
- * {@code numberOfEntries}
- * @return Ref: {@link Sketch#getMaxCompactSketchBytes(int)}
- */
+ * Returns the maximum number of storage bytes required for a CompactSketch with the given
+ * number of actual entries. Note that this assumes the worst case of the sketch in
+ * estimation mode, which requires storing theta and count.
+ * @param numberOfEntries the actual number of entries stored with the CompactSketch.
+ * @return the maximum number of storage bytes required for a CompactSketch with the given number
+ * of entries.
+ * @see Sketch#getMaxCompactSketchBytes(int)
+ * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int)} instead.
+ */
+ @Deprecated
public static int getMaxCompactSketchBytes(final int numberOfEntries) {
return Sketch.getMaxCompactSketchBytes(numberOfEntries);
}
+ /**
+ * Returns the maximum number of storage bytes required for a CompactSketch given the configured
+ * log_base2 of the number of nominal entries, which is a power of 2.
+ * @param nomEntries despite its name, the log_base2 of the Nominal Entries; it is passed
+ * unchanged to {@link Sketch#getCompactSketchMaxBytes(int)}, which expects lgNomEntries.
+ * @return the maximum number of storage bytes required for a CompactSketch
+ * @see Sketch#getCompactSketchMaxBytes(int)
+ */
+ public static int getCompactSketchMaxBytes(final int nomEntries) {
+ return Sketch.getCompactSketchMaxBytes(nomEntries);
+ }
+
/**
* Ref: {@link SetOperation#getMaxIntersectionBytes(int)}
* @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries}
diff --git a/src/main/java/org/apache/datasketches/tuple/Filter.java b/src/main/java/org/apache/datasketches/tuple/Filter.java
index 28b38295c..2ed156b84 100644
--- a/src/main/java/org/apache/datasketches/tuple/Filter.java
+++ b/src/main/java/org/apache/datasketches/tuple/Filter.java
@@ -19,10 +19,10 @@
package org.apache.datasketches.tuple;
+import java.lang.reflect.Array;
+import java.util.Arrays;
import java.util.function.Predicate;
-import org.apache.datasketches.common.ResizeFactor;
-
/**
* Class for filtering entries from a {@link Sketch} given a {@link Summary}
*
@@ -52,23 +52,25 @@ public CompactSketch