diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java deleted file mode 100644 index bfa696cad..000000000 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.bloomfilter; - -import static org.apache.datasketches.common.Util.LS; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Buffer; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; - -/** - * This class holds an array of bits suitable for use in a Bloom Filter - * - *

Rounds the number of bits up to the smallest multiple of 64 (one long) - * that is not smaller than the specified number. - */ -abstract class BitArray { - // MAX_BITS using longs, based on array indices being capped at Integer.MAX_VALUE - protected static final long MAX_BITS = Integer.MAX_VALUE * (long) Long.SIZE; - - protected BitArray() {} - - static BitArray heapify(final Buffer mem, final boolean isEmpty) { - return HeapBitArray.heapify(mem, isEmpty); - } - - static BitArray wrap(final Memory mem, final boolean isEmpty) { - return DirectBitArrayR.wrap(mem, isEmpty); - } - - static BitArray writableWrap(final WritableMemory wmem, final boolean isEmpty) { - return DirectBitArray.writableWrap(wmem, isEmpty); - } - - boolean isEmpty() { - return !isDirty() && getNumBitsSet() == 0; - } - - abstract boolean hasMemory(); - - abstract boolean isDirect(); - - abstract boolean isReadOnly(); - - abstract boolean getBit(final long index); - - abstract boolean getAndSetBit(final long index); - - abstract void setBit(final long index); - - abstract long getNumBitsSet(); - - abstract void reset(); - - abstract long getCapacity(); - - abstract int getArrayLength(); - - abstract void union(final BitArray other); - - abstract void intersect(final BitArray other); - - abstract void invert(); - - // prints the raw BitArray as 0s and 1s, one long per row - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < getArrayLength(); ++i) { - sb.append(i + ": ") - .append(printLong(getLong(i))) - .append(LS); - } - return sb.toString(); - } - - long getSerializedSizeBytes() { - // We only really need an int for array length but this will keep everything - // aligned to 8 bytes. - // Always write array length, but write numBitsSet only if empty - return Long.BYTES * (isEmpty() ? 1L : (2L + getArrayLength())); - } - - // returns the number of bytes needed for a non-empty BitArray of the requested size - static long getSerializedSizeBytes(final long numBits) { - if (numBits <= 0) { - throw new SketchesArgumentException("Requested number of bits must be strictly positive"); - } - if (numBits > MAX_BITS) { - throw new SketchesArgumentException("Requested number of bits exceeds maximum allowed. " - + "Requested: " + numBits + ", maximum: " + MAX_BITS); - } - final int numLongs = (int) Math.ceil(numBits / 64.0); - return Long.BYTES * (numLongs + 2L); - } - - abstract protected boolean isDirty(); - - // used to get a long from the array regardless of underlying storage - // NOT used to query individual bits - abstract protected long getLong(final int arrayIndex); - - // used to set a long in the array regardless of underlying storage - // NOT used to set individual bits - abstract protected void setLong(final int arrayIndex, final long value); - - // prints a long as a series of 0s and 1s as little endian - protected static String printLong(final long val) { - final StringBuilder sb = new StringBuilder(); - for (int j = 0; j < Long.SIZE; ++j) { - sb.append((val & (1L << j)) != 0 ? "1" : "0"); - if (j % 8 == 7) { sb.append(" "); } - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index 3ea73b9bd..10829d7b7 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -26,6 +26,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.filters.common.BitArray; +import org.apache.datasketches.filters.common.DirectBitArray; +import org.apache.datasketches.filters.common.HeapBitArray; import org.apache.datasketches.memory.Buffer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableBuffer; diff --git a/src/main/java/org/apache/datasketches/filters/common/BitArray.java b/src/main/java/org/apache/datasketches/filters/common/BitArray.java new file mode 100644 index 000000000..8320a369f --- /dev/null +++ b/src/main/java/org/apache/datasketches/filters/common/BitArray.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.filters.common; + +import static org.apache.datasketches.common.Util.LS; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Buffer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class holds an array of bits and should be suitable for use in + * the various membership filters. The representation is not compressed and + * is designed to fit in a single array, meaning that the maximum number + * of bits is limited by the maximize size of an array of longs in Java. + * + *

Rounds the number of bits up to the smallest multiple of 64 (one long) + * that is not smaller than the specified number. + */ +public abstract class BitArray { + + /** + * The maximum number of bits that can be represented using longs, + * based on array indices being capped at Integer.MAX_VALUE + * and allowing room for encoding both the size and the number of bits set. + */ + protected static final long MAX_BITS = (Integer.MAX_VALUE - 1) * (long) Long.SIZE; + + /** + * Constructs a new BitArray. + */ + BitArray() {} + + /** + * Creates a BitArray from a given Buffer. + * + * @param mem The Buffer to heapify. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The heapified BitArray. + */ + public static BitArray heapify(final Buffer mem, final boolean isEmpty) { + return HeapBitArray.heapify(mem, isEmpty); + } + + /** + * Creates a BitArray from a given Memory. + * + * @param mem The Memory to wrap. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The wrapped BitArray. + */ + public static BitArray wrap(final Memory mem, final boolean isEmpty) { + return DirectBitArrayR.wrap(mem, isEmpty); + } + + /** + * Creates a writable BitArray from a given WritableMemory. + * + * @param wmem The WritableMemory to wrap. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The writable wrapped BitArray. + */ + public static BitArray writableWrap(final WritableMemory wmem, final boolean isEmpty) { + return DirectBitArray.writableWrap(wmem, isEmpty); + } + + /** + * Checks if the BitArray is empty. + * + * @return True if the BitArray is empty, false otherwise. + */ + public boolean isEmpty() { + return !isDirty() && getNumBitsSet() == 0; + } + + /** + * Checks if the BitArray has a backing Memory. + * + * @return True if the BitArray has a backing Memory, false otherwise. + */ + public abstract boolean hasMemory(); + + /** + * Checks if the BitArray is direct. + * + * @return True if the BitArray is direct, false otherwise. + */ + public abstract boolean isDirect(); + + /** + * Checks if the BitArray is read-only. + * + * @return True if the BitArray is read-only, false otherwise. + */ + public abstract boolean isReadOnly(); + + /** + * Gets the value of a bit at the specified index. + * + * @param index The index of the bit. + * @return The value of the bit at the specified index. + */ + public abstract boolean getBit(final long index); + + /** + * Gets the a specified number of bits starting at the given index. Limited + * to a single long (64 bits). + * + * @param index The starting index. + * @param numBits The number of bits to return. + * @return The value of the requested bits, starting at bit 0 of the result. + */ + public abstract long getBits(final long index, final int numBits); + + /** + * Gets the value of a bit at the specified index and sets it to true. + * + * @param index The index of the bit. + * @return The previous value of the bit at the specified index. + */ + public abstract boolean getAndSetBit(final long index); + + /** + * Assigns the value of a bit at the specified index to true. + * + * @param index The index of the bit. + */ + public abstract void setBit(final long index); + + /** + * Assigns the value of a bit at the specified index to false. + * + * @param index The index of the bit. + */ + public abstract void clearBit(final long index); + + /** + * Assigns the given value of a bit at the specified index. + * + * @param index The index of the bit. + * @param value The value to set the bit to. + */ + public abstract void assignBit(final long index, final boolean value); + + /** + /** + * Sets {@code numBits} starting from {@code index} to the specified value. + * Limited to a single long (64 bits). + * + * @param index the starting index of the range (inclusive) + * @param numBits the number of bits to write + * @param bits the value to set the bits to, starting with bit 0 + */ + public abstract void setBits(final long index, final int numBits, final long bits); + + /** + * Gets the number of bits that are set to true in the BitArray. + * + * @return The number of bits set to true. + */ + public abstract long getNumBitsSet(); + + /** + * Resets the BitArray, setting all bits to false. + */ + public abstract void reset(); + + /** + * Gets the capacity of the BitArray in bits. + * + * @return The capacity of the BitArray in bits + */ + public abstract long getCapacity(); + + /** + * Gets the length of the underlying array in longs. + * + * @return The length of the underlying array in longs. + */ + public abstract int getArrayLength(); + + /** + * Performs a union operation with another BitArray. + * + * @param other The other BitArray to perform the union with. + */ + public abstract void union(final BitArray other); + + /** + * Performs an intersection operation with another BitArray. + * + * @param other The other BitArray to perform the intersection with. + */ + public abstract void intersect(final BitArray other); + + /** + * Inverts the BitArray, flipping all bits. + */ + public abstract void invert(); + + /** + * Returns a string representation of the BitArray. + * + * @return A string representation of the BitArray. + */ + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < getArrayLength(); ++i) { + sb.append(i + ": ") + .append(printLong(getLong(i))) + .append(LS); + } + return sb.toString(); + } + + /** + * Gets the serialized size of the BitArray in bytes. + * + * @return The serialized size of the BitArray in bytes. + */ + public long getSerializedSizeBytes() { + // We only really need an int for array length but this will keep everything + // aligned to 8 bytes. + // Always write array length, but write numBitsSet only if empty + return Long.BYTES * (isEmpty() ? 1L : (2L + getArrayLength())); + } + + /** + * Gets the serialized size of a non-empty BitArray of the specified size in bytes. + * + * @param numBits The number of bits in the BitArray. + * @return The serialized size of the BitArray in bytes. + * @throws SketchesArgumentException If the requested number of bits is not strictly positive + * or exceeds the maximum allowed. + */ + public static long getSerializedSizeBytes(final long numBits) { + if (numBits <= 0) { + throw new SketchesArgumentException("Requested number of bits must be strictly positive"); + } + if (numBits > MAX_BITS) { + throw new SketchesArgumentException("Requested number of bits exceeds maximum allowed. " + + "Requested: " + numBits + ", maximum: " + MAX_BITS); + } + final int numLongs = (int) Math.ceil(numBits / 64.0); + return Long.BYTES * (numLongs + 2L); + } + + /** + * Checks if the BitArray has changes not reflected in state variables. + * + * @return True if the BitArray is dirty, false otherwise. + */ + abstract boolean isDirty(); + + /** + * Gets the long value at the specified array index. + * + * @param arrayIndex The index of the long value in the array. + * @return The long value at the specified array index. + */ + abstract long getLong(final int arrayIndex); + + /** + * Sets the long value at the specified array index. + * + * @param arrayIndex The index of the long value in the array. + * @param value The value to set the long to. + */ + abstract void setLong(final int arrayIndex, final long value); + + /** + * Returns a string representation of a long value as a series of 0s and 1s (little endian). + * + * @param val The long value to print. + * @return A string representation of the long value. + */ + public static String printLong(final long val) { + final StringBuilder sb = new StringBuilder(); + for (int j = 0; j < Long.SIZE; ++j) { + sb.append((val & (1L << j)) != 0 ? "1" : "0"); + if (j % 8 == 7) { sb.append(" "); } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java b/src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java similarity index 62% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java rename to src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java index 77c24f027..25521672e 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java +++ b/src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java @@ -17,21 +17,21 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; -final class DirectBitArray extends DirectBitArrayR { +public final class DirectBitArray extends DirectBitArrayR { - DirectBitArray(final int dataLength, final long storedNumBitsSet, final WritableMemory wmem) { + public DirectBitArray(final int dataLength, final long storedNumBitsSet, final WritableMemory wmem) { super(dataLength, 0, wmem); // we'll set numBitsSet_ ourselves so pass 0 // can recompute later if needed numBitsSet_ = storedNumBitsSet; } - DirectBitArray(final int dataLength, final WritableMemory wmem) { + public DirectBitArray(final int dataLength, final WritableMemory wmem) { super(dataLength, 0, wmem); wmem_.putInt(0, dataLength_); @@ -39,7 +39,7 @@ final class DirectBitArray extends DirectBitArrayR { wmem_.clear(DATA_OFFSET, (long) dataLength_ * Long.BYTES); } - static DirectBitArray initialize(final long numBits, final WritableMemory wmem) { + public static DirectBitArray initialize(final long numBits, final WritableMemory wmem) { if (numBits <= 0) { throw new SketchesArgumentException("Number of bits must be strictly positive. Found: " + numBits); } @@ -58,7 +58,7 @@ static DirectBitArray initialize(final long numBits, final WritableMemory wmem) return new DirectBitArray(arrayLength, wmem); } - static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmpty) { + public static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmpty) { final int arrayLength = mem.getInt(0); final long storedNumBitsSet = isEmpty ? 0L : mem.getLong(NUM_BITS_OFFSET); @@ -81,7 +81,7 @@ static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmp } @Override - long getNumBitsSet() { + public long getNumBitsSet() { // update numBitsSet and store in array if (isDirty()) { numBitsSet_ = 0; @@ -95,17 +95,17 @@ long getNumBitsSet() { } @Override - protected boolean isDirty() { + public boolean isDirty() { return numBitsSet_ == -1; } @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { return (wmem_.getByte(DATA_OFFSET + ((int) index >>> 3)) & (1 << (index & 0x7))) != 0; } @Override - protected long getLong(final int arrayIndex) { + public long getLong(final int arrayIndex) { return wmem_.getLong(DATA_OFFSET + (arrayIndex << 3)); } @@ -115,21 +115,83 @@ public boolean isReadOnly() { } @Override - void reset() { + public void reset() { setNumBitsSet(0); wmem_.clear(DATA_OFFSET, (long) dataLength_ * Long.BYTES); } @Override - void setBit(final long index) { + public void setBit(final long index) { final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); final byte val = wmem_.getByte(memoryOffset); - wmem_.setBits(memoryOffset, (byte) (val | (1 << (index & 0x07)))); + wmem_.putByte(memoryOffset, (byte) (val | (1 << (index & 0x07)))); setNumBitsSet(-1); // mark dirty } @Override - boolean getAndSetBit(final long index) { + public void clearBit(final long index) { + final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); + final byte val = wmem_.getByte(memoryOffset); + wmem_.putByte(memoryOffset, (byte) (val & ~(1 << (index & 0x07)))); + setNumBitsSet(-1); // mark dirty + } + + @Override + public void assignBit(final long index, final boolean value) { + if (value) { + setBit(index); + } else { + clearBit(index); + } + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + + // TODO: since Memory provides byte offsets even when reading a long, we can be sure + // that the result always fits in a single long. We can potentially optimize this, but + // need to handle cases where a long would read beyond the end of the Memory. + + final long endBit = index + numBits - 1; + + // these are indices into a long[] array, need to adjust to byte offsets + // when calling wmem_.getLong() + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + + setNumBitsSet(-1); // mark dirty + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + final long mask = toMask - fromMask; + final long maskedVal = wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & ~mask; + wmem_.putLong(DATA_OFFSET + (fromIndex << 3), maskedVal | ((bits << fromOffset) & mask)); + return; + } + + // spans longs, need to set bits in two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = (1L << fromOffset) - 1; // inverse mask in this case + final long toMask = (1L << (toOffset + 1)) - 1; + + final long maskedFromVal = wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & fromMask; + final long maskedToVal = wmem_.getLong(DATA_OFFSET + (toIndex << 3)) & ~toMask; + + wmem_.putLong(DATA_OFFSET + (fromIndex << 3), maskedFromVal | ((bits << fromOffset) & ~fromMask)); + wmem_.putLong(DATA_OFFSET + (toIndex << 3), maskedToVal | ((bits >>> splitBit) & toMask)); + } + + @Override + public boolean getAndSetBit(final long index) { final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); final byte mask = (byte) (1 << (index & 0x07)); final byte val = wmem_.getByte(memoryOffset); @@ -143,7 +205,7 @@ boolean getAndSetBit(final long index) { } @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -158,7 +220,7 @@ void intersect(final BitArray other) { } @Override - void union(final BitArray other) { + public void union(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -173,7 +235,7 @@ void union(final BitArray other) { } @Override - void invert() { + public void invert() { if (isDirty()) { numBitsSet_ = 0; for (int i = 0; i < dataLength_; ++i) { @@ -191,7 +253,7 @@ void invert() { } @Override - protected void setLong(final int arrayIndex, final long value) { + void setLong(final int arrayIndex, final long value) { wmem_.putLong(DATA_OFFSET + (arrayIndex << 3), value); } diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java b/src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java similarity index 58% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java rename to src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java index 8acc36be2..6d0d4bad3 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java +++ b/src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; @@ -35,7 +35,7 @@ public class DirectBitArrayR extends BitArray { final protected WritableMemory wmem_; // for inheritance; we won't write to it protected long numBitsSet_; // could be final here but writable direct will update it - protected DirectBitArrayR(final int dataLength, final long storedNumBitsSet, final Memory mem) { + public DirectBitArrayR(final int dataLength, final long storedNumBitsSet, final Memory mem) { super(); dataLength_ = dataLength; @@ -53,7 +53,7 @@ protected DirectBitArrayR(final int dataLength, final long storedNumBitsSet, fin // assumes we have a region with only the portion of Memory // the BitArray cares about - static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { + public static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { final int arrayLength = mem.getInt(0); final long storedNumBitsSet = isEmpty ? 0L : mem.getLong(NUM_BITS_OFFSET); @@ -71,34 +71,73 @@ static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { } @Override - long getCapacity() { + public long getCapacity() { return (long) dataLength_ * Long.SIZE; } @Override - long getNumBitsSet() { + public long getNumBitsSet() { return numBitsSet_; } @Override - protected boolean isDirty() { + public boolean isDirty() { // read-only so necessarily false return false; } @Override - int getArrayLength() { + public int getArrayLength() { return dataLength_; } @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { if (isEmpty()) { return false; } return (wmem_.getByte(DATA_OFFSET + ((int) index >>> 3)) & (1 << (index & 0x7))) != 0; } @Override - protected long getLong(final int arrayIndex) { + public long getBits(final long index, final int numBits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (isEmpty()) { return 0L; } + + // TODO: since Memory provides byte offsets even when reading a long, we can be sure + // that the result always fits in a single long. We can potentially optimize this, but + // need to handle cases where a long would read beyond the end of the Memory. + + final long endBit = index + numBits - 1; + + // these are indices into a long[] array, need to adjust to byte offsets + // when calling wmem_.getLong() + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + return (wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & (toMask - fromMask)) >>> fromOffset; + } + + // spans longs, need to combine bits from two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = ~((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + long result = (wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & fromMask) >>> fromOffset; + result |= (wmem_.getLong(DATA_OFFSET + (toIndex << 3)) & toMask) << splitBit; + return result; + } + + @Override + long getLong(final int arrayIndex) { if (isEmpty()) { return 0L; } return wmem_.getLong(DATA_OFFSET + (arrayIndex << 3)); } @@ -119,37 +158,52 @@ public boolean isReadOnly() { } @Override - void reset() { + public void reset() { throw new SketchesReadOnlyException("Attempt to call reset() on read-only memory"); } @Override - void setBit(final long index) { + public void setBit(final long index) { + throw new SketchesReadOnlyException("Attempt to call setBit() on read-only memory"); + } + + @Override + public void clearBit(final long index) { + throw new SketchesReadOnlyException("Attempt to call clearBit() on read-only memory"); + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + throw new SketchesReadOnlyException("Attempt to call setBits() on read-only memory"); + } + + @Override + public void assignBit(final long index, final boolean value) { throw new SketchesReadOnlyException("Attempt to call setBit() on read-only memory"); } @Override - boolean getAndSetBit(final long index) { + public boolean getAndSetBit(final long index) { throw new SketchesReadOnlyException("Attempt to call getAndSetBit() on read-only memory"); } @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { throw new SketchesReadOnlyException("Attempt to call intersect() on read-only memory"); } @Override - void union(final BitArray other) { + public void union(final BitArray other) { throw new SketchesReadOnlyException("Attempt to call union() on read-only memory"); } @Override - void invert() { + public void invert() { throw new SketchesReadOnlyException("Attempt to call invert() on read-only memory"); } @Override - protected void setLong(final int arrayIndex, final long value) { + void setLong(final int arrayIndex, final long value) { throw new SketchesReadOnlyException("Attempt to call setLong() on read-only memory"); } } diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java b/src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java similarity index 57% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java rename to src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java index 4048b6775..ca81ae073 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java +++ b/src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import java.util.Arrays; @@ -31,13 +31,13 @@ *

Rounds the number of bits up to the smallest multiple of 64 (one long) * that is not smaller than the specified number. */ -final class HeapBitArray extends BitArray { +public final class HeapBitArray extends BitArray { private long numBitsSet_; // if -1, need to recompute value private boolean isDirty_; final private long[] data_; // creates an array of a given size - HeapBitArray(final long numBits) { + public HeapBitArray(final long numBits) { super(); if (numBits <= 0) { @@ -54,7 +54,7 @@ final class HeapBitArray extends BitArray { } // uses the provided array - HeapBitArray(final long numBitsSet, final long[] data) { + public HeapBitArray(final long numBitsSet, final long[] data) { super(); data_ = data; @@ -64,7 +64,7 @@ final class HeapBitArray extends BitArray { // reads a serialized image, but the BitArray is not fully self-describing so requires // a flag to indicate whether the array is empty - static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { + public static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { final int numLongs = buffer.getInt(); if (numLongs < 0) { throw new SketchesArgumentException("Possible corruption: Must have strictly positive array size. Found: " + numLongs); @@ -85,40 +85,124 @@ static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { } @Override - protected boolean isDirty() { + public boolean isDirty() { return isDirty_; } @Override - boolean hasMemory() { + public boolean hasMemory() { return false; } @Override - boolean isDirect() { + public boolean isDirect() { return false; } @Override - boolean isReadOnly() { return false; } + public boolean isReadOnly() { return false; } // queries a single bit in the array @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { return (data_[(int) index >>> 6] & (1L << index)) != 0 ? true : false; } + @Override + public long getBits(final long index, final int numBits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (numBits == 0) { return 0; } + + final long endBit = index + numBits - 1; + + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + return (data_[fromIndex] & (toMask - fromMask)) >>> fromOffset; + } + + // spans longs, need to combine bits from two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = ~((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + long result = (data_[fromIndex] & fromMask) >>> fromOffset; + result |= (data_[toIndex] & toMask) << splitBit; + return result; + } + // sets a single bit in the array without querying, meaning the method // cannot properly track the number of bits set so set isDirty = true @Override - void setBit(final long index) { + public void setBit(final long index) { data_[(int) index >>> 6] |= 1L << index; isDirty_ = true; } + @Override + public void clearBit(final long index) { + data_[(int) index >>> 6] &= ~(1L << index); + isDirty_ = true; + } + + // assigns a single bit in the array without querying + @Override + public void assignBit(final long index, final boolean value) { + if (value) { + setBit(index); + } else { + clearBit(index); + } + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (numBits == 0) { return; } + + isDirty_ = true; + final long endBit = index + numBits - 1; + + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + final long mask = toMask - fromMask; + data_[fromIndex] = (data_[fromIndex] & ~mask) | ((bits << fromOffset) & mask); + return; + } + + // spans longs, need to set bits in two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = (1L << fromOffset) - 1; // inverse mask in this case + final long toMask = (1L << (toOffset + 1)) - 1; + + data_[fromIndex] = (data_[fromIndex] & fromMask) | ((bits << fromOffset) & ~fromMask); + data_[toIndex] = (data_[toIndex] & ~toMask) | ((bits >>> splitBit) & toMask); + } + // returns existing value of bit @Override - boolean getAndSetBit(final long index) { + public boolean getAndSetBit(final long index) { final int offset = (int) index >>> 6; final long mask = 1L << index; if ((data_[offset] & mask) != 0) { @@ -134,7 +218,7 @@ boolean getAndSetBit(final long index) { // O(1) if only getAndSetBit() has been used // O(data_.length) if setBit() has ever been used @Override - long getNumBitsSet() { + public long getNumBitsSet() { if (isDirty_) { numBitsSet_ = 0; for (final long val : data_) { @@ -145,14 +229,14 @@ long getNumBitsSet() { } @Override - long getCapacity() { return (long) data_.length * Long.SIZE; } + public long getCapacity() { return (long) data_.length * Long.SIZE; } @Override - int getArrayLength() { return data_.length; } + public int getArrayLength() { return data_.length; } // applies logical OR @Override - void union(final BitArray other) { + public void union(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot union bit arrays with unequal lengths"); } @@ -168,7 +252,7 @@ void union(final BitArray other) { // applies logical AND @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -184,7 +268,7 @@ void intersect(final BitArray other) { // applies bitwise inversion @Override - void invert() { + public void invert() { if (isDirty_) { numBitsSet_ = 0; for (int i = 0; i < data_.length; ++i) { @@ -200,7 +284,7 @@ void invert() { } } - void writeToBuffer(final WritableBuffer wbuf) { + public void writeToBuffer(final WritableBuffer wbuf) { wbuf.putInt(data_.length); wbuf.putInt(0); // unused @@ -211,18 +295,18 @@ void writeToBuffer(final WritableBuffer wbuf) { } @Override - protected long getLong(final int arrayIndex) { + public long getLong(final int arrayIndex) { return data_[arrayIndex]; } @Override - protected void setLong(final int arrayIndex, final long value) { + public void setLong(final int arrayIndex, final long value) { data_[arrayIndex] = value; } // clears the array @Override - void reset() { + public void reset() { Arrays.fill(data_, 0); numBitsSet_ = 0; isDirty_ = false; diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java deleted file mode 100644 index 658e15f0d..000000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -public abstract class Bitmap { - - public abstract long size(); - public abstract void set(long bit_index, boolean value); - public abstract void setFromTo(long from, long to, long value); - public abstract boolean get(long bit_index); - public abstract long getFromTo(long from, long to); - - public static boolean get_fingerprint_bit(long index, long fingerprint) { - long mask = 1 << index; - long and = fingerprint & mask; - return and != 0; - } -} diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java deleted file mode 100644 index ca387ebc9..000000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -/* -Copyright � 1999 CERN - European Organization for Nuclear Research. -Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose -is hereby granted without fee, provided that the above copyright notice appear in all copies and -that both that copyright notice and this permission notice appear in supporting documentation. -CERN makes no representations about the suitability of this software for any purpose. -It is provided "as is" without expressed or implied warranty. -*/ - -/** - * Implements quick non polymorphic non bounds checking low level bitvector operations. - * Includes some operations that interpret sub-bitstrings as long integers. - *

- * WARNING: Methods of this class do not check preconditions. - * Provided with invalid parameters these method may return (or set) invalid values without throwing any exception. - * You should only use this class when performance is critical and you are absolutely sure that indexes are within bounds. - *

- * A bitvector is modelled as a long array, i.e. long[] bits holds bits of a bitvector. - * Each long value holds 64 bits. - * The i-th bit is stored in bits[i/64] at - * bit position i % 64 (where bit position 0 refers to the least - * significant bit and 63 refers to the most significant bit). - * - * @author wolfgang.hoschek@cern.ch - * @version 1.0, 09/24/99 - * @see java.util.BitSet - */ -//package bitmap_implementations; - -public class QuickBitVector extends Object { - protected final static int ADDRESS_BITS_PER_UNIT = 6; // 64=2^6 - protected final static int BITS_PER_UNIT = 64; // = 1 << ADDRESS_BITS_PER_UNIT - protected final static int BIT_INDEX_MASK = 63; // = BITS_PER_UNIT - 1; - - private static final long[] pows = precomputePows(); //precompute bitmasks for speed - /** - * Makes this class non instantiable, but still inheritable. - */ - protected QuickBitVector() { - } - /** - * Returns a bit mask with bits in the specified range set to 1, all the rest set to 0. - * In other words, returns a bit mask having 0,1,2,3,...,64 bits set. - * If to-from+1==0 then returns zero (0L). - * Precondition (not checked): to-from+1 ≥ 0 AND to-from+1 ≤ 64. - * - * @param from index of start bit (inclusive) - * @param to index of end bit (inclusive). - * @return the bit mask having all bits between from and to set to 1. - */ - public static final long bitMaskWithBitsSetFromTo(long from, long to) { - return pows[(int)(to-from+1)] << from; - - // This turned out to be slower: - // 0xffffffffffffffffL == ~0L == -1L == all 64 bits set. - // int width; - // return (width=to-from+1) == 0 ? 0L : (0xffffffffffffffffL >>> (BITS_PER_UNIT-width)) << from; - } - /** - * Changes the bit with index bitIndex in the bitvector bits to the "clear" (false) state. - * - * @param bits the bitvector. - * @param bitIndex the index of the bit to be cleared. - */ - public static void clear(long[] bits, long bitIndex) { - bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] &= ~(1L << (bitIndex & BIT_INDEX_MASK)); - } - /** - * Returns from the bitvector the value of the bit with the specified index. - * The value is true if the bit with the index bitIndex - * is currently set; otherwise, returns false. - * - * @param bits the bitvector. - * @param bitIndex the bit index. - * @return the value of the bit with the specified index. - */ - public static boolean get(long[] bits, long bitIndex) { - return ((bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] & (1L << (bitIndex & BIT_INDEX_MASK))) != 0); - } - /** - * Returns a long value representing bits of a bitvector from index from to index to. - * Bits are returned as a long value with the return value having bit 0 set to bit from, ..., bit to-from set to bit to. - * All other bits of return value are set to 0. - * If from > to then returns zero (0L). - * Precondition (not checked): to-from+1 ≤ 64. - * @param bits the bitvector. - * @param from index of start bit (inclusive). - * @param to index of end bit (inclusive). - * @return the specified bits as long value. - */ - public static long getLongFromTo(long[] bits, long from, long to) { - if (from>to) return 0L; - - final int fromIndex = (int)(from >> ADDRESS_BITS_PER_UNIT); //equivalent to from/64 - final int toIndex = (int)(to >> ADDRESS_BITS_PER_UNIT); - final int fromOffset = (int)(from & BIT_INDEX_MASK); //equivalent to from%64 - final int toOffset = (int)(to & BIT_INDEX_MASK); - //this is equivalent to the above, but slower: - //final int fromIndex=from/BITS_PER_UNIT; - //final int toIndex=to/BITS_PER_UNIT; - //final int fromOffset=from%BITS_PER_UNIT; - //final int toOffset=to%BITS_PER_UNIT; - - - long mask; - if (fromIndex==toIndex) { //range does not cross unit boundaries; value to retrieve is contained in one single long value. - mask=bitMaskWithBitsSetFromTo(fromOffset, toOffset); - return (bits[fromIndex] & mask) >>> fromOffset; - - } - - //range crosses unit boundaries; value to retrieve is spread over two long values. - //get part from first long value - mask=bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK); - final long x1=(bits[fromIndex] & mask) >>> fromOffset; - - //get part from second long value - mask=bitMaskWithBitsSetFromTo(0, toOffset); - final long x2=(bits[toIndex] & mask) << (BITS_PER_UNIT-fromOffset); - - //combine - return x1|x2; - } - - /** - * Returns the index of the least significant bit in state "true". - * Returns 32 if no bit is in state "true". - * - * Examples: - *

-     * 0x80000000 : 31
-     * 0x7fffffff : 0
-     * 0x00000001 : 0
-     * 0x00000000 : 32
-     * 
- * - * @param value The integer value for which the least significant bit index is to be found. - * @return The index of the least significant bit in state "true". Returns 32 if no bit is in state "true". - */ - static public int leastSignificantBit(int value) { - int i=-1; - while (++i < 32 && (((1<> ADDRESS_BITS_PER_UNIT) ; // This line basically does (nBits-1) / 2^ADDRESS... - long safe_right_shift = ((nBits-1) >>> ADDRESS_BITS_PER_UNIT) ; // This line basically does (nBits-1) / 2^ADDRESS... - // System.out.println("Right shift " + right_shift); - //System.out.println("Safe Right shift " + safe_right_shift); - int unitIndex = (int)((nBits-1) >> ADDRESS_BITS_PER_UNIT); // How many multiples of 64 bits do we need to store nBits bits? - //System.out.println(ADDRESS_BITS_PER_UNIT); - long[] bitVector = new long[unitIndex + 1]; - //System.out.println("length " + bitVector.length); - //System.out.println("Total bits: " + (bitVector.length * 64)); - //System.out.println("Num slots available: " + (bitVector.length * 64) / bitsPerElement); - return bitVector; - } - - /** - * Returns the index of the most significant bit in state "true". - * Returns -1 if no bit is in state "true". - * - * Examples: - *
-     * 0x80000000 : 31
-     * 0x7fffffff : 30
-     * 0x00000001 : 0
-     * 0x00000000 : -1
-     * 
- * - * @param value The integer value for which the most significant bit index is to be found. - * @return The index of the most significant bit in state "true". Returns -1 if no bit is in state "true". - */ - static public int mostSignificantBit(int value) { - int i=32; - while (--i >=0 && (((1<= 1; ) { - pows[i]=value >>> (BITS_PER_UNIT-i); - } - pows[0]=0L; - return pows; - } - - /** - * Sets the bit with index bitIndex in the bitvector bits to the state specified by value. - * - * @param bits the bitvector. - * @param bitIndex the index of the bit to be changed. - * @param value the value to be stored in the bit. - */ - public static void put(long[] bits, long bitIndex, boolean value) { - if (value) - set(bits, bitIndex); - else - clear(bits, bitIndex); - } - - /** - * Sets bits of a bitvector from index from to index to to the bits of value. - * Bit from is set to bit 0 of value, ..., bit to is set to bit to-from of value. - * All other bits stay unaffected. - * If from > to then does nothing. - * Precondition (not checked): to-from+1 ≤ 64. - * - * this function is equivalent to the slower code below: - * int fromIndex=from/BITS_PER_UNIT; - * int toIndex=to/BITS_PER_UNIT; - * int fromOffset=from%BITS_PER_UNIT; - * int toOffset=to%BITS_PER_UNIT; - * - * @param bits the bitvector. - * @param value the value to be copied into the bitvector. - * @param from index of start bit (inclusive). - * @param to index of end bit (inclusive). - */ - public static void putLongFromTo(long[] bits, long value, long from, long to) { - if (from>to) return; - - final int fromIndex=(int)(from >> ADDRESS_BITS_PER_UNIT); //equivalent to from/64 - final int toIndex=(int)(to >> ADDRESS_BITS_PER_UNIT); - final int fromOffset=(int)(from & BIT_INDEX_MASK); //equivalent to from % 64 - final int toOffset=(int)(to & BIT_INDEX_MASK); - - //make sure all unused bits to the left are cleared. - long mask; - mask=bitMaskWithBitsSetFromTo(to-from+1, BIT_INDEX_MASK); - long cleanValue=value & (~mask); - - long shiftedValue; - - if (fromIndex==toIndex) { //range does not cross unit boundaries; should go into one single long value. - shiftedValue=cleanValue << fromOffset; - mask=bitMaskWithBitsSetFromTo(fromOffset, toOffset); - bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue; - return; - - } - - //range crosses unit boundaries; value should go into two long values. - //copy into first long value. - shiftedValue=cleanValue << fromOffset; - mask=bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK); - bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue; - - //copy into second long value. - shiftedValue=cleanValue >>> (BITS_PER_UNIT - fromOffset); - mask=bitMaskWithBitsSetFromTo(0, toOffset); - bits[toIndex] = (bits[toIndex] & (~mask)) | shiftedValue; - } - - /** - * Changes the bit with index bitIndex in the bitvector bits to the "set" (true) state. - * - * @param bits the bitvector. - * @param bitIndex the index of the bit to be set. - */ - public static void set(long[] bits, long bitIndex) { - bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] |= 1L << (bitIndex & BIT_INDEX_MASK); - } - - /** - * Returns the index of the unit that contains the given bitIndex. - * - * @param bitIndex The index of the bit to be checked. - * @return The index of the unit that contains the given bitIndex. - */ - protected static long unit(long bitIndex) { - return bitIndex >> ADDRESS_BITS_PER_UNIT; // equivalent to bitIndex/64 - } -} diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java deleted file mode 100644 index a4c24a3ff..000000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -public class QuickBitVectorWrapper extends Bitmap { - - long[] bs; - - public QuickBitVectorWrapper(int bits_per_entry, long num_entries) { - bs = QuickBitVector.makeBitVector(num_entries, bits_per_entry); - } - - @Override - public long size() { - return (long)bs.length * Long.BYTES * 8L; - } - - @Override - public void set(long bit_index, boolean value) { - if (value) { - QuickBitVector.set(bs, bit_index); - } - else { - QuickBitVector.clear(bs, bit_index); - } - } - - @Override - public void setFromTo(long from, long to, long value) { - QuickBitVector.putLongFromTo(bs, value, from, to - 1); - } - - @Override - public boolean get(long bit_index) { - return QuickBitVector.get(bs, bit_index); - } - - @Override - public long getFromTo(long from, long to) { - return QuickBitVector.getLongFromTo(bs, from, to - 1); - } - - -} - diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index 93a6761c1..8671dd18f 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -23,13 +23,16 @@ import java.util.HashSet; import java.util.Set; +import org.apache.datasketches.filters.common.BitArray; +import org.apache.datasketches.filters.common.HeapBitArray; + public class QuotientFilter extends Filter { int bitPerEntry; int fingerprintLength; int power_of_two_size; int num_entries; - Bitmap filter; + BitArray filter; double expansion_threshold; long max_entries_before_expansion; @@ -83,18 +86,19 @@ public void set_expand_autonomously(boolean val) { expand_autonomously = val; } - Bitmap make_filter(long init_size, int bits_per_entry) { + BitArray make_filter(long init_size, int bits_per_entry) { // System.out.println(init_size ) ; // System.out.println(num_extension_slots); // System.out.println("Making BitVector with: " + (init_size + num_extension_slots) + "SLOTS"); - return new QuickBitVectorWrapper(bits_per_entry, init_size); + //return new QuickBitVectorWrapper(bits_per_entry, init_size); + return new HeapBitArray(init_size * bits_per_entry); } public int get_fingerprint_length() { return fingerprintLength; } - QuotientFilter(int power_of_two, int bits_per_entry, Bitmap bitmap) { + QuotientFilter(int power_of_two, int bits_per_entry, BitArray bitmap) { power_of_two_size = power_of_two; bitPerEntry = bits_per_entry; fingerprintLength = bits_per_entry - 3; @@ -152,7 +156,7 @@ public long get_num_slots() { long getMask() { return get_num_slots() - 1; } - + // sets the metadata flag bits for a given slot index void modify_slot(boolean is_occupied, boolean is_continuation, boolean is_shifted, long index) { @@ -163,7 +167,7 @@ void modify_slot(boolean is_occupied, boolean is_continuation, boolean is_shifte // sets the fingerprint for a given slot index void set_fingerprint(long index, long fingerprint) { - filter.setFromTo(index * bitPerEntry + 3, (long)index * bitPerEntry + 3 + fingerprintLength, fingerprint); + filter.setBits(index * bitPerEntry + 3, fingerprintLength, fingerprint); } // print a nice representation of the filter that can be understood. @@ -185,7 +189,7 @@ public String get_pretty_str(boolean vertical) { if (remainder == 3) { sbr.append(" "); } - sbr.append(filter.get(i) ? "1" : "0"); + sbr.append(filter.getBit(i) ? "1" : "0"); } sbr.append("\n"); return sbr.toString(); @@ -198,12 +202,12 @@ public void pretty_print() { // return a fingerprint in a given slot index long get_fingerprint(long index) { - return filter.getFromTo(index * bitPerEntry + 3, index * bitPerEntry + 3 + fingerprintLength); + return filter.getBits(index * bitPerEntry + 3, fingerprintLength); } // return an entire slot representation, including metadata flags and fingerprint long get_slot(long index) { - return filter.getFromTo(index * bitPerEntry, (index + 1) * bitPerEntry); + return filter.getBits(index * bitPerEntry, bitPerEntry); } // compare a fingerprint input to the fingerprint in some slot index @@ -251,27 +255,27 @@ public int get_bits_per_entry() { } boolean is_occupied(long index) { - return filter.get(index * bitPerEntry); + return filter.getBit(index * bitPerEntry); } boolean is_continuation(long index) { - return filter.get(index * bitPerEntry + 1); + return filter.getBit(index * bitPerEntry + 1); } boolean is_shifted(long index) { - return filter.get(index * bitPerEntry + 2); + return filter.getBit(index * bitPerEntry + 2); } void set_occupied(long index, boolean val) { - filter.set(index * bitPerEntry, val); + filter.assignBit(index * bitPerEntry, val); } void set_continuation(long index, boolean val) { - filter.set(index * bitPerEntry + 1, val); + filter.assignBit(index * bitPerEntry + 1, val); } void set_shifted(long index, boolean val) { - filter.set(index * bitPerEntry + 2, val); + filter.assignBit(index * bitPerEntry + 2, val); } boolean is_slot_empty(long index) { @@ -689,7 +693,7 @@ protected boolean _search(long large_hash) { } public boolean get_bit_at_offset(int offset) { - return filter.get(offset); + return filter.getBit(offset); } public void compute_statistics() { diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java index 1f98c82f2..a39712195 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java @@ -18,7 +18,7 @@ */ package org.apache.datasketches.filters.quotientfilter; -import java.util.concurrent.ThreadLocalRandom; +//import java.util.concurrent.ThreadLocalRandom; import org.apache.datasketches.common.SketchesArgumentException; diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index 079c9f282..52b1371a9 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -26,6 +26,7 @@ import java.util.ArrayList; import java.util.List; +import org.apache.datasketches.common.Family; import org.apache.datasketches.memory.Buffer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableBuffer; @@ -126,7 +127,7 @@ class ReqSerDe { enum SerDeFormat { EMPTY, RAWITEMS, EXACT, ESTIMATION } private static final byte SER_VER = 1; - private static final byte FAMILY_ID = 17; + private static final byte FAMILY_ID = (byte) Family.REQ.getID(); static ReqSketch heapify(final Memory mem) { final Buffer buff = mem.asBuffer(); diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index 8397c130b..a1ac53c6d 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -169,7 +169,7 @@ private DirectQuickSelectSketch( //clear hash table area dstMem.clear(preambleLongs << 3, 8 << lgArrLongs); - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); memReqSvr_ = memReqSvr; } @@ -210,7 +210,7 @@ static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final l final DirectQuickSelectSketch dqss = new DirectQuickSelectSketch(seed, srcMem); - dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -228,7 +228,7 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin final DirectQuickSelectSketch dqss = new DirectQuickSelectSketch(seed, srcMem); - dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -310,7 +310,7 @@ UpdateReturnState hashUpdate(final long hash) { if (actLgRF > 0) { //Expand in current Memory //lgArrLongs will change; thetaLong, curCount will not resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs); - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs); + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; } //end of Expand in current memory, exit. @@ -330,7 +330,7 @@ UpdateReturnState hashUpdate(final long hash) { memReqSvr_.requestClose(wmem_, newDstMem); wmem_ = newDstMem; - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, tgtLgArrLongs); + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; } //end of Request more memory to resize } //end of resize diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index c593f52e3..a3ffebc14 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -86,7 +86,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long see final DirectQuickSelectSketchR dqssr = new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); - dqssr.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqssr; } @@ -104,7 +104,7 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); - dqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -276,11 +276,11 @@ UpdateReturnState hashUpdate(final long hash) { * @return the hash table threshold */ @SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments") - static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { + protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { //SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, //but this allows us to tune these constants for different sketches. final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; - return (int) Math.floor(fraction * (1 << lgArrLongs)); + return (int) (fraction * (1 << lgArrLongs)); } } diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java index 37b615456..b9d4dc9e1 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java @@ -92,7 +92,7 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float } lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS); - hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs_); + hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_); curCount_ = 0; thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false; @@ -128,7 +128,7 @@ static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long see final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF, preambleLongs, family); hqss.lgArrLongs_ = lgArrLongs; - hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs); hqss.curCount_ = extractCurCount(srcMem); hqss.thetaLong_ = extractThetaLong(srcMem); hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem); @@ -197,7 +197,7 @@ public void reset() { cache_ = new long[1 << lgArrLongsSM]; lgArrLongs_ = lgArrLongsSM; } - hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); + hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_); empty_ = true; curCount_ = 0; thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE); @@ -293,7 +293,7 @@ private final void resizeCache() { curCount_ = newCount; cache_ = tgtArr; - hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); + hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_); } //array stays the same size. Changes theta and thus count @@ -318,9 +318,9 @@ private final void quickSelectAndRebuild() { * @param lgArrLongs See lgArrLongs. * @return the hash table threshold */ - static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { + private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; - return (int) Math.floor(fraction * (1 << lgArrLongs)); + return (int) (fraction * (1 << lgArrLongs)); } } diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index 888116512..cc1fd4d23 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -297,13 +297,27 @@ public double getLowerBound(final int numStdDev) { * @param numberOfEntries the actual number of entries stored with the CompactSketch. * @return the maximum number of storage bytes required for a CompactSketch with the given number * of entries. + * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} */ + @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { if (numberOfEntries == 0) { return 8; } if (numberOfEntries == 1) { return 16; } return (numberOfEntries << 3) + 24; } + /** + * Returns the maximum number of storage bytes required for a CompactSketch given the configured + * log_base2 of the number of nominal entries, which is a power of 2. + * @param lgNomEntries Nominal Entries + * @return the maximum number of storage bytes required for a CompactSketch with the given + * nomEntries. + */ + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD) + + Family.QUICKSELECT.getMaxPreLongs() * Long.BYTES; + } + /** * Returns the maximum number of storage bytes required for an UpdateSketch with the given * number of nominal entries (power of 2). diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index a5862e4a4..c204751f2 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -79,15 +79,32 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) { } /** - * Ref: {@link Sketch#getMaxCompactSketchBytes(int)} - * @param numberOfEntries Ref: {@link Sketch#getMaxCompactSketchBytes(int)}, - * {@code numberOfEntries} - * @return Ref: {@link Sketch#getMaxCompactSketchBytes(int)} - */ + * Returns the maximum number of storage bytes required for a CompactSketch with the given + * number of actual entries. Note that this assumes the worst case of the sketch in + * estimation mode, which requires storing theta and count. + * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * @return the maximum number of storage bytes required for a CompactSketch with the given number + * of entries. + * @see Sketch#getMaxCompactSketchBytes(int) + * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} + */ + @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { return Sketch.getMaxCompactSketchBytes(numberOfEntries); } + /** + * Returns the maximum number of storage bytes required for a CompactSketch given the configured + * number of nominal entries (power of 2). + * @param nomEntries Nominal Entries + * @return the maximum number of storage bytes required for a CompactSketch with the given + * nomEntries. + * @see Sketch#getCompactSketchMaxBytes(int) + */ + public static int getCompactSketchMaxBytes(final int nomEntries) { + return Sketch.getCompactSketchMaxBytes(nomEntries); + } + /** * Ref: {@link SetOperation#getMaxIntersectionBytes(int)} * @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries} diff --git a/src/main/java/org/apache/datasketches/tuple/Filter.java b/src/main/java/org/apache/datasketches/tuple/Filter.java index 28b38295c..2ed156b84 100644 --- a/src/main/java/org/apache/datasketches/tuple/Filter.java +++ b/src/main/java/org/apache/datasketches/tuple/Filter.java @@ -19,10 +19,10 @@ package org.apache.datasketches.tuple; +import java.lang.reflect.Array; +import java.util.Arrays; import java.util.function.Predicate; -import org.apache.datasketches.common.ResizeFactor; - /** * Class for filtering entries from a {@link Sketch} given a {@link Summary} * @@ -52,23 +52,25 @@ public CompactSketch filter(final Sketch sketchIn) { if (sketchIn == null) { return new CompactSketch<>(null, null, Long.MAX_VALUE, true); } - - final QuickSelectSketch sketch = - new QuickSelectSketch<>(sketchIn.getRetainedEntries(), ResizeFactor.X1.lg(), null); + final long[] hashes = new long[sketchIn.getRetainedEntries()]; + T[] summaries = null; // lazy init to get class from the first entry + int i = 0; final TupleSketchIterator it = sketchIn.iterator(); while (it.next()) { final T summary = it.getSummary(); if (predicate.test(summary)) { - sketch.insert(it.getHash(), (T)summary.copy()); + hashes[i] = it.getHash(); + if (summaries == null) { + summaries = (T[]) Array.newInstance(summary.getClass(), sketchIn.getRetainedEntries()); + } + summaries[i++] = (T) summary.copy(); } } - - sketch.setThetaLong(sketchIn.getThetaLong()); - if (!sketchIn.isEmpty()) { - sketch.setEmpty(false); + final boolean isEmpty = i == 0 && !sketchIn.isEstimationMode(); + if (i == 0) { + return new CompactSketch<>(null, null, sketchIn.getThetaLong(), isEmpty); } - - return sketch.compact(); + return new CompactSketch<>(Arrays.copyOf(hashes, i), Arrays.copyOf(summaries, i), sketchIn.getThetaLong(), isEmpty); } } diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayRTest.java similarity index 85% rename from src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java rename to src/test/java/org/apache/datasketches/filters/common/DirectBitArrayRTest.java index 521019e62..ea02ad21a 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java +++ b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayRTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -99,6 +99,27 @@ public void basicOperationTest() { assertTrue(dba.isReadOnly()); } + @Test + public void getBitsFromToTest() { + final HeapBitArray hba = new HeapBitArray(128); + hba.setBit(1); // will override, but this forces non-empty + hba.setLong(0, 0x5555555555555555L); + hba.setLong(1, 0xFFFFFFFFFC003FFFL); + final Memory mem = bitArrayToMemory(hba); + DirectBitArrayR dba = DirectBitArrayR.wrap(mem, hba.isEmpty()); + + // single, full long test + assertEquals(dba.getBits(0, 64), 0x5555555555555555L); + + // subset of single long, mostly ones with a stretch of zeros + assertEquals(dba.getBits(64, 64), 0xFFFFFFFFFC003FFFL); + assertEquals(dba.getBits(78, 12), 0); + assertEquals(dba.getBits(77, 14), 8193); + + // spanning longs + assertEquals(dba.getBits(60, 20), 0x3FFF5); + } + @Test public void countBitsWhenDirty() { // like basicOperationTest but with setBit which does @@ -159,6 +180,9 @@ public void checkInvalidMethods() { // all of these try to modify a read-only memory assertThrows(SketchesReadOnlyException.class, () -> dba.setBit(14)); + assertThrows(SketchesReadOnlyException.class, () -> dba.clearBit(7)); + assertThrows(SketchesReadOnlyException.class, () -> dba.assignBit(924, false)); + assertThrows(SketchesReadOnlyException.class, () -> dba.setBits(100, 30, 0xFF)); assertThrows(SketchesReadOnlyException.class, () -> dba.getAndSetBit(100)); assertThrows(SketchesReadOnlyException.class, () -> dba.reset()); assertThrows(SketchesReadOnlyException.class, () -> dba.invert()); diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayTest.java similarity index 78% rename from src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java rename to src/test/java/org/apache/datasketches/filters/common/DirectBitArrayTest.java index a45bcbb82..4cc229c50 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java +++ b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -68,6 +68,8 @@ public void tooSmallCapacityTest() { } // no text of max size because the BitArray allows up to Integer.MAX_VALUE + // bits, which is the maximum size of an array in Java -- can't use it all, + // (need 2 longs for preamble) but also can't allocate that large to test on most machines @Test public void initializeTooSmallTest() { @@ -134,6 +136,70 @@ public void basicWritableWrapTest() { dba.setBit(100); assertTrue(dba.getAndSetBit(100)); assertEquals(dba.getNumBitsSet(), 8); + + dba.reset(); + assertTrue(dba.isEmpty()); + assertEquals(dba.getNumBitsSet(), 0); + + dba.setBit(0); + dba.setLong(0, -1); + assertTrue(dba.getBit(60)); + dba.clearBit(60); + assertFalse(dba.getBit(60)); + + assertTrue(dba.getBit(35)); + dba.assignBit(35, false); + assertFalse(dba.getBit(35)); + dba.assignBit(35, true); + assertTrue(dba.getBit(35)); + } + + @Test + public void getBitsFromToTest() { + final int numBits = 128; + final WritableMemory wmem = WritableMemory.writableWrap(new byte[32]); + final DirectBitArray dba = DirectBitArray.initialize(numBits, wmem); + + // single, full long test + dba.setBit(0); // useless but forces non-empty when using setLong() + dba.setLong(0, 0x5555555555555555L); + assertEquals(dba.getBits(0, 64), 0x5555555555555555L); + assertEquals(dba.getBits(64, 64), 0); + + // subset of single long, mostly ones with a stretch of zeros + dba.setLong(1, 0xFFFFFFFFFC003FFFL); + assertEquals(dba.getBits(64, 64), 0xFFFFFFFFFC003FFFL); + assertEquals(dba.getBits(78, 12), 0); + assertEquals(dba.getBits(77, 14), 8193); + + // spanning longs + assertEquals(dba.getBits(60, 20), 0x3FFF5); + } + + @Test + public void setBitsFromToTest() { + final int numBits = 128; + WritableMemory wmem = WritableMemory.writableWrap(new byte[32]); + DirectBitArray ba = DirectBitArray.initialize(numBits, wmem); + + // within a single long + ba.setBits(0, 64, 0x80000000DAB8C730L); + assertEquals(ba.getLong(0), 0x80000000DAB8C730L); + assertEquals(ba.getLong(1), 0); + + ba.setBits(40, 8, 0xA6); + assertEquals(ba.getLong(0), 0x8000A600DAB8C730L); + + // spanning longs + ba.setBits(60, 20, 0x3FFF5); + assertEquals(ba.getLong(0), 0x5000A600DAB8C730L); + assertEquals(ba.getLong(1), 0x3FFFL); + + // found specific failure with this test + wmem = WritableMemory.writableWrap(new byte[1272]); + ba = DirectBitArray.initialize(10000, wmem); + ba.setBits(601 * 10 + 3, 7, 125); + assertEquals(ba.getBits(601 * 10 + 3, 7), 125); } @Test diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/HeapBitArrayTest.java b/src/test/java/org/apache/datasketches/filters/common/HeapBitArrayTest.java similarity index 79% rename from src/test/java/org/apache/datasketches/filters/bloomfilter/HeapBitArrayTest.java rename to src/test/java/org/apache/datasketches/filters/common/HeapBitArrayTest.java index 0e91788ea..a55f98a30 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/HeapBitArrayTest.java +++ b/src/test/java/org/apache/datasketches/filters/common/HeapBitArrayTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -75,9 +75,62 @@ public void basicOperationTest() { assertTrue(ba.isEmpty()); assertEquals(ba.getNumBitsSet(), 0); + ba.setLong(0, -1); + assertTrue(ba.getBit(60)); + ba.clearBit(60); + assertFalse(ba.getBit(60)); + + assertTrue(ba.getBit(35)); + ba.assignBit(35, false); + assertFalse(ba.getBit(35)); + ba.assignBit(35, true); + assertTrue(ba.getBit(35)); + assertTrue(String.valueOf(ba).length() > 0); } + @Test + public void getBitsFromToTest() { + final HeapBitArray ba = new HeapBitArray(128); + + // single, full long test + ba.setLong(0, 0x5555555555555555L); + assertEquals(ba.getBits(0, 64), 0x5555555555555555L); + assertEquals(ba.getBits(64, 64), 0); + + // subset of single long, mostly ones with a stretch of zeros + ba.setLong(1, 0xFFFFFFFFFC003FFFL); + assertEquals(ba.getBits(64, 64), 0xFFFFFFFFFC003FFFL); + assertEquals(ba.getBits(78, 12), 0); + assertEquals(ba.getBits(77, 14), 8193); + + // spanning longs + assertEquals(ba.getBits(60, 20), 0x3FFF5); + } + + @Test + public void setBitsFromToTest() { + HeapBitArray ba = new HeapBitArray(128); + + // within a single long + ba.setBits(0, 64, 0x80000000DAB8C730L); + assertEquals(ba.getLong(0), 0x80000000DAB8C730L); + assertEquals(ba.getLong(1), 0); + + ba.setBits(40, 8, 0xA6); + assertEquals(ba.getLong(0), 0x8000A600DAB8C730L); + + // spanning longs + ba.setBits(60, 20, 0x3FFF5); + assertEquals(ba.getLong(0), 0x5000A600DAB8C730L); + assertEquals(ba.getLong(1), 0x3FFFL); + + // found specific failure with this test + ba = new HeapBitArray(10000); + ba.setBits(601 * 10 + 3, 7, 125); + assertEquals(ba.getBits(601 * 10 + 3, 7), 125); + } + @Test public void bitAddresOutOfBoundsTest() { final HeapBitArray ba = new HeapBitArray(1024); diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/BitVectorTests.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/BitVectorTests.java deleted file mode 100644 index 487e36576..000000000 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/BitVectorTests.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -import org.testng.annotations.Test; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.assertFalse; - -public class BitVectorTests { - - /** - * This test method initializes a QuickBitVectorWrapper with various combinations of bits per entry and number of entries. - * It then calculates the expected length of the bit vector and asserts that the actual size of the bit vector matches the expected length. - * - * Example Input-Output Pairs: - * 1. Input: bitsPerEntry = 2, numEntries = 8 (1L << 3) - * Output: expectedLengthBits = 64 - * - * 2. Input: bitsPerEntry = 3, numEntries = 16 (1L << 4) - * Output: 64 - * - * 3. Input: bitsPerEntry = 33, numEntries = 8 (1L << 3) - * Output: expectedLengthBits = 320 - */ - @Test - static public void testSize(){ - int[] bitsPerEntry = {2, 3, 4, 5, 6, 7, 8, 9, 10, 23, 24, 25, 31, 32, 33}; - long[] numEntries = {1L << 3, 1L<<4, 1L<<8, 1L << 16}; - long nBits ; - long expectedLengthBits ; - - for (int i = 0; i < bitsPerEntry.length; i++){ - for (int j = 0; j < numEntries.length; j++) { - QuickBitVectorWrapper bv = new QuickBitVectorWrapper(bitsPerEntry[i], numEntries[j]); - nBits = bitsPerEntry[i] * numEntries[j]; - expectedLengthBits = 64 * ((nBits % 64 == 0) ? (nBits / 64) : (1 + nBits / 64)); - assertEquals(bv.size(), expectedLengthBits); - } - } - } - - /* - This test amends a few entries in the BitVector and checks that they are appropriately set. - */ - @Test - static public void testSettersAndGetters(){ - QuickBitVectorWrapper bv = new QuickBitVectorWrapper(6, 16); - - // All entries should be False before any updates - for (int i = 0; i < bv.size(); i++){ - assertFalse(bv.get(i), "All entries should be False"); - } - - // Set some values - bv.set(0, true); - assertTrue(bv.get(0), "Value at index 0 should be True"); - - bv.set(32, true) ; - assertTrue(bv.get(32), "Value at index 32 should be True"); - - bv.setFromTo(64, 128, ~0L); - assertTrue(bv.getFromTo(64, 128) == -1L, "Values from 64 to 128 should be set to 1") ; - } -} diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java index c30d7317e..64f92ddc8 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java @@ -22,12 +22,20 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.assertEquals; +import java.util.ArrayList; import java.util.BitSet; import java.util.HashSet; import java.util.Random; public class QuotientFilterTest { + // this method had been in Bitmap, but was used only to test the QuotientFilter + public static boolean get_fingerprint_bit(long index, long fingerprint) { + long mask = 1 << index; + long and = fingerprint & mask; + return and != 0; + } + /* * This test is based on the example from https://en.wikipedia.org/wiki/Quotient_filter * in "Algorithm Description" section. @@ -36,7 +44,7 @@ public class QuotientFilterTest { * (b,1), (e,4), (f, 7), (c,1), (d,2), (a,1) */ @Test - static public void WikiInsertionTest() { + public void WikiInsertionTest() { int bits_per_entry = 6; // 6 bits per entry => 3 bits fingerprint, resolved internally in the filter. int num_entries_power = 3; QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); @@ -74,7 +82,7 @@ static public void WikiInsertionTest() { assertEquals(qf.get_fingerprint(7), F); } - static public int getState(QuotientFilter filter, int slot) { + public int getState(QuotientFilter filter, int slot) { return (filter.is_occupied(slot) ? 1 : 0) << 2 | (filter.is_continuation(slot) ? 1 : 0) << 1 | (filter.is_shifted(slot) ? 1 : 0); @@ -85,7 +93,7 @@ static public int getState(QuotientFilter filter, int slot) { * It performs the same insertions as in Figure 2 and checks for the same result. */ @Test - static public void PaperInsertionTest() { + public void PaperInsertionTest() { int bits_per_entry = 8; int num_entries_power = 4; int num_entries = (int)Math.pow(2, num_entries_power); @@ -116,7 +124,7 @@ static public void PaperInsertionTest() { // test we don't get any false negatives for quotient filter @Test - static public void FalseNegativeTest() { + public void FalseNegativeTest() { int bits_per_entry = 10; int num_entries_power = 10; QuotientFilter filter = new QuotientFilter(num_entries_power, bits_per_entry); @@ -130,7 +138,7 @@ static public void FalseNegativeTest() { * Checks this can be handled by the internal data structure and then deletes one of the keys from the filter. */ @Test - static public void OverflowTest() { + public void OverflowTest() { int bits_per_entry = 8; int num_entries_power = 3; int num_entries = (int)Math.pow(2, num_entries_power); @@ -158,7 +166,7 @@ static public void OverflowTest() { * and the program exits, indicating a test failure. */ @Test - static public void testQuotientFilterInsertionAndIteration() { + public void testQuotientFilterInsertionAndIteration() { int bits_per_entry = 8; int num_entries_power = 4; @@ -166,12 +174,12 @@ static public void testQuotientFilterInsertionAndIteration() { //int fingerprint_size = bits_per_entry - 3; QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - qf.insert(0, 2, false); - qf.insert(0, 3, false); - qf.insert(0, 3, false); - qf.insert(0, 4, false); - qf.insert(0, 15, false); // last slot in the filter - qf.insert(0, 16, false); // outside the bounds + qf.insert(0x1F, 2, false); + qf.insert(0x1F, 3, false); + qf.insert(0x1F, 3, false); + qf.insert(0x1F, 4, false); + qf.insert(0x1F, 15, false); // last slot in the filter + qf.insert(0x1F, 16, false); // outside the bounds qf.pretty_print() ; Iterator it = new Iterator(qf); @@ -181,7 +189,7 @@ static public void testQuotientFilterInsertionAndIteration() { } @Test - static public void testQuotientFilterIterator() { + public void testQuotientFilterIterator() { int bits_per_entry = 8; int num_entries_power = 4; @@ -225,7 +233,7 @@ static public BitSet set_slot_in_test(BitSet result, int bits_per_entry, int slo result.set(index++, is_continuation); result.set(index++, is_shifted); for (int i = 0; i < bits_per_entry - 3; i++) { - result.set(index++, Bitmap.get_fingerprint_bit(i, fingerprint) ); + result.set(index++, get_fingerprint_bit(i, fingerprint) ); } return result; } @@ -274,7 +282,7 @@ static public boolean test_no_false_negatives(QuotientFilter filter, int num_ent for (Integer i : added) { boolean found = filter.search((long)i); if (!found) { - return false ; + return false; } } return true; diff --git a/src/test/java/org/apache/datasketches/theta/SketchesTest.java b/src/test/java/org/apache/datasketches/theta/SketchesTest.java index 6b887448e..84942fe21 100644 --- a/src/test/java/org/apache/datasketches/theta/SketchesTest.java +++ b/src/test/java/org/apache/datasketches/theta/SketchesTest.java @@ -20,6 +20,7 @@ package org.apache.datasketches.theta; import static org.apache.datasketches.theta.BackwardConversions.convertSerVer3toSerVer1; +import static org.apache.datasketches.theta.Sketches.getCompactSketchMaxBytes; import static org.apache.datasketches.theta.Sketches.getMaxCompactSketchBytes; import static org.apache.datasketches.theta.Sketches.getMaxIntersectionBytes; import static org.apache.datasketches.theta.Sketches.getMaxUnionBytes; @@ -35,6 +36,7 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -130,7 +132,8 @@ public void checkSetOpMethods() { @Test public void checkUtilMethods() { - final int k = 1024; + final int lgK = 10; + final int k = 1 << lgK; final int maxUnionBytes = getMaxUnionBytes(k); assertEquals(2*k*8+32, maxUnionBytes); @@ -141,6 +144,10 @@ public void checkUtilMethods() { final int maxCompSkBytes = getMaxCompactSketchBytes(k+1); assertEquals(24+(k+1)*8, maxCompSkBytes); + final int compSkMaxBytes = getCompactSketchMaxBytes(lgK); { + assertEquals(compSkMaxBytes, ((2 << lgK) * 15) / 16 + (Family.QUICKSELECT.getMaxPreLongs() << 3)); + } + final int maxSkBytes = getMaxUpdateSketchBytes(k); assertEquals(24+2*k*8, maxSkBytes); } diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 8a45ea574..873a878a0 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -36,18 +36,17 @@ under the License. + + - + - - - - - - - - + + + + @@ -77,7 +76,18 @@ under the License. - + + + + + + +