Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@

/**
* This class holds an array of bits suitable for use in a Bloom Filter.
*
*
* <p>Rounds the number of bits up to the smallest multiple of 64 (one long)
* that is not smaller than the specified number.
*/
final class BitArray {
// MAX_BITS using longs, based on array indices being capped at Integer.MAX_VALUE
private static final long MAX_BITS = Integer.MAX_VALUE * (long) Long.SIZE;
private static final long MAX_BITS = Integer.MAX_VALUE * (long) Long.SIZE;

private long numBitsSet_; // if -1, need to recompute value
private boolean isDirty_;
Expand Down Expand Up @@ -68,10 +68,10 @@ static BitArray heapify(final Buffer buffer, final boolean isEmpty) {
if (numLongs < 0) {
throw new SketchesArgumentException("Possible corruption: Must have strictly positive array size. Found: " + numLongs);
}

if (isEmpty) {
return new BitArray((long) numLongs * Long.SIZE);
}
}

buffer.getInt(); // unused

Expand Down Expand Up @@ -120,7 +120,7 @@ long getNumBitsSet() {
numBitsSet_ = 0;
for (final long val : data_) {
numBitsSet_ += Long.bitCount(val);
}
}
}
return numBitsSet_;
}
Expand Down Expand Up @@ -184,7 +184,7 @@ long getSerializedSizeBytes() {
void writeToBuffer(final WritableBuffer wbuf) {
wbuf.putInt(data_.length);
wbuf.putInt(0); // unused

if (!isEmpty()) {
wbuf.putLong(isDirty_ ? -1 : numBitsSet_);
wbuf.putLongArray(data_, 0, data_.length);
Expand All @@ -204,7 +204,7 @@ public String toString() {
}

// prints a long as a series of 0s and 1s as little endian
private String printLong(final long val) {
private static String printLong(final long val) {
final StringBuilder sb = new StringBuilder();
for (int j = 0; j < Long.SIZE; ++j) {
sb.append((val & (1L << j)) != 0 ? "1" : "0");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
/**
* <p>This class provides methods to help estimate the correct parameters to use when
* creating a Bloom filter, and methods to create the filter using those values.</p>
*
*
* <p>The underlying math is described in the
* <a href='https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions'>
* Wikipedia article on Bloom filters</a>.</p>
Expand Down Expand Up @@ -75,7 +75,7 @@ public static long suggestNumFilterBits(final long maxDistinctItems, final doubl
if (targetFalsePositiveProb <= 0.0 || targetFalsePositiveProb > 1.0) {
throw new SketchesArgumentException("targetFalsePositiveProb must be a valid probability and strictly greater than 0");
}
return (long) Math.round(-maxDistinctItems * Math.log(targetFalsePositiveProb) / (Math.log(2) * Math.log(2)));
return Math.round(-maxDistinctItems * Math.log(targetFalsePositiveProb) / (Math.log(2) * Math.log(2)));
}

/**
Expand All @@ -93,7 +93,7 @@ public static BloomFilter createByAccuracy(final long maxDistinctItems, final do
* using the provided base seed for the hash function.
* @param maxDistinctItems The maximum expected number of distinct items to add to the filter
* @param targetFalsePositiveProb A desired false positive probability per item
* @param seed A base hash seed
* @param seed A base hash seed
* @return A new BloomFilter configured for the given input parameters
*/
public static BloomFilter createByAccuracy(final long maxDistinctItems, final double targetFalsePositiveProb, final long seed) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.datasketches.filters.bloomfilter;