Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/content/development/extensions-core/bloom-filter.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ Internally, this implementation of bloom filter uses Murmur3 fast non-cryptograp
"type" : "bloom",
"dimension" : <dimension_name>,
"bloomKFilter" : <serialized_bytes_for_BloomKFilter>,
"extractionFn" : <extraction_fn>
"extractionFn" : <extraction_fn>,
"useBitmapIndex" : <boolean>
}
```

Expand All @@ -53,6 +54,7 @@ Internally, this implementation of bloom filter uses Murmur3 fast non-cryptograp
|`dimension` |The dimension to filter over. | yes |
|`bloomKFilter` |Base64 encoded Binary representation of `org.apache.hive.common.util.BloomKFilter`| yes |
|`extractionFn`|[Extraction function](./../dimensionspecs.html#extraction-functions) to apply to the dimension values |no|
|`useBitmapIndex` |Use bitmap indexes to filter the dimension. This can be slower for higher-cardinality dimensions.|no (default `false`)|


### Serialized Format for BloomKFilter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,30 @@
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.hash.HashCode;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.segment.filter.DimensionPredicateFilter;

import javax.annotation.Nullable;
import java.util.HashSet;
import java.util.Objects;

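/**
 * A {@link DimFilter} that filters the values of {@code dimension}, optionally transformed by
 * {@code extractionFn}, using a {@link BloomKFilter}. Bitmap indexes are used only when
 * {@code useBitmapIndex} is true; it defaults to false when omitted.
 */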
public class BloomDimFilter implements DimFilter
{

private final String dimension;
private final BloomKFilter bloomKFilter;
private final HashCode hash;
private final ExtractionFn extractionFn;
private final boolean useBitmapIndex;

@JsonCreator
public BloomDimFilter(
@JsonProperty("dimension") String dimension,
@JsonProperty("bloomKFilter") BloomKFilterHolder bloomKFilterHolder,
@JsonProperty("extractionFn") ExtractionFn extractionFn
@JsonProperty("extractionFn") ExtractionFn extractionFn,
@Nullable @JsonProperty("useBitmapIndex") Boolean useBitmapIndex
)
{
Preconditions.checkArgument(dimension != null, "dimension must not be null");
Expand All @@ -56,6 +58,7 @@ public BloomDimFilter(
this.bloomKFilter = bloomKFilterHolder.getFilter();
this.hash = bloomKFilterHolder.getFilterHash();
this.extractionFn = extractionFn;
this.useBitmapIndex = useBitmapIndex != null && useBitmapIndex;
}

@Override
Expand Down Expand Up @@ -153,7 +156,14 @@ public boolean applyNull()
}
},
extractionFn
);
)
{
@Override
public boolean supportsBitmapIndex(BitmapIndexSelector selector)
{
return useBitmapIndex;
}
};
}

@JsonProperty
Expand All @@ -174,14 +184,17 @@ public ExtractionFn getExtractionFn()
return extractionFn;
}


Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to add this blank line.

@Override
public String toString()
public RangeSet<String> getDimensionRangeSet(String dimension)
{
if (extractionFn != null) {
return StringUtils.format("%s(%s) = %s", extractionFn, dimension, hash.toString());
} else {
return StringUtils.format("%s = %s", dimension, hash.toString());
}
return null;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better if you could add the @Nullable annotation to this method.

}

/**
 * Returns the columns this filter reads: a new set holding only {@code dimension}.
 */
@Override
public HashSet<String> getRequiredColumns()
{
return Sets.newHashSet(dimension);
}

@Override
Expand All @@ -193,36 +206,27 @@ public boolean equals(Object o)
if (o == null || getClass() != o.getClass()) {
return false;
}

BloomDimFilter that = (BloomDimFilter) o;

if (!dimension.equals(that.dimension)) {
return false;
}
if (hash != null ? !hash.equals(that.hash) : that.hash != null) {
return false;
}
return extractionFn != null ? extractionFn.equals(that.extractionFn) : that.extractionFn == null;
}

@Override
public RangeSet<String> getDimensionRangeSet(String dimension)
{
return null;
return useBitmapIndex == that.useBitmapIndex &&
Objects.equals(dimension, that.dimension) &&
Objects.equals(hash, that.hash) &&
Objects.equals(extractionFn, that.extractionFn);
}

@Override
public HashSet<String> getRequiredColumns()
public int hashCode()
{
return Sets.newHashSet(dimension);
return Objects.hash(dimension, hash, extractionFn, useBitmapIndex);
}

@Override
public int hashCode()
public String toString()
{
int result = dimension.hashCode();
result = 31 * result + (hash != null ? hash.hashCode() : 0);
result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0);
return result;
return "BloomDimFilter{" +
"dimension='" + dimension + '\'' +
", hash=" + hash +
", extractionFn=" + extractionFn +
", useBitmapIndex=" + useBitmapIndex +
'}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ public DimFilter toDruidFilter(
return new BloomDimFilter(
druidExpression.getSimpleExtraction().getColumn(),
holder,
druidExpression.getSimpleExtraction().getExtractionFn()
druidExpression.getSimpleExtraction().getExtractionFn(),
false
);
} else {
// expression virtual columns not currently supported
Expand Down
Loading