Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4177dc3
add support for mechanism to control filter optimization in historica…
clintropolis Jul 31, 2019
d4d6629
oops
clintropolis Jul 31, 2019
383eeb4
adjust
clintropolis Jul 31, 2019
cc9c9b0
woo
clintropolis Aug 1, 2019
212c9a8
Merge remote-tracking branch 'upstream/master' into filter-tuning
clintropolis Aug 2, 2019
f7b8a49
javadoc
clintropolis Aug 2, 2019
ef28052
review comments
clintropolis Aug 3, 2019
922c740
fix
clintropolis Aug 3, 2019
9587425
default
clintropolis Aug 3, 2019
1f65d05
oops
clintropolis Aug 3, 2019
3fbf5e6
oof
clintropolis Aug 3, 2019
6b86b32
this will fix it
clintropolis Aug 3, 2019
bb48537
more nullable, refactor DimFilter.getRequiredColumns to use Set, form…
clintropolis Aug 5, 2019
d0d7f05
extract class DimFilterToStringBuilder with common code from custom D…
clintropolis Aug 5, 2019
1aa5484
adjust variable naming
clintropolis Aug 5, 2019
359cf4f
missing nullable
clintropolis Aug 6, 2019
2be81b0
more nullable
clintropolis Aug 6, 2019
a6e6a31
fix javadocs
clintropolis Aug 6, 2019
346e8fd
nullable
clintropolis Aug 6, 2019
4402160
Merge remote-tracking branch 'upstream/master' into filter-tuning
clintropolis Aug 7, 2019
b8a3cae
address review comments
clintropolis Aug 7, 2019
cdb63a7
javadocs, precondition
clintropolis Aug 7, 2019
35098b0
nullable
clintropolis Aug 7, 2019
9e6a2e0
rename method to be consistent
clintropolis Aug 7, 2019
8fc735a
review comments
clintropolis Aug 8, 2019
505b668
remove tuning from ColumnComparisonFilter/ColumnComparisonDimFilter
clintropolis Aug 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ public class MomentSketchAggregatorFactory extends AggregatorFactory
public MomentSketchAggregatorFactory(
@JsonProperty("name") final String name,
@JsonProperty("fieldName") final String fieldName,
@Nullable @JsonProperty("k") final Integer k,
@Nullable @JsonProperty("compress") final Boolean compress
@JsonProperty("k") @Nullable final Integer k,
@JsonProperty("compress") @Nullable final Boolean compress
)
{
this(name, fieldName, k, compress, AggregatorUtil.MOMENTS_SKETCH_BUILD_CACHE_TYPE_ID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public class TDigestSketchAggregatorFactory extends AggregatorFactory
public TDigestSketchAggregatorFactory(
@JsonProperty("name") final String name,
@JsonProperty("fieldName") final String fieldName,
@Nullable @JsonProperty("compression") final Integer compression
@JsonProperty("compression") @Nullable final Integer compression
)
{
this(name, fieldName, compression, AggregatorUtil.TDIGEST_BUILD_SKETCH_CACHE_TYPE_ID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ public class SketchMergeAggregatorFactory extends SketchAggregatorFactory
public SketchMergeAggregatorFactory(
@JsonProperty("name") String name,
@JsonProperty("fieldName") String fieldName,
@Nullable @JsonProperty("size") Integer size,
@Nullable @JsonProperty("shouldFinalize") Boolean shouldFinalize,
@Nullable @JsonProperty("isInputThetaSketch") Boolean isInputThetaSketch,
@Nullable @JsonProperty("errorBoundsStdDev") Integer errorBoundsStdDev
@JsonProperty("size") @Nullable Integer size,
@JsonProperty("shouldFinalize") @Nullable Boolean shouldFinalize,
@JsonProperty("isInputThetaSketch") @Nullable Boolean isInputThetaSketch,
@JsonProperty("errorBoundsStdDev") @Nullable Integer errorBoundsStdDev
)
{
super(name, fieldName, size, AggregatorUtil.SKETCH_MERGE_CACHE_TYPE_ID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public class BloomFilterAggregatorFactory extends AggregatorFactory
public BloomFilterAggregatorFactory(
@JsonProperty("name") String name,
@JsonProperty("field") final DimensionSpec field,
@Nullable @JsonProperty("maxNumEntries") Integer maxNumEntries
@JsonProperty("maxNumEntries") @Nullable Integer maxNumEntries
)
{
this.name = name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,21 @@
package org.apache.druid.query.filter;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.hash.HashCode;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.segment.filter.DimensionPredicateFilter;

import java.util.HashSet;
import javax.annotation.Nullable;
import java.util.Objects;
import java.util.Set;

/**
*/
Expand All @@ -41,13 +44,17 @@ public class BloomDimFilter implements DimFilter
private final String dimension;
private final BloomKFilter bloomKFilter;
private final HashCode hash;
@Nullable
private final ExtractionFn extractionFn;
@Nullable
private final FilterTuning filterTuning;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be @Nullable, along with the corresponding constructor parameter

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I've updated all DimFilter implementations to have the appropriate things annotated with @Nullable


@JsonCreator
public BloomDimFilter(
@JsonProperty("dimension") String dimension,
@JsonProperty("bloomKFilter") BloomKFilterHolder bloomKFilterHolder,
@JsonProperty("extractionFn") ExtractionFn extractionFn
@JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn,
@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
)
{
Preconditions.checkArgument(dimension != null, "dimension must not be null");
Expand All @@ -56,6 +63,13 @@ public BloomDimFilter(
this.bloomKFilter = bloomKFilterHolder.getFilter();
this.hash = bloomKFilterHolder.getFilterHash();
this.extractionFn = extractionFn;
this.filterTuning = filterTuning;
}

/**
 * Convenience constructor that delegates to the four-argument constructor, passing {@code null} as the
 * filter tuning.
 */
@VisibleForTesting
public BloomDimFilter(String dimension, BloomKFilterHolder bloomKFilterHolder, @Nullable ExtractionFn extractionFn)
{
this(dimension, bloomKFilterHolder, extractionFn, null);
}

@Override
Expand Down Expand Up @@ -152,7 +166,8 @@ public boolean applyNull()
};
}
},
extractionFn
extractionFn,
filterTuning
);
}

Expand All @@ -168,20 +183,40 @@ public BloomKFilter getBloomKFilter()
return bloomKFilter;
}

/**
 * Returns the extraction function this filter was constructed with, or {@code null} if none was given.
 * Exposed as a JSON property.
 */
@Nullable
@JsonProperty
public ExtractionFn getExtractionFn()
{
return extractionFn;
}

@Nullable
@JsonInclude(JsonInclude.Include.NON_NULL)
@JsonProperty
public FilterTuning getFilterTuning()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method should be @Nullable

{
return filterTuning;
}

/**
 * Always returns {@code null}; the {@code dimension} argument is not consulted.
 */
@Override
public RangeSet<String> getDimensionRangeSet(String dimension)
{
return null;
}

/**
 * Returns an immutable set containing only this filter's {@code dimension}.
 */
@Override
public Set<String> getRequiredColumns()
{
return ImmutableSet.of(dimension);
}

@Override
public String toString()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add filterTuning in the toString() form.

{
if (extractionFn != null) {
return StringUtils.format("%s(%s) = %s", extractionFn, dimension, hash.toString());
} else {
return StringUtils.format("%s = %s", dimension, hash.toString());
}
return new DimFilterToStringBuilder().appendDimension(dimension, extractionFn)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoa. This is cool.

.appendEquals(hash.toString())
.appendFilterTuning(filterTuning)
.build();
}

@Override
Expand All @@ -193,36 +228,16 @@ public boolean equals(Object o)
if (o == null || getClass() != o.getClass()) {
return false;
}

BloomDimFilter that = (BloomDimFilter) o;

if (!dimension.equals(that.dimension)) {
return false;
}
if (hash != null ? !hash.equals(that.hash) : that.hash != null) {
return false;
}
return extractionFn != null ? extractionFn.equals(that.extractionFn) : that.extractionFn == null;
}

@Override
public RangeSet<String> getDimensionRangeSet(String dimension)
{
return null;
}

@Override
public HashSet<String> getRequiredColumns()
{
return Sets.newHashSet(dimension);
return dimension.equals(that.dimension) &&
hash.equals(that.hash) &&
Objects.equals(extractionFn, that.extractionFn) &&
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything about extractionFn: the field, constructor parameter, getter should be @Nullable

Objects.equals(filterTuning, that.filterTuning);
}

@Override
public int hashCode()
{
int result = dimension.hashCode();
result = 31 * result + (hash != null ? hash.hashCode() : 0);
result = 31 * result + (extractionFn != null ? extractionFn.hashCode() : 0);
return result;
return Objects.hash(dimension, hash, extractionFn, filterTuning);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ public DimFilter toDruidFilter(
return new BloomDimFilter(
druidExpression.getSimpleExtraction().getColumn(),
holder,
druidExpression.getSimpleExtraction().getExtractionFn()
druidExpression.getSimpleExtraction().getExtractionFn(),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Constructor annotated @VisibleForTesting doesn't mean it can't be used in production code.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is true, but I was trying to avoid using these constructors in production code to minimize the chance of errors and make things more obvious.

I initially had it more like you are suggesting and was using some of the 'test' constructors where we were always passing null, but this comment suggested the way it is now, and I agree: having the production code explicitly pass null for the filterTuning parameter makes it unambiguous whether the absence of a tuning is intentional, without requiring a comment.

Copy link
Copy Markdown
Member

@leventov leventov Aug 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still think it's strange that there is a chaining constructor, yet the code does not use the existing constructor with fewer parameters.

I think it would be better to create static factory methods in this situation. Some of them may have "InTest" suffix to strongly indicate that they are not for prod code.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, but the reason I have a second set of constructors at all was so I didn't have to change hundreds of lines of test code to add a null parameter, so making a static method would sort of defeat the purpose of that because the lines would change anyway.

I do think this is worth doing; I'll add it to the ticket I create about refactoring DimFilter, so that it also covers adding useful static methods for creating test filters. That way we can also eliminate passing in null everywhere for filters without an extractionFn.

Copy link
Copy Markdown
Member Author

@clintropolis clintropolis Aug 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mentioned making static test object creators to decouple test filters from json schema in #8256

null
);
} else if (virtualColumnRegistry != null) {
VirtualColumn virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
Expand All @@ -114,6 +115,7 @@ public DimFilter toDruidFilter(
return new BloomDimFilter(
virtualColumn.getOutputName(),
holder,
null,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And here.

null
);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public ApproximateHistogramAggregatorFactory(
@JsonProperty("numBuckets") Integer numBuckets,
@JsonProperty("lowerLimit") Float lowerLimit,
@JsonProperty("upperLimit") Float upperLimit,
@Nullable @JsonProperty("finalizeAsBase64Binary") Boolean finalizeAsBase64Binary
@JsonProperty("finalizeAsBase64Binary") @Nullable Boolean finalizeAsBase64Binary

)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public ApproximateHistogramFoldingAggregatorFactory(
@JsonProperty("numBuckets") Integer numBuckets,
@JsonProperty("lowerLimit") Float lowerLimit,
@JsonProperty("upperLimit") Float upperLimit,
@Nullable @JsonProperty("finalizeAsBase64Binary") Boolean finalizeAsBase64Binary
@JsonProperty("finalizeAsBase64Binary") @Nullable Boolean finalizeAsBase64Binary
)
{
super(name, fieldName, resolution, numBuckets, lowerLimit, upperLimit, finalizeAsBase64Binary);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ public class FixedBucketsHistogramAggregatorFactory extends AggregatorFactory
public FixedBucketsHistogramAggregatorFactory(
@JsonProperty("name") String name,
@JsonProperty("fieldName") String fieldName,
@Nullable @JsonProperty("numBuckets") Integer numBuckets,
@JsonProperty("numBuckets") @Nullable Integer numBuckets,
@JsonProperty("lowerLimit") double lowerLimit,
@JsonProperty("upperLimit") double upperLimit,
@JsonProperty("outlierHandlingMode") FixedBucketsHistogram.OutlierHandlingMode outlierHandlingMode,
@Nullable @JsonProperty("finalizeAsBase64Binary") Boolean finalizeAsBase64Binary
@JsonProperty("finalizeAsBase64Binary") @Nullable Boolean finalizeAsBase64Binary
)
{
this.name = name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ public JdbcExtractionNamespace(
@NotNull @JsonProperty(value = "table", required = true) final String table,
@NotNull @JsonProperty(value = "keyColumn", required = true) final String keyColumn,
@NotNull @JsonProperty(value = "valueColumn", required = true) final String valueColumn,
@Nullable @JsonProperty(value = "tsColumn", required = false) final String tsColumn,
@Nullable @JsonProperty(value = "filter", required = false) final String filter,
@Min(0) @Nullable @JsonProperty(value = "pollPeriod", required = false) final Period pollPeriod
@JsonProperty(value = "tsColumn", required = false) @Nullable final String tsColumn,
@JsonProperty(value = "filter", required = false) @Nullable final String filter,
@Min(0) @JsonProperty(value = "pollPeriod", required = false) @Nullable final Period pollPeriod
)
{
this.connectorConfig = Preconditions.checkNotNull(connectorConfig, "connectorConfig");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public UriExtractionNamespace(
String fileRegex,
@JsonProperty(value = "namespaceParseSpec", required = true)
FlatDataParser namespaceParseSpec,
@Min(0) @Nullable @JsonProperty(value = "pollPeriod", required = false)
@Min(0) @JsonProperty(value = "pollPeriod", required = false) @Nullable
Period pollPeriod,
@Deprecated
@JsonProperty(value = "versionRegex", required = false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public class OrcHadoopInputRowParser implements InputRowParser<OrcStruct>
@JsonCreator
public OrcHadoopInputRowParser(
@JsonProperty("parseSpec") ParseSpec parseSpec,
@Nullable @JsonProperty("binaryAsString") Boolean binaryAsString
@JsonProperty("binaryAsString") @Nullable Boolean binaryAsString
)
{
this.parseSpec = parseSpec;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,15 @@ public CompactionTask(
@JsonProperty("id") final String id,
@JsonProperty("resource") final TaskResource taskResource,
@JsonProperty("dataSource") final String dataSource,
@Nullable @JsonProperty("interval") final Interval interval,
@Nullable @JsonProperty("segments") final List<DataSegment> segments,
@Nullable @JsonProperty("dimensions") final DimensionsSpec dimensions,
@Nullable @JsonProperty("dimensionsSpec") final DimensionsSpec dimensionsSpec,
@Nullable @JsonProperty("metricsSpec") final AggregatorFactory[] metricsSpec,
@Nullable @JsonProperty("segmentGranularity") final Granularity segmentGranularity,
@Nullable @JsonProperty("targetCompactionSizeBytes") final Long targetCompactionSizeBytes,
@Nullable @JsonProperty("tuningConfig") final IndexTuningConfig tuningConfig,
@Nullable @JsonProperty("context") final Map<String, Object> context,
@JsonProperty("interval") @Nullable final Interval interval,
@JsonProperty("segments") @Nullable final List<DataSegment> segments,
@JsonProperty("dimensions") @Nullable final DimensionsSpec dimensions,
@JsonProperty("dimensionsSpec") @Nullable final DimensionsSpec dimensionsSpec,
@JsonProperty("metricsSpec") @Nullable final AggregatorFactory[] metricsSpec,
@JsonProperty("segmentGranularity") @Nullable final Granularity segmentGranularity,
@JsonProperty("targetCompactionSizeBytes") @Nullable final Long targetCompactionSizeBytes,
@JsonProperty("tuningConfig") @Nullable final IndexTuningConfig tuningConfig,
@JsonProperty("context") @Nullable final Map<String, Object> context,
@JacksonInject ObjectMapper jsonMapper,
@JacksonInject AuthorizerMapper authorizerMapper,
@JacksonInject ChatHandlerProvider chatHandlerProvider,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ public class IngestSegmentFirehoseFactory implements FiniteFirehoseFactory<Input
@JsonCreator
public IngestSegmentFirehoseFactory(
@JsonProperty("dataSource") final String dataSource,
@Nullable @JsonProperty("interval") Interval interval,
@JsonProperty("interval") @Nullable Interval interval,
// Specifying "segments" is intended only for when this FirehoseFactory has split itself,
// not for direct end user use.
@Nullable @JsonProperty("segments") List<WindowedSegmentId> segmentIds,
@JsonProperty("segments") @Nullable List<WindowedSegmentId> segmentIds,
@JsonProperty("filter") DimFilter dimFilter,
@JsonProperty("dimensions") List<String> dimensions,
@JsonProperty("metrics") List<String> metrics,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public ImmutableWorkerInfo(
@JsonProperty("availabilityGroups") Set<String> availabilityGroups,
@JsonProperty("runningTasks") Collection<String> runningTasks,
@JsonProperty("lastCompletedTaskTime") DateTime lastCompletedTaskTime,
@Nullable @JsonProperty("blacklistedUntil") DateTime blacklistedUntil
@JsonProperty("blacklistedUntil") @Nullable DateTime blacklistedUntil
)
{
this.worker = worker;
Expand Down
4 changes: 2 additions & 2 deletions processing/src/main/java/org/apache/druid/query/Druids.java
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ public TimeseriesQueryBuilder filters(String dimensionName, String value)

public TimeseriesQueryBuilder filters(String dimensionName, String value, String... values)
{
dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null);
dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null, null);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could call the old constructor

return this;
}

Expand Down Expand Up @@ -361,7 +361,7 @@ public SearchQueryBuilder dataSource(DataSource d)

public SearchQueryBuilder filters(String dimensionName, String value)
{
dimFilter = new SelectorDimFilter(dimensionName, value, null);
dimFilter = new SelectorDimFilter(dimensionName, value, null, null);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could call the old constructor

return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public FilteredAggregatorFactory(
public FilteredAggregatorFactory(
@JsonProperty("aggregator") AggregatorFactory delegate,
@JsonProperty("filter") DimFilter dimFilter,
@Nullable @JsonProperty("name") String name
@JsonProperty("name") @Nullable String name
)
{
Preconditions.checkNotNull(delegate, "aggregator");
Expand Down Expand Up @@ -243,7 +243,8 @@ public AggregatorFactory optimizeForSegment(PerSegmentQueryOptimizationContext o
new IntervalDimFilter(
intervalDimFilter.getDimension(),
effectiveFilterIntervals,
intervalDimFilter.getExtractionFn()
intervalDimFilter.getExtractionFn(),
intervalDimFilter.getFilterTuning()
),
this.name
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ public class BucketExtractionFn implements ExtractionFn

@JsonCreator
public BucketExtractionFn(
@Nullable @JsonProperty("size") Double size,
@Nullable @JsonProperty("offset") Double offset
@JsonProperty("size") @Nullable Double size,
@JsonProperty("offset") @Nullable Double offset
)
{
this.size = size == null ? 1 : size;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
*/
Expand Down Expand Up @@ -99,7 +100,7 @@ public RangeSet<String> getDimensionRangeSet(String dimension)
}

@Override
public HashSet<String> getRequiredColumns()
public Set<String> getRequiredColumns()
{
HashSet<String> requiredColumns = new HashSet<>();
fields.forEach(field -> requiredColumns.addAll(field.getRequiredColumns()));
Expand Down
Loading