diff --git a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java index 1e007edb194f..d28a8e8fd4be 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java @@ -74,7 +74,8 @@ public class BoundFilterBenchmark String.valueOf(START_INT), true, false, - false + false, + null ) ); @@ -85,7 +86,8 @@ public class BoundFilterBenchmark String.valueOf(END_INT), false, false, - false + false, + null ) ); @@ -96,7 +98,8 @@ public class BoundFilterBenchmark String.valueOf(END_INT), false, false, - false + false, + null ) ); @@ -107,7 +110,8 @@ public class BoundFilterBenchmark String.valueOf(START_INT), true, false, - true + true, + null ) ); @@ -118,7 +122,8 @@ public class BoundFilterBenchmark String.valueOf(END_INT), false, false, - true + true, + null ) ); @@ -129,7 +134,8 @@ public class BoundFilterBenchmark String.valueOf(END_INT), false, false, - true + true, + null ) ); diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index 60b2a26cd4d5..ba51535dc084 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -259,7 +259,7 @@ For instance the following filter ```json { "filter": { - "type": "extraction", + "type": "selector", "dimension": "product", "value": "bar_1", "extractionFn": { diff --git a/docs/content/querying/filters.md b/docs/content/querying/filters.md index 629151069f49..bebdd93c62ff 100644 --- a/docs/content/querying/filters.md +++ b/docs/content/querying/filters.md @@ -16,6 +16,8 @@ The grammar for a SELECTOR filter is as follows: This is the equivalent of `WHERE = ''`. +The selector filter supports the use of extraction functions, see [Filtering with Extraction Functions](#filtering-with-extraction-functions) for details. 
+ ### Regular expression filter The regular expression filter is similar to the selector filter, but using regular expressions. It matches the specified dimension with the given pattern. The pattern can be any standard [Java regular expression](http://docs.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html). @@ -24,6 +26,9 @@ The regular expression filter is similar to the selector filter, but using regul "filter": { "type": "regex", "dimension": , "pattern": } ``` +The regex filter supports the use of extraction functions; see [Filtering with Extraction Functions](#filtering-with-extraction-functions) for details. + + ### Logical expression filters #### AND @@ -81,11 +86,19 @@ The following matches any dimension values for the dimension `name` between `'ba } ``` +The JavaScript filter supports the use of extraction functions; see [Filtering with Extraction Functions](#filtering-with-extraction-functions) for details. + + ### Extraction filter +
+The extraction filter is now deprecated. The selector filter with an extraction function specified +provides identical functionality and should be used instead. +
+ Extraction filter matches a dimension using some specific [Extraction function](./dimensionspecs.html#extraction-functions). The following filter matches the values for which the extraction function has transformation entry `input_key=output_value` where - `output_value` is equal to the filter `value` and `input_key` is present as dimension. +`output_value` is equal to the filter `value` and `input_key` is present as dimension. **Example** The following matches dimension values in `[product_1, product_3, product_5]` for the column `product` @@ -110,6 +123,7 @@ The following matches dimension values in `[product_1, product_3, product_5]` fo } } ``` + ### Search filter Search filters can be used to filter on partial string matches. @@ -132,6 +146,10 @@ Search filters can be used to filter on partial string matches. |type|This String should always be "search".|yes| |dimension|The dimension to perform the search over.|yes| |query|A JSON object for the type of search. See below for more information.|yes| +|extractionFn|[Extraction function](#filtering-with-extraction-functions) to apply to the dimension.|no| + +The search filter supports the use of extraction functions; see [Filtering with Extraction Functions](#filtering-with-extraction-functions) for details. + ### In filter @@ -151,6 +169,9 @@ The grammar for a IN filter is as follows: } ``` +The IN filter supports the use of extraction functions; see [Filtering with Extraction Functions](#filtering-with-extraction-functions) for details. + + ### Bound filter Bound filter can be used to filter by comparing dimension values to an upper value or/and a lower value. @@ -158,6 +179,8 @@ By default Comparison is string based and **case sensitive**. To use numeric comparison you can set `alphaNumeric` to `true`. By default the bound filter is a not a strict inclusion `inputString <= upper && inputSting >= lower`. 
+The bound filter supports the use of extraction functions; see [Filtering with Extraction Functions](#filtering-with-extraction-functions) for details. + The grammar for a bound filter is as follows: ```json @@ -246,6 +269,8 @@ For instance suppose lower bound is `100` and value is `10K` the filter will mat Now suppose that the lower bound is `110` the filter will not match (`110 < 10K` returns `false`) + + #### Search Query Spec ##### Insensitive Contains @@ -270,3 +295,38 @@ Now suppose that the lower bound is `110` the filter will not match (`110 < 10K` |type|This String should always be "contains".|yes| |value|A String value to run the search over.|yes| |caseSensitive|Whether two string should be compared as case sensitive or not|yes| + + +### Filtering with Extraction Functions +Some filters optionally support the use of extraction functions. +An extraction function is defined by setting the "extractionFn" field on a filter. +See [Extraction function](./dimensionspecs.html#extraction-functions) for more details on extraction functions. + +If specified, the extraction function will be used to transform input values before the filter is applied. +The example below shows a selector filter combined with an extraction function. This filter will transform input values +according to the values defined in the lookup map; transformed values will then be matched with the string "bar_1". 
+ + +**Example** +The following matches dimension values in `[product_1, product_3, product_5]` for the column `product` + +```json +{ + "filter": { + "type": "selector", + "dimension": "product", + "value": "bar_1", + "extractionFn": { + "type": "lookup", + "lookup": { + "type": "map", + "map": { + "product_1": "bar_1", + "product_5": "bar_1", + "product_3": "bar_1" + } + } + } + } +} +``` \ No newline at end of file diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceIngestionSpecTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceIngestionSpecTest.java index e68999563db3..502adef96684 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceIngestionSpecTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceIngestionSpecTest.java @@ -48,7 +48,7 @@ public void testSingleIntervalSerde() throws Exception interval, null, null, - new SelectorDimFilter("dim", "value"), + new SelectorDimFilter("dim", "value", null), QueryGranularity.DAY, Lists.newArrayList("d1", "d2"), Lists.newArrayList("m1", "m2", "m3"), @@ -132,7 +132,7 @@ public void testMultiIntervalSerde() throws Exception 128 ) ), - new SelectorDimFilter("dim", "value"), + new SelectorDimFilter("dim", "value", null), QueryGranularity.DAY, Lists.newArrayList("d1", "d2"), Lists.newArrayList("m1", "m2", "m3"), diff --git a/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java b/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java index ae94b5376d7f..b131781dc3bb 100644 --- a/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java @@ -305,7 +305,7 @@ public List getLocations() new IngestSegmentFirehoseFactory( DATA_SOURCE_NAME, FOREVER, - new SelectorDimFilter(DIM_NAME, DIM_VALUE), + new 
SelectorDimFilter(DIM_NAME, DIM_VALUE, null), dim_names, metric_names, Guice.createInjector( diff --git a/processing/src/main/java/io/druid/query/Druids.java b/processing/src/main/java/io/druid/query/Druids.java index d7a0870e2fd6..8427a7a2cd85 100644 --- a/processing/src/main/java/io/druid/query/Druids.java +++ b/processing/src/main/java/io/druid/query/Druids.java @@ -163,9 +163,9 @@ public OrDimFilterBuilder copy(OrDimFilterBuilder builder) public OrDimFilterBuilder fields(String dimensionName, String value, String... values) { - fields = Lists.newArrayList(new SelectorDimFilter(dimensionName, value)); + fields = Lists.newArrayList(new SelectorDimFilter(dimensionName, value, null)); for (String val : values) { - fields.add(new SelectorDimFilter(dimensionName, val)); + fields.add(new SelectorDimFilter(dimensionName, val, null)); } return this; } @@ -256,7 +256,7 @@ public SelectorDimFilterBuilder() public SelectorDimFilter build() { - return new SelectorDimFilter(dimension, value); + return new SelectorDimFilter(dimension, value, null); } public SelectorDimFilterBuilder copy(SelectorDimFilterBuilder builder) @@ -459,13 +459,13 @@ public TimeseriesQueryBuilder intervals(List l) public TimeseriesQueryBuilder filters(String dimensionName, String value) { - dimFilter = new SelectorDimFilter(dimensionName, value); + dimFilter = new SelectorDimFilter(dimensionName, value, null); return this; } public TimeseriesQueryBuilder filters(String dimensionName, String value, String... 
values) { - dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values)); + dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null); return this; } @@ -615,13 +615,13 @@ public SearchQueryBuilder dataSource(DataSource d) public SearchQueryBuilder filters(String dimensionName, String value) { - dimFilter = new SelectorDimFilter(dimensionName, value); + dimFilter = new SelectorDimFilter(dimensionName, value, null); return this; } public SearchQueryBuilder filters(String dimensionName, String value, String... values) { - dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values)); + dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null); return this; } @@ -1159,13 +1159,13 @@ public SelectQueryBuilder context(Map c) public SelectQueryBuilder filters(String dimensionName, String value) { - dimFilter = new SelectorDimFilter(dimensionName, value); + dimFilter = new SelectorDimFilter(dimensionName, value, null); return this; } public SelectQueryBuilder filters(String dimensionName, String value, String... 
values) { - dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values)); + dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null); return this; } diff --git a/processing/src/main/java/io/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/io/druid/query/filter/BoundDimFilter.java index 7a068d035ba4..d1dcd453704f 100644 --- a/processing/src/main/java/io/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/BoundDimFilter.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.metamx.common.StringUtils; +import io.druid.query.extraction.ExtractionFn; import io.druid.segment.filter.BoundFilter; import java.nio.ByteBuffer; @@ -35,6 +36,7 @@ public class BoundDimFilter implements DimFilter private final boolean lowerStrict; private final boolean upperStrict; private final boolean alphaNumeric; + private final ExtractionFn extractionFn; @JsonCreator public BoundDimFilter( @@ -43,7 +45,8 @@ public BoundDimFilter( @JsonProperty("upper") String upper, @JsonProperty("lowerStrict") Boolean lowerStrict, @JsonProperty("upperStrict") Boolean upperStrict, - @JsonProperty("alphaNumeric") Boolean alphaNumeric + @JsonProperty("alphaNumeric") Boolean alphaNumeric, + @JsonProperty("extractionFn") ExtractionFn extractionFn ) { this.dimension = Preconditions.checkNotNull(dimension, "dimension can not be null"); @@ -53,6 +56,7 @@ public BoundDimFilter( this.lowerStrict = (lowerStrict == null) ? false : lowerStrict; this.upperStrict = (upperStrict == null) ? false : upperStrict; this.alphaNumeric = (alphaNumeric == null) ? 
false : alphaNumeric; + this.extractionFn = extractionFn; } @JsonProperty @@ -101,6 +105,12 @@ public boolean hasUpperBound() return upper != null; } + @JsonProperty + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + @Override public byte[] getCacheKey() { @@ -118,11 +128,14 @@ public byte[] getCacheKey() byte upperStrictByte = (this.isUpperStrict() == false) ? 0x0 : (byte) 1; byte AlphaNumericByte = (this.isAlphaNumeric() == false) ? 0x0 : (byte) 1; + byte[] extractionFnBytes = extractionFn == null ? new byte[0] : extractionFn.getCacheKey(); + ByteBuffer boundCacheBuffer = ByteBuffer.allocate( - 8 + 9 + dimensionBytes.length + upperBytes.length + lowerBytes.length + + extractionFnBytes.length ); boundCacheBuffer.put(DimFilterCacheHelper.BOUND_CACHE_ID) .put(boundType) @@ -134,7 +147,9 @@ public byte[] getCacheKey() .put(DimFilterCacheHelper.STRING_SEPARATOR) .put(upperBytes) .put(DimFilterCacheHelper.STRING_SEPARATOR) - .put(lowerBytes); + .put(lowerBytes) + .put(DimFilterCacheHelper.STRING_SEPARATOR) + .put(extractionFnBytes); return boundCacheBuffer.array(); } @@ -156,7 +171,7 @@ public boolean equals(Object o) if (this == o) { return true; } - if (!(o instanceof BoundDimFilter)) { + if (o == null || getClass() != o.getClass()) { return false; } @@ -177,7 +192,12 @@ public boolean equals(Object o) if (getUpper() != null ? !getUpper().equals(that.getUpper()) : that.getUpper() != null) { return false; } - return !(getLower() != null ? !getLower().equals(that.getLower()) : that.getLower() != null); + if (getLower() != null ? !getLower().equals(that.getLower()) : that.getLower() != null) { + return false; + } + return getExtractionFn() != null + ? getExtractionFn().equals(that.getExtractionFn()) + : that.getExtractionFn() == null; } @@ -190,6 +210,7 @@ public int hashCode() result = 31 * result + (isLowerStrict() ? 1 : 0); result = 31 * result + (isUpperStrict() ? 1 : 0); result = 31 * result + (isAlphaNumeric() ? 
1 : 0); + result = 31 * result + (getExtractionFn() != null ? getExtractionFn().hashCode() : 0); return result; } } diff --git a/processing/src/main/java/io/druid/query/filter/DimFilters.java b/processing/src/main/java/io/druid/query/filter/DimFilters.java index 8c084fbb09c8..6db330871095 100644 --- a/processing/src/main/java/io/druid/query/filter/DimFilters.java +++ b/processing/src/main/java/io/druid/query/filter/DimFilters.java @@ -34,7 +34,7 @@ public class DimFilters { public static SelectorDimFilter dimEquals(String dimension, String value) { - return new SelectorDimFilter(dimension, value); + return new SelectorDimFilter(dimension, value, null); } public static AndDimFilter and(DimFilter... filters) @@ -64,7 +64,7 @@ public static NotDimFilter not(DimFilter filter) public static RegexDimFilter regex(String dimension, String pattern) { - return new RegexDimFilter(dimension, pattern); + return new RegexDimFilter(dimension, pattern, null); } public static DimFilter dimEquals(final String dimension, String... 
values) diff --git a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java index 81bf20e4c9b8..0048b8c5bd06 100644 --- a/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/ExtractionDimFilter.java @@ -22,17 +22,21 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; +import com.google.common.base.Strings; import com.metamx.common.StringUtils; import io.druid.query.extraction.ExtractionFn; import io.druid.query.lookup.LookupExtractionFn; import io.druid.query.lookup.LookupExtractor; -import io.druid.segment.filter.ExtractionFilter; +import io.druid.segment.filter.SelectorFilter; import java.nio.ByteBuffer; import java.util.List; +import java.util.Objects; /** + * This class is deprecated, use SelectorDimFilter instead: {@link io.druid.query.filter.SelectorDimFilter} */ +@Deprecated public class ExtractionDimFilter implements DimFilter { private final String dimension; @@ -96,22 +100,13 @@ public byte[] getCacheKey() @Override public DimFilter optimize() { - if (this.getExtractionFn() instanceof LookupExtractionFn - && ((LookupExtractionFn) this.getExtractionFn()).isOptimize()) { - LookupExtractor lookup = ((LookupExtractionFn) this.getExtractionFn()).getLookup(); - final List keys = lookup.unapply(this.getValue()); - final String dimensionName = this.getDimension(); - if (!keys.isEmpty()) { - return new InDimFilter(dimensionName, keys); - } - } - return this; + return new SelectorDimFilter(dimension, value, extractionFn).optimize(); } @Override public Filter toFilter() { - return new ExtractionFilter(dimension, value, extractionFn); + return new SelectorFilter(dimension, value, extractionFn); } @Override diff --git a/processing/src/main/java/io/druid/query/filter/InDimFilter.java 
b/processing/src/main/java/io/druid/query/filter/InDimFilter.java index e9167f5a9393..2b1899195409 100644 --- a/processing/src/main/java/io/druid/query/filter/InDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/InDimFilter.java @@ -29,18 +29,28 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.metamx.common.StringUtils; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; import io.druid.segment.filter.InFilter; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; +import java.util.Objects; public class InDimFilter implements DimFilter { private final List values; private final String dimension; + private final ExtractionFn extractionFn; @JsonCreator - public InDimFilter(@JsonProperty("dimension") String dimension, @JsonProperty("values") List values) + public InDimFilter( + @JsonProperty("dimension") String dimension, + @JsonProperty("values") List values, + @JsonProperty("extractionFn") ExtractionFn extractionFn + ) { Preconditions.checkNotNull(dimension, "dimension can not be null"); Preconditions.checkArgument(values != null && !values.isEmpty(), "values can not be null or empty"); @@ -58,6 +68,7 @@ public String apply(String input) ) ); this.dimension = dimension; + this.extractionFn = extractionFn; } @JsonProperty @@ -72,6 +83,12 @@ public List getValues() return values; } + @JsonProperty + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + @Override public byte[] getCacheKey() { @@ -84,10 +101,13 @@ public byte[] getCacheKey() valuesBytesSize += valuesBytes[index].length + 1; ++index; } + byte[] extractionFnBytes = extractionFn == null ? 
new byte[0] : extractionFn.getCacheKey(); - ByteBuffer filterCacheKey = ByteBuffer.allocate(2 + dimensionBytes.length + valuesBytesSize) + ByteBuffer filterCacheKey = ByteBuffer.allocate(3 + dimensionBytes.length + valuesBytesSize + extractionFnBytes.length) .put(DimFilterCacheHelper.IN_CACHE_ID) .put(dimensionBytes) + .put(DimFilterCacheHelper.STRING_SEPARATOR) + .put(extractionFnBytes) .put(DimFilterCacheHelper.STRING_SEPARATOR); for (byte[] bytes : valuesBytes) { filterCacheKey.put(bytes) @@ -105,15 +125,7 @@ public DimFilter optimize() @Override public Filter toFilter() { - return new InFilter(dimension, ImmutableSet.copyOf(values)); - } - - @Override - public int hashCode() - { - int result = getValues().hashCode(); - result = 31 * result + getDimension().hashCode(); - return result; + return new InFilter(dimension, ImmutableSet.copyOf(values), extractionFn); } @Override @@ -122,16 +134,28 @@ public boolean equals(Object o) if (this == o) { return true; } - if (!(o instanceof InDimFilter)) { + if (o == null || getClass() != o.getClass()) { return false; } InDimFilter that = (InDimFilter) o; - if (!values.equals(that.values)) { + if (values != null ? !values.equals(that.values) : that.values != null) { + return false; + } + if (!dimension.equals(that.dimension)) { return false; } - return dimension.equals(that.dimension); + return extractionFn != null ? extractionFn.equals(that.extractionFn) : that.extractionFn == null; + + } + @Override + public int hashCode() + { + int result = values != null ? values.hashCode() : 0; + result = 31 * result + dimension.hashCode(); + result = 31 * result + (extractionFn != null ? 
extractionFn.hashCode() : 0); + return result; } } diff --git a/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java b/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java index 3e8713520143..f068d4aadaba 100644 --- a/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/JavaScriptDimFilter.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.metamx.common.StringUtils; +import io.druid.query.extraction.ExtractionFn; import io.druid.segment.filter.JavaScriptFilter; import java.nio.ByteBuffer; @@ -31,17 +32,20 @@ public class JavaScriptDimFilter implements DimFilter { private final String dimension; private final String function; + private final ExtractionFn extractionFn; @JsonCreator public JavaScriptDimFilter( @JsonProperty("dimension") String dimension, - @JsonProperty("function") String function + @JsonProperty("function") String function, + @JsonProperty("extractionFn") ExtractionFn extractionFn ) { Preconditions.checkArgument(dimension != null, "dimension must not be null"); Preconditions.checkArgument(function != null, "function must not be null"); this.dimension = dimension; this.function = function; + this.extractionFn = extractionFn; } @JsonProperty @@ -56,17 +60,26 @@ public String getFunction() return function; } + @JsonProperty + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + @Override public byte[] getCacheKey() { final byte[] dimensionBytes = StringUtils.toUtf8(dimension); final byte[] functionBytes = StringUtils.toUtf8(function); + byte[] extractionFnBytes = extractionFn == null ? 
new byte[0] : extractionFn.getCacheKey(); - return ByteBuffer.allocate(2 + dimensionBytes.length + functionBytes.length) + return ByteBuffer.allocate(3 + dimensionBytes.length + functionBytes.length + extractionFnBytes.length) .put(DimFilterCacheHelper.JAVASCRIPT_CACHE_ID) .put(dimensionBytes) .put(DimFilterCacheHelper.STRING_SEPARATOR) .put(functionBytes) + .put(DimFilterCacheHelper.STRING_SEPARATOR) + .put(extractionFnBytes) .array(); } @@ -79,7 +92,7 @@ public DimFilter optimize() @Override public Filter toFilter() { - return new JavaScriptFilter(dimension, function); + return new JavaScriptFilter(dimension, function, extractionFn); } @Override @@ -88,6 +101,7 @@ public String toString() return "JavaScriptDimFilter{" + "dimension='" + dimension + '\'' + ", function='" + function + '\'' + + ", extractionFn='" + extractionFn + '\'' + '}'; } } diff --git a/processing/src/main/java/io/druid/query/filter/RegexDimFilter.java b/processing/src/main/java/io/druid/query/filter/RegexDimFilter.java index 1168efa565e5..5b6a4235bb73 100644 --- a/processing/src/main/java/io/druid/query/filter/RegexDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/RegexDimFilter.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.metamx.common.StringUtils; +import io.druid.query.extraction.ExtractionFn; import io.druid.segment.filter.RegexFilter; import java.nio.ByteBuffer; @@ -33,17 +34,20 @@ public class RegexDimFilter implements DimFilter { private final String dimension; private final String pattern; + private final ExtractionFn extractionFn; @JsonCreator public RegexDimFilter( @JsonProperty("dimension") String dimension, - @JsonProperty("pattern") String pattern + @JsonProperty("pattern") String pattern, + @JsonProperty("extractionFn") ExtractionFn extractionFn ) { Preconditions.checkArgument(dimension != null, "dimension must not be null"); Preconditions.checkArgument(pattern != null, 
"pattern must not be null"); this.dimension = dimension; this.pattern = pattern; + this.extractionFn = extractionFn; } @JsonProperty @@ -58,17 +62,26 @@ public String getPattern() return pattern; } + @JsonProperty + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + @Override public byte[] getCacheKey() { final byte[] dimensionBytes = StringUtils.toUtf8(dimension); final byte[] patternBytes = StringUtils.toUtf8(pattern); + byte[] extractionFnBytes = extractionFn == null ? new byte[0] : extractionFn.getCacheKey(); - return ByteBuffer.allocate(2 + dimensionBytes.length + patternBytes.length) + return ByteBuffer.allocate(3 + dimensionBytes.length + patternBytes.length + extractionFnBytes.length) .put(DimFilterCacheHelper.REGEX_CACHE_ID) .put(dimensionBytes) .put(DimFilterCacheHelper.STRING_SEPARATOR) .put(patternBytes) + .put(DimFilterCacheHelper.STRING_SEPARATOR) + .put(extractionFnBytes) .array(); } @@ -81,7 +94,7 @@ public DimFilter optimize() @Override public Filter toFilter() { - return new RegexFilter(dimension, pattern); + return new RegexFilter(dimension, pattern, extractionFn); } @Override @@ -90,6 +103,7 @@ public String toString() return "RegexDimFilter{" + "dimension='" + dimension + '\'' + ", pattern='" + pattern + '\'' + + ", extractionFn='" + extractionFn + '\'' + '}'; } } diff --git a/processing/src/main/java/io/druid/query/filter/SearchQueryDimFilter.java b/processing/src/main/java/io/druid/query/filter/SearchQueryDimFilter.java index 876ba58bba97..dcceae7b21b1 100644 --- a/processing/src/main/java/io/druid/query/filter/SearchQueryDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/SearchQueryDimFilter.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.metamx.common.StringUtils; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.search.search.SearchQuerySpec; import io.druid.segment.filter.SearchQueryFilter; @@ 
-33,10 +34,12 @@ public class SearchQueryDimFilter implements DimFilter { private final String dimension; private final SearchQuerySpec query; + private final ExtractionFn extractionFn; public SearchQueryDimFilter( @JsonProperty("dimension") String dimension, - @JsonProperty("query") SearchQuerySpec query + @JsonProperty("query") SearchQuerySpec query, + @JsonProperty("extractionFn") ExtractionFn extractionFn ) { Preconditions.checkArgument(dimension != null, "dimension must not be null"); @@ -44,6 +47,7 @@ public SearchQueryDimFilter( this.dimension = dimension; this.query = query; + this.extractionFn = extractionFn; } @JsonProperty @@ -58,17 +62,26 @@ public SearchQuerySpec getQuery() return query; } + @JsonProperty + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + @Override public byte[] getCacheKey() { final byte[] dimensionBytes = StringUtils.toUtf8(dimension); final byte[] queryBytes = query.getCacheKey(); + byte[] extractionFnBytes = extractionFn == null ? new byte[0] : extractionFn.getCacheKey(); - return ByteBuffer.allocate(2 + dimensionBytes.length + queryBytes.length) + return ByteBuffer.allocate(3 + dimensionBytes.length + queryBytes.length + extractionFnBytes.length) .put(DimFilterCacheHelper.SEARCH_QUERY_TYPE_ID) .put(dimensionBytes) .put(DimFilterCacheHelper.STRING_SEPARATOR) .put(queryBytes) + .put(DimFilterCacheHelper.STRING_SEPARATOR) + .put(extractionFnBytes) .array(); } @@ -81,7 +94,7 @@ public DimFilter optimize() @Override public Filter toFilter() { - return new SearchQueryFilter(dimension, query); + return new SearchQueryFilter(dimension, query, extractionFn); } @Override @@ -90,6 +103,7 @@ public String toString() return "SearchQueryDimFilter{" + "dimension='" + dimension + '\'' + ", query=" + query + + ", extractionFn='" + extractionFn + '\'' + '}'; } } diff --git a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java index 
186cbe88da28..6355ee60df9b 100644 --- a/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java +++ b/processing/src/main/java/io/druid/query/filter/SelectorDimFilter.java @@ -21,11 +21,21 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Function; import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; import com.metamx.common.StringUtils; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; import io.druid.segment.filter.SelectorFilter; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; /** */ @@ -33,17 +43,20 @@ public class SelectorDimFilter implements DimFilter { private final String dimension; private final String value; + private final ExtractionFn extractionFn; @JsonCreator public SelectorDimFilter( @JsonProperty("dimension") String dimension, - @JsonProperty("value") String value + @JsonProperty("value") String value, + @JsonProperty("extractionFn") ExtractionFn extractionFn ) { Preconditions.checkArgument(dimension != null, "dimension must not be null"); this.dimension = dimension; this.value = value; + this.extractionFn = extractionFn; } @Override @@ -51,25 +64,62 @@ public byte[] getCacheKey() { byte[] dimensionBytes = StringUtils.toUtf8(dimension); byte[] valueBytes = (value == null) ? new byte[]{} : StringUtils.toUtf8(value); + byte[] extractionFnBytes = extractionFn == null ? 
new byte[0] : extractionFn.getCacheKey(); - return ByteBuffer.allocate(2 + dimensionBytes.length + valueBytes.length) + return ByteBuffer.allocate(3 + dimensionBytes.length + valueBytes.length + extractionFnBytes.length) .put(DimFilterCacheHelper.SELECTOR_CACHE_ID) .put(dimensionBytes) .put(DimFilterCacheHelper.STRING_SEPARATOR) .put(valueBytes) + .put(DimFilterCacheHelper.STRING_SEPARATOR) + .put(extractionFnBytes) .array(); } @Override public DimFilter optimize() { + if (this.getExtractionFn() instanceof LookupExtractionFn + && ((LookupExtractionFn) this.getExtractionFn()).isOptimize()) { + LookupExtractionFn exFn = (LookupExtractionFn) this.getExtractionFn(); + LookupExtractor lookup = exFn.getLookup(); + + final String convertedValue = Strings.emptyToNull(value); + + // We cannot do an unapply()-based optimization if the selector value + // and the replaceMissingValuesWith value are the same, since we have to match on + // all values that are not present in the lookup. + if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) { + return this; + } + + final String mappingForValue = lookup.apply(convertedValue); + final List keys = new ArrayList<>(); + keys.addAll(lookup.unapply(convertedValue)); + + // If retainMissingValues is true and the selector value is not in the lookup map, + // there may be row values that match the selector value but are not included + // in the lookup map. Match on the selector value as well. + // If the selector value is overwritten in the lookup map, don't add selector value to keys. 
+ if (exFn.isRetainMissingValue() && mappingForValue == null) { + keys.add(convertedValue); + } + + if (keys.isEmpty()) { + return this; + } else if (keys.size() == 1) { + return new SelectorDimFilter(dimension, keys.get(0), null); + } else { + return new InDimFilter(dimension, keys, null); + } + } return this; } @Override public Filter toFilter() { - return new SelectorFilter(dimension, value); + return new SelectorFilter(dimension, value, extractionFn); } @JsonProperty @@ -84,6 +134,22 @@ public String getValue() return value; } + @JsonProperty + public ExtractionFn getExtractionFn() + { + return extractionFn; + } + + @Override + public String toString() + { + if (extractionFn != null) { + return String.format("%s(%s) = %s", extractionFn, dimension, value); + } else { + return String.format("%s = %s", dimension, value); + } + } + @Override public boolean equals(Object o) { @@ -96,27 +162,21 @@ public boolean equals(Object o) SelectorDimFilter that = (SelectorDimFilter) o; - if (dimension != null ? !dimension.equals(that.dimension) : that.dimension != null) { + if (!dimension.equals(that.dimension)) { return false; } if (value != null ? !value.equals(that.value) : that.value != null) { return false; } - - return true; + return extractionFn != null ? extractionFn.equals(that.extractionFn) : that.extractionFn == null; } @Override public int hashCode() { - int result = dimension != null ? dimension.hashCode() : 0; + int result = dimension.hashCode(); result = 31 * result + (value != null ? value.hashCode() : 0); + result = 31 * result + (extractionFn != null ? 
extractionFn.hashCode() : 0); return result; } - - @Override - public String toString() - { - return String.format("%s = %s", dimension, value); - } } diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryBuilder.java b/processing/src/main/java/io/druid/query/topn/TopNQueryBuilder.java index 525a2b92acc6..5631a29730e6 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryBuilder.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryBuilder.java @@ -246,13 +246,13 @@ public TopNQueryBuilder intervals(List l) public TopNQueryBuilder filters(String dimensionName, String value) { - dimFilter = new SelectorDimFilter(dimensionName, value); + dimFilter = new SelectorDimFilter(dimensionName, value, null); return this; } public TopNQueryBuilder filters(String dimensionName, String value, String... values) { - dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values)); + dimFilter = new InDimFilter(dimensionName, Lists.asList(value, values), null); return this; } diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index f990f451a6aa..fbbaf27dc977 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -424,8 +424,8 @@ public String lookupName(int id) { final String value = column.lookupName(id); return extractionFn == null ? 
- Strings.nullToEmpty(value) : - extractionFn.apply(Strings.nullToEmpty(value)); + value : + extractionFn.apply(value); } @Override diff --git a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java index a2157d6bec53..b70093b037c6 100644 --- a/processing/src/main/java/io/druid/segment/filter/BoundFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/BoundFilter.java @@ -21,6 +21,7 @@ import com.google.common.base.Predicate; import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.Filter; @@ -37,6 +38,7 @@ public class BoundFilter implements Filter { private final BoundDimFilter boundDimFilter; private final Comparator comparator; + private final ExtractionFn extractionFn; public BoundFilter(final BoundDimFilter boundDimFilter) { @@ -44,6 +46,7 @@ public BoundFilter(final BoundDimFilter boundDimFilter) this.comparator = boundDimFilter.isAlphaNumeric() ? 
StringComparators.ALPHANUMERIC : StringComparators.LEXICOGRAPHIC; + this.extractionFn = boundDimFilter.getExtractionFn(); } @Override @@ -60,7 +63,7 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) } } - if (boundDimFilter.isAlphaNumeric()) { + if (boundDimFilter.isAlphaNumeric() || extractionFn != null) { // inspect all values // will be non-null because bitmapIndex was non-null @@ -185,6 +188,10 @@ public boolean apply(String input) private boolean doesMatch(String input) { + if (extractionFn != null) { + input = extractionFn.apply(input); + } + if (input == null) { return (!boundDimFilter.hasLowerBound() || (boundDimFilter.getLower().isEmpty() && !boundDimFilter.isLowerStrict())) // lower bound allows null diff --git a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java index 72d48af16a38..9c087ecfc2b3 100644 --- a/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/DimensionPredicateFilter.java @@ -23,6 +23,7 @@ import com.google.common.base.Predicate; import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.common.guava.FunctionalIterable; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; @@ -37,27 +38,54 @@ class DimensionPredicateFilter implements Filter { private final String dimension; private final Predicate predicate; + private final ExtractionFn extractionFn; public DimensionPredicateFilter( String dimension, - Predicate predicate + Predicate predicate, + ExtractionFn extractionFn ) { this.dimension = dimension; this.predicate = predicate; + this.extractionFn = extractionFn; } @Override public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { - Indexed dimValues = 
selector.getDimensionValues(dimension); - if (dimValues == null || dimValues.size() == 0 || predicate == null) { + if (predicate == null) { return selector.getBitmapFactory().makeEmptyImmutableBitmap(); } + Indexed dimValues = selector.getDimensionValues(dimension); + if (dimValues == null || dimValues.size() == 0) { + boolean needsComplement = predicate.apply(extractionFn == null ? null : extractionFn.apply(null)); + if (needsComplement) { + return selector.getBitmapFactory().complement( + selector.getBitmapFactory().makeEmptyImmutableBitmap(), + selector.getNumRows() + ); + } else { + return selector.getBitmapFactory().makeEmptyImmutableBitmap(); + } + + } return selector.getBitmapFactory().union( FunctionalIterable.create(dimValues) - .filter(predicate) + .filter( + extractionFn == null ? + predicate + : + new Predicate() + { + @Override + public boolean apply(@Nullable String input) + { + return predicate.apply(extractionFn.apply(input)); + } + } + ) .transform( new Function() { @@ -74,6 +102,18 @@ public ImmutableBitmap apply(@Nullable String input) @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - return factory.makeValueMatcher(dimension, predicate); + if (extractionFn == null) { + return factory.makeValueMatcher(dimension, predicate); + } else { + Predicate extractingPredicate = new Predicate() + { + @Override + public boolean apply(@Nullable Object input) + { + return predicate.apply(extractionFn.apply(input)); + } + }; + return factory.makeValueMatcher(dimension, extractingPredicate); + } } } diff --git a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java b/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java deleted file mode 100644 index 144b56326b63..000000000000 --- a/processing/src/main/java/io/druid/segment/filter/ExtractionFilter.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. (Metamarkets) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package io.druid.segment.filter; - -import com.google.common.base.Predicate; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import com.metamx.collections.bitmap.ImmutableBitmap; -import io.druid.query.extraction.ExtractionFn; -import io.druid.query.filter.BitmapIndexSelector; -import io.druid.query.filter.Filter; -import io.druid.query.filter.ValueMatcher; -import io.druid.query.filter.ValueMatcherFactory; -import io.druid.segment.data.Indexed; - -import java.util.Iterator; -import java.util.List; - -/** - */ -public class ExtractionFilter implements Filter -{ - private final String dimension; - private final String value; - private final ExtractionFn fn; - - public ExtractionFilter(String dimension, String value, ExtractionFn fn) - { - this.dimension = dimension; - this.value = Strings.nullToEmpty(value); - this.fn = fn; - } - - private List makeFilters(BitmapIndexSelector selector) - { - Indexed allDimVals = selector.getDimensionValues(dimension); - final List filters = Lists.newArrayList(); - if (allDimVals == null) { - allDimVals = new Indexed() - { - @Override - public Iterator iterator() - { - return null; - } - - @Override - public Class getClazz() - { - return null; - } - - @Override - public int size() { return 1; } - - 
@Override - public String get(int index) { return null;} - - @Override - public int indexOf(String value) - { - return 0; - } - }; - } - - for (int i = 0; i < allDimVals.size(); i++) { - String dimVal = allDimVals.get(i); - if (value.equals(Strings.nullToEmpty(fn.apply(dimVal)))) { - filters.add(new SelectorFilter(dimension, dimVal)); - } - } - - return filters; - } - - @Override - public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) - { - final List filters = makeFilters(selector); - if (filters.isEmpty()) { - return selector.getBitmapFactory().makeEmptyImmutableBitmap(); - } - return new OrFilter(makeFilters(selector)).getBitmapIndex(selector); - } - - @Override - public ValueMatcher makeMatcher(ValueMatcherFactory factory) - { - return factory.makeValueMatcher( - dimension, new Predicate() - { - @Override - public boolean apply(String input) - { - // Assuming that a null/absent/empty dimension are equivalent from the druid perspective - return value.equals(Strings.nullToEmpty(fn.apply(Strings.emptyToNull(input)))); - } - } - ); - } -} diff --git a/processing/src/main/java/io/druid/segment/filter/InFilter.java b/processing/src/main/java/io/druid/segment/filter/InFilter.java index e637cbae8260..fa96bdb1480b 100644 --- a/processing/src/main/java/io/druid/segment/filter/InFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/InFilter.java @@ -23,14 +23,18 @@ import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.ImmutableBitmap; +import com.metamx.collections.bitmap.MutableBitmap; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; import javax.annotation.Nullable; 
+import java.util.List; import java.util.Set; /** @@ -39,28 +43,46 @@ public class InFilter implements Filter { private final String dimension; private final Set values; + private final ExtractionFn extractionFn; - public InFilter(String dimension, Set values) + public InFilter(String dimension, Set values, ExtractionFn extractionFn) { this.dimension = dimension; this.values = values; + this.extractionFn = extractionFn; } @Override public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) { - return selector.getBitmapFactory().union( - Iterables.transform( - values, new Function() - { - @Override - public ImmutableBitmap apply(String value) + if (extractionFn == null) { + return selector.getBitmapFactory().union( + Iterables.transform( + values, new Function() { - return selector.getBitmapIndex(dimension, value); + @Override + public ImmutableBitmap apply(String value) + { + return selector.getBitmapIndex(dimension, value); + } } - } - ) - ); + ) + ); + } else { + Iterable allDimVals = selector.getDimensionValues(dimension); + if (allDimVals == null) { + allDimVals = Lists.newArrayList((String) null); + } + + List bitmaps = Lists.newArrayList(); + for (String dimVal : allDimVals) { + System.out.println(dimVal); + if (values.contains(Strings.nullToEmpty(extractionFn.apply(dimVal)))) { + bitmaps.add(selector.getBitmapIndex(dimension, dimVal)); + } + } + return selector.getBitmapFactory().union(bitmaps); + } } @Override @@ -72,6 +94,9 @@ public ValueMatcher makeMatcher(ValueMatcherFactory factory) @Override public boolean apply(String input) { + if (extractionFn != null) { + input = extractionFn.apply(input); + } return values.contains(Strings.nullToEmpty(input)); } } diff --git a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java index 0703bc71d13c..f4d16b829f63 100644 --- a/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java +++ 
b/processing/src/main/java/io/druid/segment/filter/JavaScriptFilter.java @@ -23,6 +23,7 @@ import com.google.common.base.Predicate; import com.metamx.collections.bitmap.ImmutableBitmap; import com.metamx.common.guava.FunctionalIterable; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; @@ -38,11 +39,13 @@ public class JavaScriptFilter implements Filter { private final JavaScriptPredicate predicate; private final String dimension; + private final ExtractionFn extractionFn; - public JavaScriptFilter(String dimension, final String script) + public JavaScriptFilter(String dimension, final String script, ExtractionFn extractionFn) { this.dimension = dimension; - this.predicate = new JavaScriptPredicate(script); + this.predicate = new JavaScriptPredicate(script, extractionFn); + this.extractionFn = extractionFn; } @Override @@ -52,8 +55,11 @@ public ImmutableBitmap getBitmapIndex(final BitmapIndexSelector selector) try { final Indexed dimValues = selector.getDimensionValues(dimension); ImmutableBitmap bitmap; - if (dimValues == null) { + if (dimValues == null || dimValues.size() == 0) { bitmap = selector.getBitmapFactory().makeEmptyImmutableBitmap(); + if (predicate.applyInContext(cx, null)) { + bitmap = selector.getBitmapFactory().complement(bitmap, selector.getNumRows()); + } } else { bitmap = selector.getBitmapFactory().union( FunctionalIterable.create(dimValues) @@ -98,11 +104,13 @@ static class JavaScriptPredicate implements Predicate final ScriptableObject scope; final Function fnApply; final String script; + final ExtractionFn extractionFn; - public JavaScriptPredicate(final String script) + public JavaScriptPredicate(final String script, final ExtractionFn extractionFn) { Preconditions.checkNotNull(script, "script must not be null"); this.script = script; + this.extractionFn = extractionFn; final Context cx = Context.enter(); try { @@ 
-127,11 +135,13 @@ public boolean apply(final String input) finally { Context.exit(); } - } public boolean applyInContext(Context cx, String input) { + if (extractionFn != null) { + input = extractionFn.apply(input); + } return Context.toBoolean(fnApply.call(cx, scope, scope, new String[]{input})); } diff --git a/processing/src/main/java/io/druid/segment/filter/RegexFilter.java b/processing/src/main/java/io/druid/segment/filter/RegexFilter.java index 4c1960dad573..02a267e05977 100644 --- a/processing/src/main/java/io/druid/segment/filter/RegexFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/RegexFilter.java @@ -20,6 +20,7 @@ package io.druid.segment.filter; import com.google.common.base.Predicate; +import io.druid.query.extraction.ExtractionFn; import java.util.regex.Pattern; @@ -29,7 +30,8 @@ public class RegexFilter extends DimensionPredicateFilter { public RegexFilter( String dimension, - final String pattern + final String pattern, + final ExtractionFn extractionFn ) { super( @@ -43,7 +45,8 @@ public boolean apply(String input) { return (input != null) && compiled.matcher(input).find(); } - } + }, + extractionFn ); } } diff --git a/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java b/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java index fbd8b1a2ed56..208d5729d186 100644 --- a/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SearchQueryFilter.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Predicate; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.search.search.SearchQuerySpec; import javax.annotation.Nullable; @@ -33,7 +34,8 @@ public class SearchQueryFilter extends DimensionPredicateFilter @JsonCreator public SearchQueryFilter( @JsonProperty("dimension") String dimension, - 
@JsonProperty("query") final SearchQuerySpec query + @JsonProperty("query") final SearchQuerySpec query, + @JsonProperty("extractionFn") final ExtractionFn extractionFn ) { super( @@ -45,7 +47,8 @@ public boolean apply(@Nullable String input) { return query.accept(input); } - } + }, + extractionFn ); } } diff --git a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java index 604dce9a45dd..444f6bbb4968 100644 --- a/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java +++ b/processing/src/main/java/io/druid/segment/filter/SelectorFilter.java @@ -19,11 +19,20 @@ package io.druid.segment.filter; +import com.google.common.base.Predicate; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; import com.metamx.collections.bitmap.ImmutableBitmap; +import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.BitmapIndexSelector; import io.druid.query.filter.Filter; import io.druid.query.filter.ValueMatcher; import io.druid.query.filter.ValueMatcherFactory; +import io.druid.segment.data.Indexed; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; /** */ @@ -31,25 +40,68 @@ public class SelectorFilter implements Filter { private final String dimension; private final String value; + private final ExtractionFn extractionFn; public SelectorFilter( String dimension, - String value + String value, + ExtractionFn extractionFn ) { this.dimension = dimension; - this.value = value; + this.value = Strings.nullToEmpty(value); + this.extractionFn = extractionFn; } @Override public ImmutableBitmap getBitmapIndex(BitmapIndexSelector selector) { - return selector.getBitmapIndex(dimension, value); + if (extractionFn == null) { + return selector.getBitmapIndex(dimension, value); + } else { + final List filters = makeFiltersUsingExtractionFn(selector); + if (filters.isEmpty()) { + return 
selector.getBitmapFactory().makeEmptyImmutableBitmap(); + } + return new OrFilter(filters).getBitmapIndex(selector); + } } @Override public ValueMatcher makeMatcher(ValueMatcherFactory factory) { - return factory.makeValueMatcher(dimension, value); + if (extractionFn == null) { + return factory.makeValueMatcher(dimension, value); + } else { + return factory.makeValueMatcher( + dimension, new Predicate() + { + @Override + public boolean apply(String input) + { + // Assuming that a null/absent/empty dimension are equivalent from the druid perspective + return value.equals(Strings.nullToEmpty(extractionFn.apply(input))); + } + } + ); + } + } + + private List makeFiltersUsingExtractionFn(BitmapIndexSelector selector) + { + final List filters = Lists.newArrayList(); + + Iterable allDimVals = selector.getDimensionValues(dimension); + if (allDimVals == null) { + allDimVals = Lists.newArrayList((String) null); + } + + for (String dimVal : allDimVals) { + if (value.equals(Strings.nullToEmpty(extractionFn.apply(dimVal)))) { + filters.add(new SelectorFilter(dimension, dimVal, null)); + } + } + + return filters; } } diff --git a/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java index c0ee49d3e195..7f7a337f7f7b 100644 --- a/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java +++ b/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java @@ -196,7 +196,7 @@ public void testGroupByWithDimFilter() throws Exception ) ) .setDimFilter( - new SelectorDimFilter("tags", "t3") + new SelectorDimFilter("tags", "t3", null) ) .build(); @@ -244,7 +244,7 @@ public void testGroupByWithDimFilterAndWithFilteredDimSpec() throws Exception ) ) .setDimFilter( - new SelectorDimFilter("tags", "t3") + new SelectorDimFilter("tags", "t3", null) ) .build(); @@ -284,7 +284,7 @@ public void testTopNWithDimFilterAndWithFilteredDimSpec() throws Exception } )) .threshold(5) - .filters(new 
SelectorDimFilter("tags", "t3")).build(); + .filters(new SelectorDimFilter("tags", "t3", null)).build(); QueryRunnerFactory factory = new TopNQueryRunnerFactory( TestQueryRunners.getPool(), diff --git a/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java b/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java index 7f41e2be25c4..31bfb2cc11b8 100644 --- a/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java +++ b/processing/src/test/java/io/druid/query/aggregation/FilteredAggregatorTest.java @@ -22,11 +22,18 @@ import com.google.common.collect.Lists; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavaScriptExtractionFn; import io.druid.query.filter.AndDimFilter; +import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.JavaScriptDimFilter; import io.druid.query.filter.NotDimFilter; import io.druid.query.filter.OrDimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.search.search.ContainsSearchQuerySpec; import io.druid.segment.ColumnSelectorFactory; import io.druid.segment.DimensionSelector; import io.druid.segment.FloatColumnSelector; @@ -37,6 +44,8 @@ import org.junit.Assert; import org.junit.Test; +import java.util.Arrays; + public class FilteredAggregatorTest { private void aggregate(TestFloatColumnSelector selector, FilteredAggregator agg) @@ -53,7 +62,7 @@ public void testAggregate() FilteredAggregatorFactory factory = new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("billy", "value"), - new SelectorDimFilter("dim", "a") + new SelectorDimFilter("dim", "a", null) ); FilteredAggregator agg = (FilteredAggregator) factory.factorize( @@ -175,19 +184,10 @@ public void 
testAggregateWithNotFilter() FilteredAggregatorFactory factory = new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("billy", "value"), - new NotDimFilter(new SelectorDimFilter("dim", "b")) + new NotDimFilter(new SelectorDimFilter("dim", "b", null)) ); - FilteredAggregator agg = (FilteredAggregator) factory.factorize( - makeColumnSelector(selector) - ); - - Assert.assertEquals("billy", agg.getName()); - - double expectedFirst = new Float(values[0]).doubleValue(); - double expectedSecond = new Float(values[1]).doubleValue() + expectedFirst; - double expectedThird = expectedSecond; - assertValues(agg, selector, expectedFirst, expectedSecond, expectedThird); + validateFilteredAggs(factory, values, selector); } @Test @@ -198,7 +198,7 @@ public void testAggregateWithOrFilter() FilteredAggregatorFactory factory = new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("billy", "value"), - new OrDimFilter(Lists.newArrayList(new SelectorDimFilter("dim", "a"), new SelectorDimFilter("dim", "b"))) + new OrDimFilter(Lists.newArrayList(new SelectorDimFilter("dim", "a", null), new SelectorDimFilter("dim", "b", null))) ); FilteredAggregator agg = (FilteredAggregator) factory.factorize( @@ -221,8 +221,108 @@ public void testAggregateWithAndFilter() FilteredAggregatorFactory factory = new FilteredAggregatorFactory( new DoubleSumAggregatorFactory("billy", "value"), - new AndDimFilter(Lists.newArrayList(new NotDimFilter(new SelectorDimFilter("dim", "b")), new SelectorDimFilter("dim", "a")))); + new AndDimFilter(Lists.newArrayList(new NotDimFilter(new SelectorDimFilter("dim", "b", null)), new SelectorDimFilter("dim", "a", null)))); + + validateFilteredAggs(factory, values, selector); + } + + @Test + public void testAggregateWithPredicateFilters() + { + final float[] values = {0.15f, 0.27f}; + TestFloatColumnSelector selector; + FilteredAggregatorFactory factory; + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new 
BoundDimFilter("dim", "a", "a", false, false, true, null) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new RegexDimFilter("dim", "a", null) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new SearchQueryDimFilter("dim", new ContainsSearchQuerySpec("a", true), null) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + + String jsFn = "function(x) { return(x === 'a') }"; + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new JavaScriptDimFilter("dim", jsFn, null) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + } + + @Test + public void testAggregateWithExtractionFns() + { + final float[] values = {0.15f, 0.27f}; + TestFloatColumnSelector selector; + FilteredAggregatorFactory factory; + + String extractionJsFn = "function(str) { return str + 'AARDVARK'; }"; + ExtractionFn extractionFn = new JavaScriptExtractionFn(extractionJsFn, false); + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new SelectorDimFilter("dim", "aAARDVARK", extractionFn) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new InDimFilter("dim", Arrays.asList("NOT-aAARDVARK", "FOOBAR", "aAARDVARK"), extractionFn) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new 
BoundDimFilter("dim", "aAARDVARK", "aAARDVARK", false, false, true, extractionFn) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new RegexDimFilter("dim", "aAARDVARK", extractionFn) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new SearchQueryDimFilter("dim", new ContainsSearchQuerySpec("aAARDVARK", true), extractionFn) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + String jsFn = "function(x) { return(x === 'aAARDVARK') }"; + factory = new FilteredAggregatorFactory( + new DoubleSumAggregatorFactory("billy", "value"), + new JavaScriptDimFilter("dim", jsFn, extractionFn) + ); + selector = new TestFloatColumnSelector(values); + validateFilteredAggs(factory, values, selector); + } + + private void validateFilteredAggs( + FilteredAggregatorFactory factory, + float[] values, + TestFloatColumnSelector selector + ) + { FilteredAggregator agg = (FilteredAggregator) factory.factorize( makeColumnSelector(selector) ); @@ -232,7 +332,7 @@ public void testAggregateWithAndFilter() double expectedFirst = new Float(values[0]).doubleValue(); double expectedSecond = new Float(values[1]).doubleValue() + expectedFirst; double expectedThird = expectedSecond; + assertValues(agg, selector, expectedFirst, expectedSecond, expectedThird); } - } diff --git a/processing/src/test/java/io/druid/query/filter/BoundDimFilterTest.java b/processing/src/test/java/io/druid/query/filter/BoundDimFilterTest.java index 8b427d2aa12d..1f99d084fb48 100644 --- a/processing/src/test/java/io/druid/query/filter/BoundDimFilterTest.java +++ b/processing/src/test/java/io/druid/query/filter/BoundDimFilterTest.java @@ -27,6 +27,8 @@ import 
com.google.inject.Key; import io.druid.guice.GuiceInjectors; import io.druid.guice.annotations.Json; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.RegexDimExtractionFn; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; @@ -42,17 +44,22 @@ public class BoundDimFilterTest private final BoundDimFilter boundDimFilter; + private static final ExtractionFn extractionFn = new RegexDimExtractionFn(".*", false, null); + @Parameterized.Parameters public static Iterable constructorFeeder(){ - return ImmutableList.of(new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, null)}, - new Object[]{new BoundDimFilter("dimension", "12", "15", null, true, false)}, - new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, true)}, - new Object[]{new BoundDimFilter("dimension", null, "15", null, true, true)}, - new Object[]{new BoundDimFilter("dimension", "12", "15", true, null, null)}, - new Object[]{new BoundDimFilter("dimension", "12", null, true, null, true)}, - new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, true)}, - new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, false)}); + return ImmutableList.of( + new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, null, null)}, + new Object[]{new BoundDimFilter("dimension", "12", "15", null, true, false, null)}, + new Object[]{new BoundDimFilter("dimension", "12", "15", null, null, true, null)}, + new Object[]{new BoundDimFilter("dimension", null, "15", null, true, true, null)}, + new Object[]{new BoundDimFilter("dimension", "12", "15", true, null, null, null)}, + new Object[]{new BoundDimFilter("dimension", "12", null, true, null, true, null)}, + new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, true, null)}, + new Object[]{new BoundDimFilter("dimension", "12", "15", true, true, false, null)}, + new Object[]{new BoundDimFilter("dimension", null, "15", null, true, true, 
extractionFn)} + ); } @Test @@ -68,10 +75,24 @@ public void testSerDesBoundFilter() throws IOException @Test public void testGetCacheKey() { - BoundDimFilter boundDimFilter = new BoundDimFilter("dimension", "12", "15", null, null, true); - BoundDimFilter boundDimFilterCopy = new BoundDimFilter("dimension", "12", "15", false, false, true); + BoundDimFilter boundDimFilter = new BoundDimFilter("dimension", "12", "15", null, null, true, null); + BoundDimFilter boundDimFilterCopy = new BoundDimFilter("dimension", "12", "15", false, false, true, null); Assert.assertArrayEquals(boundDimFilter.getCacheKey(), boundDimFilterCopy.getCacheKey()); - BoundDimFilter anotherBoundDimFilter = new BoundDimFilter("dimension", "12", "15", true, null, false); + BoundDimFilter anotherBoundDimFilter = new BoundDimFilter("dimension", "12", "15", true, null, false, null); Assert.assertFalse(Arrays.equals(anotherBoundDimFilter.getCacheKey(), boundDimFilter.getCacheKey())); + + BoundDimFilter boundDimFilterWithExtract = new BoundDimFilter("dimension", "12", "15", null, null, true, extractionFn); + BoundDimFilter boundDimFilterWithExtractCopy = new BoundDimFilter("dimension", "12", "15", false, false, true, extractionFn); + Assert.assertFalse(Arrays.equals(boundDimFilter.getCacheKey(), boundDimFilterWithExtract.getCacheKey())); + Assert.assertArrayEquals(boundDimFilterWithExtract.getCacheKey(), boundDimFilterWithExtractCopy.getCacheKey()); + } + + @Test + public void testHashCode() + { + BoundDimFilter boundDimFilter = new BoundDimFilter("dimension", "12", "15", null, null, true, null); + BoundDimFilter boundDimFilterWithExtract = new BoundDimFilter("dimension", "12", "15", null, null, true, extractionFn); + + Assert.assertNotEquals(boundDimFilter.hashCode(), boundDimFilterWithExtract.hashCode()); } } diff --git a/processing/src/test/java/io/druid/query/filter/InDimFilterSerDesrTest.java b/processing/src/test/java/io/druid/query/filter/InDimFilterSerDesrTest.java index 
80e9dc6d7fcb..7e7539fe73d7 100644 --- a/processing/src/test/java/io/druid/query/filter/InDimFilterSerDesrTest.java +++ b/processing/src/test/java/io/druid/query/filter/InDimFilterSerDesrTest.java @@ -24,6 +24,7 @@ import com.google.inject.Key; import io.druid.guice.GuiceInjectors; import io.druid.guice.annotations.Json; +import io.druid.query.extraction.RegexDimExtractionFn; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -35,7 +36,7 @@ public class InDimFilterSerDesrTest { private static ObjectMapper mapper; - private final String actualInFilter = "{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"good\",\"bad\"]}"; + private final String actualInFilter = "{\"type\":\"in\",\"dimension\":\"dimTest\",\"values\":[\"good\",\"bad\"],\"extractionFn\":null}"; @Before public void setUp() { @@ -47,14 +48,14 @@ public void setUp() public void testDeserialization() throws IOException { final InDimFilter actualInDimFilter = mapper.reader(DimFilter.class).readValue(actualInFilter); - final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad")); + final InDimFilter expectedInDimFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); Assert.assertEquals(expectedInDimFilter, actualInDimFilter); } @Test public void testSerialization() throws IOException { - final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad")); + final InDimFilter dimInFilter = new InDimFilter("dimTest", Arrays.asList("good", "bad"), null); final String expectedInFilter = mapper.writeValueAsString(dimInFilter); Assert.assertEquals(expectedInFilter, actualInFilter); } @@ -62,9 +63,14 @@ public void testSerialization() throws IOException @Test public void testGetCacheKey() { - final InDimFilter inDimFilter_1 = new InDimFilter("dimTest", Arrays.asList("good", "bad")); - final InDimFilter inDimFilter_2 = new InDimFilter("dimTest", Arrays.asList("good,bad")); + final InDimFilter inDimFilter_1 = new 
InDimFilter("dimTest", Arrays.asList("good", "bad"), null); + final InDimFilter inDimFilter_2 = new InDimFilter("dimTest", Arrays.asList("good,bad"), null); Assert.assertNotEquals(inDimFilter_1.getCacheKey(), inDimFilter_2.getCacheKey()); + + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + final InDimFilter inDimFilter_3 = new InDimFilter("dimTest", Arrays.asList("good", "bad"), regexFn); + final InDimFilter inDimFilter_4 = new InDimFilter("dimTest", Arrays.asList("good,bad"), regexFn); + Assert.assertNotEquals(inDimFilter_3.getCacheKey(), inDimFilter_4.getCacheKey()); } @Test diff --git a/processing/src/test/java/io/druid/query/filter/JavaScriptDimFilterTest.java b/processing/src/test/java/io/druid/query/filter/JavaScriptDimFilterTest.java index 3e8b37268743..dd3342f623b1 100644 --- a/processing/src/test/java/io/druid/query/filter/JavaScriptDimFilterTest.java +++ b/processing/src/test/java/io/druid/query/filter/JavaScriptDimFilterTest.java @@ -19,6 +19,7 @@ package io.druid.query.filter; +import io.druid.query.extraction.RegexDimExtractionFn; import org.junit.Assert; import org.junit.Test; @@ -30,8 +31,12 @@ public class JavaScriptDimFilterTest @Test public void testGetCacheKey() { - JavaScriptDimFilter javaScriptDimFilter = new JavaScriptDimFilter("dim", "fn"); - JavaScriptDimFilter javaScriptDimFilter2 = new JavaScriptDimFilter("di", "mfn"); + JavaScriptDimFilter javaScriptDimFilter = new JavaScriptDimFilter("dim", "fn", null); + JavaScriptDimFilter javaScriptDimFilter2 = new JavaScriptDimFilter("di", "mfn", null); Assert.assertFalse(Arrays.equals(javaScriptDimFilter.getCacheKey(), javaScriptDimFilter2.getCacheKey())); + + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + JavaScriptDimFilter javaScriptDimFilter3 = new JavaScriptDimFilter("dim", "fn", regexFn); + Assert.assertFalse(Arrays.equals(javaScriptDimFilter.getCacheKey(), javaScriptDimFilter3.getCacheKey())); } } diff --git 
a/processing/src/test/java/io/druid/query/filter/RegexDimFilterTest.java b/processing/src/test/java/io/druid/query/filter/RegexDimFilterTest.java index f137f469f53d..5e0f719f8208 100644 --- a/processing/src/test/java/io/druid/query/filter/RegexDimFilterTest.java +++ b/processing/src/test/java/io/druid/query/filter/RegexDimFilterTest.java @@ -19,6 +19,7 @@ package io.druid.query.filter; +import io.druid.query.extraction.RegexDimExtractionFn; import org.junit.Assert; import org.junit.Test; @@ -30,8 +31,13 @@ public class RegexDimFilterTest @Test public void testGetCacheKey() { - RegexDimFilter regexDimFilter = new RegexDimFilter("dim", "reg"); - RegexDimFilter regexDimFilter2 = new RegexDimFilter("di", "mreg"); + RegexDimFilter regexDimFilter = new RegexDimFilter("dim", "reg", null); + RegexDimFilter regexDimFilter2 = new RegexDimFilter("di", "mreg", null); Assert.assertFalse(Arrays.equals(regexDimFilter.getCacheKey(), regexDimFilter2.getCacheKey())); + + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + RegexDimFilter regexDimFilter3 = new RegexDimFilter("dim", "reg", regexFn); + Assert.assertFalse(Arrays.equals(regexDimFilter.getCacheKey(), regexDimFilter3.getCacheKey())); + } } diff --git a/processing/src/test/java/io/druid/query/filter/SearchQueryDimFilterTest.java b/processing/src/test/java/io/druid/query/filter/SearchQueryDimFilterTest.java index e1799f62942b..f90880914c10 100644 --- a/processing/src/test/java/io/druid/query/filter/SearchQueryDimFilterTest.java +++ b/processing/src/test/java/io/druid/query/filter/SearchQueryDimFilterTest.java @@ -20,6 +20,7 @@ package io.druid.query.filter; import com.metamx.common.StringUtils; +import io.druid.query.extraction.RegexDimExtractionFn; import io.druid.query.search.search.SearchQuerySpec; import org.junit.Assert; import org.junit.Test; @@ -32,35 +33,64 @@ public class SearchQueryDimFilterTest @Test public void testGetCacheKey() { - SearchQueryDimFilter searchQueryDimFilter = new 
SearchQueryDimFilter("dim", new SearchQuerySpec() - { - @Override - public boolean accept(String dimVal) - { - return false; - } + SearchQueryDimFilter searchQueryDimFilter = new SearchQueryDimFilter( + "dim", + new SearchQuerySpec() + { + @Override + public boolean accept(String dimVal) + { + return false; + } - @Override - public byte[] getCacheKey() - { - return StringUtils.toUtf8("value"); - } - }); + @Override + public byte[] getCacheKey() + { + return StringUtils.toUtf8("value"); + } + }, + null + ); - SearchQueryDimFilter searchQueryDimFilter2 = new SearchQueryDimFilter("di", new SearchQuerySpec() - { - @Override - public boolean accept(String dimVal) - { - return false; - } + SearchQueryDimFilter searchQueryDimFilter2 = new SearchQueryDimFilter( + "di", + new SearchQuerySpec() + { + @Override + public boolean accept(String dimVal) + { + return false; + } - @Override - public byte[] getCacheKey() - { - return StringUtils.toUtf8("mvalue"); - } - }); + @Override + public byte[] getCacheKey() + { + return StringUtils.toUtf8("mvalue"); + } + }, + null + ); Assert.assertFalse(Arrays.equals(searchQueryDimFilter.getCacheKey(), searchQueryDimFilter2.getCacheKey())); + + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + SearchQueryDimFilter searchQueryDimFilter3 = new SearchQueryDimFilter( + "dim", + new SearchQuerySpec() + { + @Override + public boolean accept(String dimVal) + { + return false; + } + + @Override + public byte[] getCacheKey() + { + return StringUtils.toUtf8("value"); + } + }, + regexFn + ); + Assert.assertFalse(Arrays.equals(searchQueryDimFilter.getCacheKey(), searchQueryDimFilter3.getCacheKey())); } } diff --git a/processing/src/test/java/io/druid/query/filter/SelectorDimFilterTest.java b/processing/src/test/java/io/druid/query/filter/SelectorDimFilterTest.java index 577297214dc4..d5e9a70fbad7 100644 --- a/processing/src/test/java/io/druid/query/filter/SelectorDimFilterTest.java +++ 
b/processing/src/test/java/io/druid/query/filter/SelectorDimFilterTest.java @@ -19,6 +19,7 @@ package io.druid.query.filter; +import io.druid.query.extraction.RegexDimExtractionFn; import org.junit.Assert; import org.junit.Test; @@ -29,15 +30,40 @@ public class SelectorDimFilterTest @Test public void testGetCacheKey() { - SelectorDimFilter selectorDimFilter = new SelectorDimFilter("abc", "d"); - SelectorDimFilter selectorDimFilter2 = new SelectorDimFilter("ab", "cd"); + SelectorDimFilter selectorDimFilter = new SelectorDimFilter("abc", "d", null); + SelectorDimFilter selectorDimFilter2 = new SelectorDimFilter("ab", "cd", null); Assert.assertFalse(Arrays.equals(selectorDimFilter.getCacheKey(), selectorDimFilter2.getCacheKey())); + + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + SelectorDimFilter selectorDimFilter3 = new SelectorDimFilter("abc", "d", regexFn); + Assert.assertFalse(Arrays.equals(selectorDimFilter.getCacheKey(), selectorDimFilter3.getCacheKey())); + } + + @Test + public void testToString() + { + SelectorDimFilter selectorDimFilter = new SelectorDimFilter("abc", "d", null); + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + SelectorDimFilter selectorDimFilter2 = new SelectorDimFilter("abc", "d", regexFn); + + Assert.assertEquals("abc = d", selectorDimFilter.toString()); + Assert.assertEquals("regex(.*)(abc) = d", selectorDimFilter2.toString()); + } + + @Test + public void testHashCode() + { + SelectorDimFilter selectorDimFilter = new SelectorDimFilter("abc", "d", null); + RegexDimExtractionFn regexFn = new RegexDimExtractionFn(".*", false, null); + SelectorDimFilter selectorDimFilter2 = new SelectorDimFilter("abc", "d", regexFn); + + Assert.assertNotEquals(selectorDimFilter.hashCode(), selectorDimFilter2.hashCode()); } @Test public void testSimpleOptimize() { - SelectorDimFilter selectorDimFilter = new SelectorDimFilter("abc", "d"); + SelectorDimFilter selectorDimFilter = new 
SelectorDimFilter("abc", "d", null); DimFilter filter = new AndDimFilter( Arrays.asList( new OrDimFilter( diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 0dbdf6d99607..ea8911814ee4 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -64,6 +64,11 @@ import io.druid.query.dimension.DimensionSpec; import io.druid.query.dimension.ExtractionDimensionSpec; import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavaScriptExtractionFn; +import io.druid.query.filter.AndDimFilter; +import io.druid.query.filter.BoundDimFilter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; import io.druid.query.lookup.LookupExtractionFn; import io.druid.query.extraction.MapLookupExtractor; import io.druid.query.extraction.RegexDimExtractionFn; @@ -82,6 +87,7 @@ import io.druid.query.groupby.orderby.LimitSpec; import io.druid.query.groupby.orderby.OrderByColumnSpec; import io.druid.query.ordering.StringComparators; +import io.druid.query.search.search.ContainsSearchQuerySpec; import io.druid.query.spec.MultipleIntervalSegmentSpec; import io.druid.segment.TestHelper; import io.druid.segment.column.Column; @@ -101,6 +107,7 @@ import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; @@ -2506,7 +2513,7 @@ public void testGroupByWithRegEx() throws Exception .builder() .setDataSource(QueryRunnerTestHelper.dataSource) .setInterval("2011-04-02/2011-04-04") - .setDimFilter(new RegexDimFilter("quality", "auto.*")) + .setDimFilter(new RegexDimFilter("quality", "auto.*", null)) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", 
"quality"))) .setAggregatorSpecs( Arrays.asList( @@ -2715,7 +2722,7 @@ public void testIdenticalSubquery() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }")) + .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }", null)) .setAggregatorSpecs( Arrays.asList( QueryRunnerTestHelper.rowsCount, @@ -2774,7 +2781,7 @@ public void testSubqueryWithMultipleIntervalsInOuterQuery() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }")) + .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }", null)) .setAggregatorSpecs( Arrays.asList( QueryRunnerTestHelper.rowsCount, @@ -2842,7 +2849,7 @@ public void testSubqueryWithExtractionFnInOuterQuery() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }")) + .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }", null)) .setAggregatorSpecs( Arrays.asList( QueryRunnerTestHelper.rowsCount, @@ -3012,15 +3019,15 @@ public void testDifferentGroupingSubqueryWithFilter() .setDimFilter( new OrDimFilter( Lists.newArrayList( - new SelectorDimFilter("quality", "automotive"), - new SelectorDimFilter("quality", "premium"), - new SelectorDimFilter("quality", "mezzanine"), - new SelectorDimFilter("quality", "business"), - new SelectorDimFilter("quality", "entertainment"), - new SelectorDimFilter("quality", "health"), - new 
SelectorDimFilter("quality", "news"), - new SelectorDimFilter("quality", "technology"), - new SelectorDimFilter("quality", "travel") + new SelectorDimFilter("quality", "automotive", null), + new SelectorDimFilter("quality", "premium", null), + new SelectorDimFilter("quality", "mezzanine", null), + new SelectorDimFilter("quality", "business", null), + new SelectorDimFilter("quality", "entertainment", null), + new SelectorDimFilter("quality", "health", null), + new SelectorDimFilter("quality", "news", null), + new SelectorDimFilter("quality", "technology", null), + new SelectorDimFilter("quality", "travel", null) ) ) ) @@ -3114,7 +3121,7 @@ public void testSubqueryWithPostAggregators() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }")) + .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }", null)) .setAggregatorSpecs( Arrays.asList( QueryRunnerTestHelper.rowsCount, @@ -3375,7 +3382,7 @@ public void testSubqueryWithPostAggregatorsAndHaving() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }")) + .setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }", null)) .setAggregatorSpecs( Arrays.asList( QueryRunnerTestHelper.rowsCount, @@ -3633,7 +3640,7 @@ public void testSubqueryWithMultiColumnAggregators() .setDataSource(QueryRunnerTestHelper.dataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) - .setDimFilter(new JavaScriptDimFilter("market", "function(dim){ return true; }")) + .setDimFilter(new 
JavaScriptDimFilter("market", "function(dim){ return true; }", null)) .setAggregatorSpecs( Arrays.asList( QueryRunnerTestHelper.rowsCount, @@ -4054,8 +4061,8 @@ public void testGroupByTimeExtraction() .setDimFilter( new OrDimFilter( Arrays.asList( - new SelectorDimFilter("market", "spot"), - new SelectorDimFilter("market", "upfront") + new SelectorDimFilter("market", "spot", null), + new SelectorDimFilter("market", "upfront", null) ) ) ) @@ -4286,7 +4293,7 @@ public void testBySegmentResults() ) ) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) - .setDimFilter(new SelectorDimFilter("quality", "mezzanine")) + .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null)) .setContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -4362,7 +4369,7 @@ public void testBySegmentResultsUnOptimizedDimextraction() ) ) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) - .setDimFilter(new SelectorDimFilter("quality", "mezzanine")) + .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null)) .setContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -4437,7 +4444,7 @@ public void testBySegmentResultsOptimizedDimextraction() ) ) .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) - .setDimFilter(new SelectorDimFilter("quality", "mezzanine")) + .setDimFilter(new SelectorDimFilter("quality", "mezzanine", null)) .setContext(ImmutableMap.of("bySegment", true)); final GroupByQuery fullQuery = builder.build(); QueryToolChest toolChest = factory.getToolchest(); @@ -4474,11 +4481,11 @@ public void testGroupByWithExtractionDimFilter() List dimFilters = Lists.newArrayList( new ExtractionDimFilter("quality", "automotiveAndBusinessAndNewsAndMezzanine", lookupExtractionFn, null), - new SelectorDimFilter("quality", "entertainment"), - new 
SelectorDimFilter("quality", "health"), - new SelectorDimFilter("quality", "premium"), - new SelectorDimFilter("quality", "technology"), - new SelectorDimFilter("quality", "travel") + new SelectorDimFilter("quality", "entertainment", null), + new SelectorDimFilter("quality", "health", null), + new SelectorDimFilter("quality", "premium", null), + new SelectorDimFilter("quality", "technology", null), + new SelectorDimFilter("quality", "travel", null) ); GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) @@ -4751,7 +4758,8 @@ public void testGroupByWithExtractionDimFilterOptimazitionManyToOne() } - @Test public void testGroupByWithExtractionDimFilterNullDims() + @Test + public void testGroupByWithExtractionDimFilterNullDims() { Map extractionMap = new HashMap<>(); extractionMap.put("", "EMPTY"); @@ -4773,4 +4781,111 @@ public void testGroupByWithExtractionDimFilterOptimazitionManyToOne() Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); TestHelper.assertExpectedObjects(expectedResults, results, ""); } + + @Test + public void testBySegmentResultsWithAllFiltersWithExtractionFns() + { + int segmentCount = 32; + Result singleSegmentResult = new Result( + new DateTime("2011-01-12T00:00:00.000Z"), + new BySegmentResultValueClass( + Arrays.asList( + GroupByQueryRunnerTestHelper.createExpectedRow( + "2011-04-01", + "alias", + "mezzanine", + "rows", + 6L, + "idx", + 4420L + ) + ), "testSegment", new Interval("2011-04-02T00:00:00.000Z/2011-04-04T00:00:00.000Z") + ) + ); + List bySegmentResults = Lists.newArrayList(); + for (int i = 0; i < segmentCount; i++) { + bySegmentResults.add(singleSegmentResult); + } + + String extractionJsFn = "function(str) { return 'super-' + str; }"; + String jsFn = "function(x) { return(x === 'super-mezzanine') }"; + ExtractionFn extractionFn = new JavaScriptExtractionFn(extractionJsFn, false); + + List superFilterList = new ArrayList<>(); + superFilterList.add(new 
SelectorDimFilter("quality", "super-mezzanine", extractionFn)); + superFilterList.add(new InDimFilter("quality", Arrays.asList("not-super-mezzanine", "FOOBAR", "super-mezzanine"), extractionFn)); + superFilterList.add(new BoundDimFilter("quality", "super-mezzanine", "super-mezzanine", false, false, true, extractionFn)); + superFilterList.add(new RegexDimFilter("quality", "super-mezzanine", extractionFn)); + superFilterList.add(new SearchQueryDimFilter("quality", new ContainsSearchQuerySpec("super-mezzanine", true), extractionFn)); + superFilterList.add(new JavaScriptDimFilter("quality", jsFn, extractionFn)); + DimFilter superFilter = new AndDimFilter(superFilterList); + + GroupByQuery.Builder builder = GroupByQuery + .builder() + .setDataSource(QueryRunnerTestHelper.dataSource) + .setInterval("2011-04-02/2011-04-04") + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("quality", "alias"))) + .setAggregatorSpecs( + Arrays.asList( + QueryRunnerTestHelper.rowsCount, + new LongSumAggregatorFactory("idx", "index") + ) + ) + .setGranularity(new PeriodGranularity(new Period("P1M"), null, null)) + .setDimFilter(superFilter) + .setContext(ImmutableMap.of("bySegment", true)); + final GroupByQuery fullQuery = builder.build(); + QueryToolChest toolChest = factory.getToolchest(); + + List> singleSegmentRunners = Lists.newArrayList(); + for (int i = 0; i < segmentCount; i++) { + singleSegmentRunners.add(toolChest.preMergeQueryDecoration(runner)); + } + ExecutorService exec = Executors.newCachedThreadPool(); + QueryRunner theRunner = toolChest.postMergeQueryDecoration( + new FinalizeResultsQueryRunner<>( + toolChest.mergeResults(factory.mergeRunners(Executors.newCachedThreadPool(), singleSegmentRunners)), + toolChest + ) + ); + + TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), ""); + exec.shutdownNow(); + } + + @Test + public void testGroupByWithAllFiltersOnNullDimsWithExtractionFns() + { + Map extractionMap = new 
HashMap<>(); + extractionMap.put("", "EMPTY"); + extractionMap.put(null, "EMPTY"); + + MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false); + LookupExtractionFn extractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, true); + String jsFn = "function(x) { return(x === 'EMPTY') }"; + + List superFilterList = new ArrayList<>(); + superFilterList.add(new SelectorDimFilter("null_column", "EMPTY", extractionFn)); + superFilterList.add(new InDimFilter("null_column", Arrays.asList("NOT-EMPTY", "FOOBAR", "EMPTY"), extractionFn)); + superFilterList.add(new BoundDimFilter("null_column", "EMPTY", "EMPTY", false, false, true, extractionFn)); + superFilterList.add(new RegexDimFilter("null_column", "EMPTY", extractionFn)); + superFilterList.add(new SearchQueryDimFilter("null_column", new ContainsSearchQuerySpec("EMPTY", true), extractionFn)); + superFilterList.add(new JavaScriptDimFilter("null_column", jsFn, extractionFn)); + DimFilter superFilter = new AndDimFilter(superFilterList); + + GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird) + .setDimensions(Lists.newArrayList(new DefaultDimensionSpec("null_column", "alias"))) + .setAggregatorSpecs( + Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))) + .setGranularity(QueryRunnerTestHelper.dayGran) + .setDimFilter(superFilter).build(); + + List expectedResults = Arrays + .asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L), + GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L)); + + Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); + TestHelper.assertExpectedObjects(expectedResults, results, ""); + } } diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java 
b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java index d1456a720fcc..0b968dd93b7e 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryRunnerTest.java @@ -385,8 +385,8 @@ public void testSearchWithSingleFilter1() .filters( new AndDimFilter( Arrays.asList( - new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "total_market"), - new SelectorDimFilter(QueryRunnerTestHelper.qualityDimension, "mezzanine")))) + new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "total_market", null), + new SelectorDimFilter(QueryRunnerTestHelper.qualityDimension, "mezzanine", null)))) .intervals(QueryRunnerTestHelper.fullOnInterval) .dimensions(QueryRunnerTestHelper.qualityDimension) .query("a") diff --git a/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java b/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java index 58895b35ce5a..9f8ddd92b0ef 100644 --- a/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/select/SelectQueryRunnerTest.java @@ -398,7 +398,7 @@ public void testFullOnSelectWithFilter() for (int[] param : new int[][]{{3, 3}, {0, 1}, {5, 5}, {2, 7}, {3, 0}}) { SelectQuery query = newTestQuery() .intervals(I_0112_0114) - .filters(new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot")) + .filters(new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null)) .granularity(QueryRunnerTestHelper.dayGran) .dimensionSpecs(DefaultDimensionSpec.toSpec(QueryRunnerTestHelper.qualityDimension)) .metrics(Lists.newArrayList(QueryRunnerTestHelper.indexMetric)) @@ -463,8 +463,8 @@ public void testFullSelectNoResults() .filters( new AndDimFilter( Arrays.asList( - new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot"), - new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "foo") + new 
SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "spot", null), + new SelectorDimFilter(QueryRunnerTestHelper.marketDimension, "foo", null) ) ) ) diff --git a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java index b66ddb85dba0..d57436de47f2 100644 --- a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryRunnerTest.java @@ -791,7 +791,7 @@ public void testTimeseriesWithRegexFilter() TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) - .filters(new RegexDimFilter(QueryRunnerTestHelper.marketDimension, "^.p.*$")) // spot and upfront + .filters(new RegexDimFilter(QueryRunnerTestHelper.marketDimension, "^.p.*$", null)) // spot and upfront .intervals(QueryRunnerTestHelper.firstToThird) .aggregators( Arrays.asList( @@ -1232,12 +1232,14 @@ public void testTimeseriesWithInFilter() .granularity(QueryRunnerTestHelper.dayGran) .filters( new InDimFilter( - QueryRunnerTestHelper.marketDimension, Arrays.asList( - "spot", - "upfront", - "total_market", - "billyblank" - ) + QueryRunnerTestHelper.marketDimension, + Arrays.asList( + "spot", + "upfront", + "total_market", + "billyblank" + ), + null ) ) .intervals(QueryRunnerTestHelper.firstToThird) @@ -1466,7 +1468,7 @@ public void testTimeseriesWithInvertedFilterOnNonExistentDimension() TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() .dataSource(QueryRunnerTestHelper.dataSource) .granularity(QueryRunnerTestHelper.dayGran) - .filters(new NotDimFilter(new SelectorDimFilter("bobby", "sally"))) + .filters(new NotDimFilter(new SelectorDimFilter("bobby", "sally", null))) .intervals(QueryRunnerTestHelper.firstToThird) .aggregators(QueryRunnerTestHelper.commonAggregators) 
.postAggregators(Arrays.asList(QueryRunnerTestHelper.addRowsIndexConstant)) @@ -2198,6 +2200,7 @@ public void testTimeseriesWithBoundFilter1() "spot", true, null, + null, null ), new BoundDimFilter( @@ -2206,6 +2209,7 @@ public void testTimeseriesWithBoundFilter1() "spotify", null, true, + null, null ), (DimFilter) new BoundDimFilter( @@ -2214,6 +2218,7 @@ public void testTimeseriesWithBoundFilter1() "spot", null, null, + null, null ) ) diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index 4bee2f23aae4..6b7526f500d0 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -3042,7 +3042,7 @@ public void testTopNOverNullDimensionWithFilter() .granularity(QueryRunnerTestHelper.allGran) .dimension("null_column") .filters( - new SelectorDimFilter("null_column", null) + new SelectorDimFilter("null_column", null, null) ) .metric(QueryRunnerTestHelper.indexMetric) .threshold(4) @@ -3127,7 +3127,7 @@ public void testTopNOverPartialNullDimensionWithFilterOnNullValue() .granularity(QueryGranularity.ALL) .dimension("partial_null_column") .metric(QueryRunnerTestHelper.uniqueMetric) - .filters(new SelectorDimFilter("partial_null_column", null)) + .filters(new SelectorDimFilter("partial_null_column", null, null)) .threshold(1000) .intervals(QueryRunnerTestHelper.firstToThird) .aggregators(QueryRunnerTestHelper.commonAggregators) @@ -3159,7 +3159,7 @@ public void testTopNOverPartialNullDimensionWithFilterOnNOTNullValue() .granularity(QueryGranularity.ALL) .dimension("partial_null_column") .metric(QueryRunnerTestHelper.uniqueMetric) - .filters(new SelectorDimFilter("partial_null_column", "value")) + .filters(new SelectorDimFilter("partial_null_column", "value", null)) .threshold(1000) .intervals(QueryRunnerTestHelper.firstToThird) 
.aggregators(QueryRunnerTestHelper.commonAggregators) diff --git a/processing/src/test/java/io/druid/segment/filter/BoundFilterTest.java b/processing/src/test/java/io/druid/segment/filter/BoundFilterTest.java index 61dc41459898..ee1104f9a7f2 100644 --- a/processing/src/test/java/io/druid/segment/filter/BoundFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/BoundFilterTest.java @@ -29,6 +29,8 @@ import io.druid.data.input.impl.MapInputRowParser; import io.druid.data.input.impl.TimeAndDimsParseSpec; import io.druid.data.input.impl.TimestampSpec; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavaScriptExtractionFn; import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.DimFilter; import io.druid.segment.IndexBuilder; @@ -78,10 +80,10 @@ public BoundFilterTest( public void testLexicographicMatchEverything() { final List filters = ImmutableList.of( - new BoundDimFilter("dim0", "", "z", false, false, false), - new BoundDimFilter("dim1", "", "z", false, false, false), - new BoundDimFilter("dim2", "", "z", false, false, false), - new BoundDimFilter("dim3", "", "z", false, false, false) + new BoundDimFilter("dim0", "", "z", false, false, false, null), + new BoundDimFilter("dim1", "", "z", false, false, false, null), + new BoundDimFilter("dim2", "", "z", false, false, false, null), + new BoundDimFilter("dim3", "", "z", false, false, false, null) ); for (BoundDimFilter filter : filters) { @@ -93,15 +95,15 @@ public void testLexicographicMatchEverything() public void testLexicographicMatchNull() { assertFilterMatches( - new BoundDimFilter("dim0", "", "", false, false, false), + new BoundDimFilter("dim0", "", "", false, false, false, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "", "", false, false, false), + new BoundDimFilter("dim1", "", "", false, false, false, null), ImmutableList.of("0") ); assertFilterMatches( - new BoundDimFilter("dim2", "", "", false, false, 
false), + new BoundDimFilter("dim2", "", "", false, false, false, null), ImmutableList.of("1", "2", "5") ); } @@ -110,27 +112,27 @@ public void testLexicographicMatchNull() public void testLexicographicMatchMissingColumn() { assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, false, false), + new BoundDimFilter("dim3", "", "", false, false, false, null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); assertFilterMatches( - new BoundDimFilter("dim3", "", "", true, false, false), + new BoundDimFilter("dim3", "", "", true, false, false, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, true, false), + new BoundDimFilter("dim3", "", "", false, true, false, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim3", "", null, false, true, false), + new BoundDimFilter("dim3", "", null, false, true, false, null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); assertFilterMatches( - new BoundDimFilter("dim3", null, "", false, false, false), + new BoundDimFilter("dim3", null, "", false, false, false, null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); assertFilterMatches( - new BoundDimFilter("dim3", null, "", false, true, false), + new BoundDimFilter("dim3", null, "", false, true, false, null), ImmutableList.of() ); } @@ -139,15 +141,15 @@ public void testLexicographicMatchMissingColumn() public void testLexicographicMatchTooStrict() { assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", true, false, false), + new BoundDimFilter("dim1", "abc", "abc", true, false, false, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", true, true, false), + new BoundDimFilter("dim1", "abc", "abc", true, true, false, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", false, true, false), + new BoundDimFilter("dim1", "abc", "abc", false, true, false, null), ImmutableList.of() ); } @@ -156,7 +158,7 @@ public void 
testLexicographicMatchTooStrict() public void testLexicographicMatchExactlySingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "abc", "abc", false, false, false), + new BoundDimFilter("dim1", "abc", "abc", false, false, false, null), ImmutableList.of("5") ); } @@ -165,7 +167,7 @@ public void testLexicographicMatchExactlySingleValue() public void testLexicographicMatchSurroundingSingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "ab", "abd", true, true, false), + new BoundDimFilter("dim1", "ab", "abd", true, true, false, null), ImmutableList.of("5") ); } @@ -174,7 +176,7 @@ public void testLexicographicMatchSurroundingSingleValue() public void testLexicographicMatchNoUpperLimit() { assertFilterMatches( - new BoundDimFilter("dim1", "ab", null, true, true, false), + new BoundDimFilter("dim1", "ab", null, true, true, false, null), ImmutableList.of("4", "5") ); } @@ -183,7 +185,7 @@ public void testLexicographicMatchNoUpperLimit() public void testLexicographicMatchNoLowerLimit() { assertFilterMatches( - new BoundDimFilter("dim1", null, "abd", true, true, false), + new BoundDimFilter("dim1", null, "abd", true, true, false, null), ImmutableList.of("0", "1", "2", "3", "5") ); } @@ -192,11 +194,11 @@ public void testLexicographicMatchNoLowerLimit() public void testLexicographicMatchNumbers() { assertFilterMatches( - new BoundDimFilter("dim1", "1", "3", false, false, false), + new BoundDimFilter("dim1", "1", "3", false, false, false, null), ImmutableList.of("1", "2", "3") ); assertFilterMatches( - new BoundDimFilter("dim1", "1", "3", true, true, false), + new BoundDimFilter("dim1", "1", "3", true, true, false, null), ImmutableList.of("1", "2") ); } @@ -205,19 +207,19 @@ public void testLexicographicMatchNumbers() public void testAlphaNumericMatchNull() { assertFilterMatches( - new BoundDimFilter("dim0", "", "", false, false, true), + new BoundDimFilter("dim0", "", "", false, false, true, null), ImmutableList.of() ); assertFilterMatches( - new 
BoundDimFilter("dim1", "", "", false, false, true), + new BoundDimFilter("dim1", "", "", false, false, true, null), ImmutableList.of("0") ); assertFilterMatches( - new BoundDimFilter("dim2", "", "", false, false, true), + new BoundDimFilter("dim2", "", "", false, false, true, null), ImmutableList.of("1", "2", "5") ); assertFilterMatches( - new BoundDimFilter("dim3", "", "", false, false, true), + new BoundDimFilter("dim3", "", "", false, false, true, null), ImmutableList.of("0", "1", "2", "3", "4", "5") ); } @@ -226,15 +228,15 @@ public void testAlphaNumericMatchNull() public void testAlphaNumericMatchTooStrict() { assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", true, false, true), + new BoundDimFilter("dim1", "2", "2", true, false, true, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", true, true, true), + new BoundDimFilter("dim1", "2", "2", true, true, true, null), ImmutableList.of() ); assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", false, true, true), + new BoundDimFilter("dim1", "2", "2", false, true, true, null), ImmutableList.of() ); } @@ -243,7 +245,7 @@ public void testAlphaNumericMatchTooStrict() public void testAlphaNumericMatchExactlySingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "2", "2", false, false, true), + new BoundDimFilter("dim1", "2", "2", false, false, true, null), ImmutableList.of("2") ); } @@ -252,7 +254,7 @@ public void testAlphaNumericMatchExactlySingleValue() public void testAlphaNumericMatchSurroundingSingleValue() { assertFilterMatches( - new BoundDimFilter("dim1", "1", "3", true, true, true), + new BoundDimFilter("dim1", "1", "3", true, true, true, null), ImmutableList.of("2") ); } @@ -261,7 +263,7 @@ public void testAlphaNumericMatchSurroundingSingleValue() public void testAlphaNumericMatchNoUpperLimit() { assertFilterMatches( - new BoundDimFilter("dim1", "1", null, true, true, true), + new BoundDimFilter("dim1", "1", null, true, true, true, null), 
ImmutableList.of("1", "2", "4", "5") ); } @@ -270,11 +272,56 @@ public void testAlphaNumericMatchNoUpperLimit() public void testAlphaNumericMatchNoLowerLimit() { assertFilterMatches( - new BoundDimFilter("dim1", null, "2", true, true, true), + new BoundDimFilter("dim1", null, "2", true, true, true, null), ImmutableList.of("0", "3") ); } + @Test + public void testMatchWithExtractionFn() + { + String extractionJsFn = "function(str) { return 'super-' + str; }"; + ExtractionFn superFn = new JavaScriptExtractionFn(extractionJsFn, false); + + String nullJsFn = "function(str) { return null; }"; + ExtractionFn makeNullFn = new JavaScriptExtractionFn(nullJsFn, false); + + assertFilterMatches( + new BoundDimFilter("dim0", "", "", false, false, false, makeNullFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + assertFilterMatches( + new BoundDimFilter("dim1", "super-ab", "super-abd", true, true, false, superFn), + ImmutableList.of("5") + ); + + assertFilterMatches( + new BoundDimFilter("dim1", "super-0", "super-10", false, false, true, superFn), + ImmutableList.of("1", "2", "3") + ); + + assertFilterMatches( + new BoundDimFilter("dim2", "super-", "super-zzzzzz", false, false, false, superFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + assertFilterMatches( + new BoundDimFilter("dim2", "super-null", "super-null", false, false, false, superFn), + ImmutableList.of("1", "2", "5") + ); + + assertFilterMatches( + new BoundDimFilter("dim3", "super-null", "super-null", false, false, false, superFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + + assertFilterMatches( + new BoundDimFilter("dim4", "super-null", "super-null", false, false, false, superFn), + ImmutableList.of("0", "1", "2", "3", "4", "5") + ); + } + private void assertFilterMatches( final DimFilter filter, final List expectedRows diff --git a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java 
b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java index 917aefc7e6b7..660099b988a0 100644 --- a/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/ExtractionDimFilterTest.java @@ -160,7 +160,7 @@ public ExtractionType getExtractionType() @Test public void testEmpty() { - ExtractionFilter extractionFilter = new ExtractionFilter( + SelectorFilter extractionFilter = new SelectorFilter( "foo", "NFDJUKFNDSJFNS", DIM_EXTRACTION_FN ); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); @@ -170,7 +170,7 @@ public void testEmpty() @Test public void testNull() { - ExtractionFilter extractionFilter = new ExtractionFilter( + SelectorFilter extractionFilter = new SelectorFilter( "FDHJSFFHDS", "extractDimVal", DIM_EXTRACTION_FN ); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); @@ -180,7 +180,7 @@ public void testNull() @Test public void testNormal() { - ExtractionFilter extractionFilter = new ExtractionFilter( + SelectorFilter extractionFilter = new SelectorFilter( "foo", "extractDimVal", DIM_EXTRACTION_FN ); ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR); diff --git a/processing/src/test/java/io/druid/segment/filter/InFilterTest.java b/processing/src/test/java/io/druid/segment/filter/InFilterTest.java index 852cfe7ece58..0bbaea54cfbe 100644 --- a/processing/src/test/java/io/druid/segment/filter/InFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/InFilterTest.java @@ -32,6 +32,9 @@ import io.druid.data.input.impl.MapInputRowParser; import io.druid.data.input.impl.TimeAndDimsParseSpec; import io.druid.data.input.impl.TimestampSpec; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavaScriptExtractionFn; +import io.druid.query.filter.BoundDimFilter; import io.druid.query.filter.DimFilter; import 
io.druid.query.filter.Filter; import io.druid.query.filter.InDimFilter; @@ -46,6 +49,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.Map; @@ -100,63 +104,177 @@ public void setUp() throws IOException @Test public void testSingleValueStringColumnWithoutNulls() { - Assert.assertEquals(ImmutableList.of(), select(toInFilter("dim0", null))); - Assert.assertEquals(ImmutableList.of(), select(toInFilter("dim0", "", ""))); - Assert.assertEquals(ImmutableList.of(0, 2), select(toInFilter("dim0", "a", "c"))); - Assert.assertEquals(ImmutableList.of(4), select(toInFilter("dim0", "e", "x"))); + assertFilterMatches( + toInFilter("dim0", null), + ImmutableList.of() + ); + + assertFilterMatches( + toInFilter("dim0", "", ""), + ImmutableList.of() + ); + + assertFilterMatches( + toInFilter("dim0", "a", "c"), + ImmutableList.of("a", "c") + ); + + assertFilterMatches( + toInFilter("dim0", "e", "x"), + ImmutableList.of("e") + ); } @Test public void testSingleValueStringColumnWithNulls() { - Assert.assertEquals(ImmutableList.of(0), select(toInFilter("dim1", null, ""))); - Assert.assertEquals(ImmutableList.of(0), select(toInFilter("dim1", ""))); - Assert.assertEquals(ImmutableList.of(0, 1, 5), select(toInFilter("dim1", null, "10", "abc"))); - Assert.assertEquals(ImmutableList.of(), select(toInFilter("dim1", "-1", "ab", "de"))); + assertFilterMatches( + toInFilter("dim1", null, ""), + ImmutableList.of("a") + ); + + assertFilterMatches( + toInFilter("dim1", ""), + ImmutableList.of("a") + ); + + assertFilterMatches( + toInFilter("dim1", null, "10", "abc"), + ImmutableList.of("a", "b", "f") + ); + + assertFilterMatches( + toInFilter("dim1", "-1", "ab", "de"), + ImmutableList.of() + ); } @Test public void testMultiValueStringColumn() { - Assert.assertEquals(ImmutableList.of(1, 2, 5), select(toInFilter("dim2", null))); - Assert.assertEquals(ImmutableList.of(1, 2, 5), 
select(toInFilter("dim2", "", (String)null))); - Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 5), select(toInFilter("dim2", null, "a"))); - Assert.assertEquals(ImmutableList.of(0, 1, 2, 5), select(toInFilter("dim2", null, "b"))); - Assert.assertEquals(ImmutableList.of(4), select(toInFilter("dim2", "c"))); - Assert.assertEquals(ImmutableList.of(), select(toInFilter("dim2", "d"))); + assertFilterMatches( + toInFilter("dim2", null), + ImmutableList.of("b", "c", "f") + ); + + assertFilterMatches( + toInFilter("dim2", "", (String)null), + ImmutableList.of("b", "c", "f") + ); + + assertFilterMatches( + toInFilter("dim2", null, "a"), + ImmutableList.of("a", "b", "c", "d", "f") + + ); + + assertFilterMatches( + toInFilter("dim2", null, "b"), + ImmutableList.of("a", "b", "c", "f") + + ); + + assertFilterMatches( + toInFilter("dim2", "c"), + ImmutableList.of("e") + ); + + assertFilterMatches( + toInFilter("dim2", "d"), + ImmutableList.of() + ); } @Test public void testMissingColumn() { - Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(toInFilter("dim3", null, (String)null))); - Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(toInFilter("dim3", ""))); - Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(toInFilter("dim3", null, "a"))); - Assert.assertEquals(ImmutableList.of(), select(toInFilter("dim3", "a"))); - Assert.assertEquals(ImmutableList.of(), select(toInFilter("dim3", "b"))); - Assert.assertEquals(ImmutableList.of(), select(toInFilter("dim3", "c"))); + assertFilterMatches( + toInFilter("dim3", null, (String)null), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + assertFilterMatches( + toInFilter("dim3", ""), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + assertFilterMatches( + toInFilter("dim3", null, "a"), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + assertFilterMatches( + toInFilter("dim3", "a"), + ImmutableList.of() + ); + + assertFilterMatches( + toInFilter("dim3", "b"), + 
ImmutableList.of() + ); + + assertFilterMatches( + toInFilter("dim3", "c"), + ImmutableList.of() + ); + } + + @Test + public void testMatchWithExtractionFn() + { + String extractionJsFn = "function(str) { return 'super-' + str; }"; + ExtractionFn superFn = new JavaScriptExtractionFn(extractionJsFn, false); + + String nullJsFn = "function(str) { if (str === null) { return 'YES'; } else { return 'NO';} }"; + ExtractionFn yesNullFn = new JavaScriptExtractionFn(nullJsFn, false); + + assertFilterMatches( + toInFilterWithFn("dim2", superFn, "super-null", "super-a", "super-b"), + ImmutableList.of("a", "b", "c", "d", "f") + ); + + assertFilterMatches( + toInFilterWithFn("dim2", yesNullFn, "YES"), + ImmutableList.of("b", "c", "f") + ); + + assertFilterMatches( + toInFilterWithFn("dim1", superFn, "super-null", "super-10", "super-def"), + ImmutableList.of("a", "b", "e") + ); + + assertFilterMatches( + toInFilterWithFn("dim3", yesNullFn, "NO"), + ImmutableList.of() + ); + + assertFilterMatches( + toInFilterWithFn("dim3", yesNullFn, "YES"), + ImmutableList.of("a", "b", "c", "d", "e", "f") + ); + + assertFilterMatches( + toInFilterWithFn("dim1", yesNullFn, "NO"), + ImmutableList.of("b", "c", "d", "e", "f") + ); } private DimFilter toInFilter(String dim, String value, String... values) { - return new InDimFilter(dim, Lists.asList(value, values)); + return new InDimFilter(dim, Lists.asList(value, values), null); } - private List select(final DimFilter filter) + private DimFilter toInFilterWithFn(String dim, ExtractionFn fn, String value, String... 
values) { - return Lists.newArrayList( - Iterables.transform( - selectColumnValuesMatchingFilter(filter, "dim0"), - new Function() - { - @Override - public Integer apply(String input) - { - Preconditions.checkArgument(input.length() == 1); - return ((int) input.charAt(0)) - ((int) 'a'); - } - } - ) - ); + return new InDimFilter(dim, Lists.asList(value, values), fn); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows + ) + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); } } diff --git a/processing/src/test/java/io/druid/segment/filter/JavascriptFilterTest.java b/processing/src/test/java/io/druid/segment/filter/JavascriptFilterTest.java new file mode 100644 index 000000000000..3bc471c42f38 --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/JavascriptFilterTest.java @@ -0,0 +1,179 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.metamx.common.Pair; +import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.TimeAndDimsParseSpec; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.query.extraction.MapLookupExtractor; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.JavaScriptDimFilter; +import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; +import io.druid.segment.IndexBuilder; +import io.druid.segment.StorageAdapter; +import org.joda.time.DateTime; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class JavascriptFilterTest extends BaseFilterTest +{ + private static final String TIMESTAMP_COLUMN = "timestamp"; + + private static final InputRowParser> PARSER = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec(TIMESTAMP_COLUMN, "iso", new DateTime("2000")), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3")), + null, + null + ) + ) + ); + + private static final List ROWS = ImmutableList.of( + PARSER.parse(ImmutableMap.of("dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"))), + PARSER.parse(ImmutableMap.of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of())), + PARSER.parse(ImmutableMap.of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""))), + PARSER.parse(ImmutableMap.of("dim0", "3", "dim1", "1", "dim2", ImmutableList.of("a"))), + 
PARSER.parse(ImmutableMap.of("dim0", "4", "dim1", "def", "dim2", ImmutableList.of("c"))), + PARSER.parse(ImmutableMap.of("dim0", "5", "dim1", "abc")) + ); + + public JavascriptFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean optimize + ) + { + super(ROWS, indexBuilder, finisher, optimize); + } + + private final String jsNullFilter = "function(x) { return(x === null) }"; + + private String jsValueFilter(String value) + { + String jsFn = "function(x) { return(x === '" + value + "') }"; + return jsFn; + } + + @Test + public void testSingleValueStringColumnWithoutNulls() + { + assertFilterMatches(new JavaScriptDimFilter("dim0", jsNullFilter, null), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim0", jsValueFilter(""), null), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim0", jsValueFilter("0"), null), ImmutableList.of("0")); + assertFilterMatches(new JavaScriptDimFilter("dim0", jsValueFilter("1"), null), ImmutableList.of("1")); + } + + @Test + public void testSingleValueStringColumnWithNulls() + { + assertFilterMatches(new JavaScriptDimFilter("dim1", jsNullFilter, null), ImmutableList.of("0")); + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("10"), null), ImmutableList.of("1")); + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("2"), null), ImmutableList.of("2")); + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("1"), null), ImmutableList.of("3")); + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("def"), null), ImmutableList.of("4")); + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("abc"), null), ImmutableList.of("5")); + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("ab"), null), ImmutableList.of()); + } + + @Test + public void testMultiValueStringColumn() + { + // multi-val null...... 
+ assertFilterMatches(new JavaScriptDimFilter("dim2", jsNullFilter, null), ImmutableList.of("1", "2", "5")); + assertFilterMatches(new JavaScriptDimFilter("dim2", jsValueFilter("a"), null), ImmutableList.of("0", "3")); + assertFilterMatches(new JavaScriptDimFilter("dim2", jsValueFilter("b"), null), ImmutableList.of("0")); + assertFilterMatches(new JavaScriptDimFilter("dim2", jsValueFilter("c"), null), ImmutableList.of("4")); + assertFilterMatches(new JavaScriptDimFilter("dim2", jsValueFilter("d"), null), ImmutableList.of()); + } + + @Test + public void testMissingColumnSpecifiedInDimensionList() + { + assertFilterMatches(new JavaScriptDimFilter("dim3", jsNullFilter, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new JavaScriptDimFilter("dim3", jsValueFilter("a"), null), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim3", jsValueFilter("b"), null), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim3", jsValueFilter("c"), null), ImmutableList.of()); + } + + @Test + public void testMissingColumnNotSpecifiedInDimensionList() + { + assertFilterMatches(new JavaScriptDimFilter("dim4", jsNullFilter, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new JavaScriptDimFilter("dim4", jsValueFilter("a"), null), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim4", jsValueFilter("b"), null), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim4", jsValueFilter("c"), null), ImmutableList.of()); + } + + @Test + public void testJavascriptFilterWithLookupExtractionFn() + { + final Map stringMap = ImmutableMap.of( + "1", "HELLO", + "a", "HELLO", + "def", "HELLO", + "abc", "UNKNOWN" + ); + LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); + LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + assertFilterMatches(new JavaScriptDimFilter("dim0", jsValueFilter("HELLO"), 
lookupFn), ImmutableList.of("1")); + assertFilterMatches(new JavaScriptDimFilter("dim0", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "2", "3", "4", "5")); + + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("3", "4")); + assertFilterMatches(new JavaScriptDimFilter("dim1", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "5")); + + assertFilterMatches(new JavaScriptDimFilter("dim2", jsValueFilter("HELLO"), lookupFn), ImmutableList.of("0", "3")); + assertFilterMatches(new JavaScriptDimFilter("dim2", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "4", "5")); + + assertFilterMatches(new JavaScriptDimFilter("dim3", jsValueFilter("HELLO"), lookupFn), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim3", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + + assertFilterMatches(new JavaScriptDimFilter("dim4", jsValueFilter("HELLO"), lookupFn), ImmutableList.of()); + assertFilterMatches(new JavaScriptDimFilter("dim4", jsValueFilter("UNKNOWN"), lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows + ) + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); + } +} diff --git a/processing/src/test/java/io/druid/segment/filter/NotFilterTest.java b/processing/src/test/java/io/druid/segment/filter/NotFilterTest.java index a919449c80d1..395578acc700 100644 --- a/processing/src/test/java/io/druid/segment/filter/NotFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/NotFilterTest.java @@ -79,19 +79,19 @@ public NotFilterTest( public void testNotSelector() { assertFilterMatches( - new NotDimFilter(new SelectorDimFilter("dim0", null)), + new 
NotDimFilter(new SelectorDimFilter("dim0", null, null)), ImmutableList.of("0", "1", "2", "3", "4", "5") ); assertFilterMatches( - new NotDimFilter(new SelectorDimFilter("dim0", "")), + new NotDimFilter(new SelectorDimFilter("dim0", "", null)), ImmutableList.of("0", "1", "2", "3", "4", "5") ); assertFilterMatches( - new NotDimFilter(new SelectorDimFilter("dim0", "0")), + new NotDimFilter(new SelectorDimFilter("dim0", "0", null)), ImmutableList.of("1", "2", "3", "4", "5") ); assertFilterMatches( - new NotDimFilter(new SelectorDimFilter("dim0", "1")), + new NotDimFilter(new SelectorDimFilter("dim0", "1", null)), ImmutableList.of("0", "2", "3", "4", "5") ); } diff --git a/processing/src/test/java/io/druid/segment/filter/RegexFilterTest.java b/processing/src/test/java/io/druid/segment/filter/RegexFilterTest.java new file mode 100644 index 000000000000..a4f3e20f45f4 --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/RegexFilterTest.java @@ -0,0 +1,159 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.metamx.common.Pair; +import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.TimeAndDimsParseSpec; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavaScriptExtractionFn; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.segment.IndexBuilder; +import io.druid.segment.StorageAdapter; +import org.joda.time.DateTime; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class RegexFilterTest extends BaseFilterTest +{ + private static final String TIMESTAMP_COLUMN = "timestamp"; + + private static final InputRowParser> PARSER = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec(TIMESTAMP_COLUMN, "iso", new DateTime("2000")), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3")), + null, + null + ) + ) + ); + + private static final List ROWS = ImmutableList.of( + PARSER.parse(ImmutableMap.of("dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"))), + PARSER.parse(ImmutableMap.of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of())), + PARSER.parse(ImmutableMap.of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""))), + PARSER.parse(ImmutableMap.of("dim0", "3", "dim1", "1", "dim2", ImmutableList.of("a"))), + PARSER.parse(ImmutableMap.of("dim0", "4", "dim1", "abdef", "dim2", ImmutableList.of("c"))), + 
PARSER.parse(ImmutableMap.of("dim0", "5", "dim1", "abc")) + ); + + public RegexFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean optimize + ) + { + super(ROWS, indexBuilder, finisher, optimize); + } + + @Test + public void testSingleValueStringColumnWithoutNulls() + { + assertFilterMatches(new RegexDimFilter("dim0", ".*", null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new RegexDimFilter("dim0", "0", null), ImmutableList.of("0")); + assertFilterMatches(new RegexDimFilter("dim0", "5", null), ImmutableList.of("5")); + } + + @Test + public void testSingleValueStringColumnWithNulls() + { + // RegexFilter always returns false for null row values. + assertFilterMatches(new RegexDimFilter("dim1", ".*", null), ImmutableList.of("1", "2", "3", "4", "5")); + assertFilterMatches(new RegexDimFilter("dim1", "10", null), ImmutableList.of("1")); + assertFilterMatches(new RegexDimFilter("dim1", "2", null), ImmutableList.of("2")); + assertFilterMatches(new RegexDimFilter("dim1", "1", null), ImmutableList.of("1", "3")); + assertFilterMatches(new RegexDimFilter("dim1", ".*def", null), ImmutableList.of("4")); + assertFilterMatches(new RegexDimFilter("dim1", "abc", null), ImmutableList.of("5")); + assertFilterMatches(new RegexDimFilter("dim1", "ab.*", null), ImmutableList.of("4", "5")); + } + + @Test + public void testMultiValueStringColumn() + { + assertFilterMatches(new RegexDimFilter("dim2", ".*", null), ImmutableList.of("0", "3", "4")); + assertFilterMatches(new RegexDimFilter("dim2", "a", null), ImmutableList.of("0", "3")); + assertFilterMatches(new RegexDimFilter("dim2", "b", null), ImmutableList.of("0")); + assertFilterMatches(new RegexDimFilter("dim2", "c", null), ImmutableList.of("4")); + assertFilterMatches(new RegexDimFilter("dim2", "d", null), ImmutableList.of()); + } + + @Test + public void testMissingColumnSpecifiedInDimensionList() + { + assertFilterMatches(new RegexDimFilter("dim3", "", null), 
ImmutableList.of()); + assertFilterMatches(new RegexDimFilter("dim3", "a", null), ImmutableList.of()); + assertFilterMatches(new RegexDimFilter("dim3", "b", null), ImmutableList.of()); + assertFilterMatches(new RegexDimFilter("dim3", "c", null), ImmutableList.of()); + } + + @Test + public void testMissingColumnNotSpecifiedInDimensionList() + { + assertFilterMatches(new RegexDimFilter("dim4", "", null), ImmutableList.of()); + assertFilterMatches(new RegexDimFilter("dim4", "a", null), ImmutableList.of()); + assertFilterMatches(new RegexDimFilter("dim4", "b", null), ImmutableList.of()); + assertFilterMatches(new RegexDimFilter("dim4", "c", null), ImmutableList.of()); + } + + @Test + public void testRegexWithExtractionFn() + { + String nullJsFn = "function(str) { if (str === null) { return 'NOT_NULL_ANYMORE'; } else { return str;} }"; + ExtractionFn changeNullFn = new JavaScriptExtractionFn(nullJsFn, false); + + assertFilterMatches(new RegexDimFilter("dim1", ".*ANYMORE", changeNullFn), ImmutableList.of("0")); + assertFilterMatches(new RegexDimFilter("dim1", "ab.*", changeNullFn), ImmutableList.of("4", "5")); + + assertFilterMatches(new RegexDimFilter("dim2", ".*ANYMORE", changeNullFn), ImmutableList.of("1", "2", "5")); + assertFilterMatches(new RegexDimFilter("dim2", "a.*", changeNullFn), ImmutableList.of("0", "3")); + + assertFilterMatches(new RegexDimFilter("dim3", ".*ANYMORE", changeNullFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new RegexDimFilter("dim3", "a.*", changeNullFn), ImmutableList.of()); + + assertFilterMatches(new RegexDimFilter("dim4", ".*ANYMORE", changeNullFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new RegexDimFilter("dim4", "a.*", changeNullFn), ImmutableList.of()); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows + ) + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + 
Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); + } +} diff --git a/processing/src/test/java/io/druid/segment/filter/SearchQueryFilterTest.java b/processing/src/test/java/io/druid/segment/filter/SearchQueryFilterTest.java new file mode 100644 index 000000000000..5e38cee821eb --- /dev/null +++ b/processing/src/test/java/io/druid/segment/filter/SearchQueryFilterTest.java @@ -0,0 +1,169 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.segment.filter; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.metamx.common.Pair; +import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.TimeAndDimsParseSpec; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.JavaScriptExtractionFn; +import io.druid.query.filter.DimFilter; +import io.druid.query.filter.RegexDimFilter; +import io.druid.query.filter.SearchQueryDimFilter; +import io.druid.query.search.search.ContainsSearchQuerySpec; +import io.druid.query.search.search.SearchQuerySpec; +import io.druid.segment.IndexBuilder; +import io.druid.segment.StorageAdapter; +import org.joda.time.DateTime; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.Closeable; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class SearchQueryFilterTest extends BaseFilterTest +{ + private static final String TIMESTAMP_COLUMN = "timestamp"; + + private static final InputRowParser> PARSER = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec(TIMESTAMP_COLUMN, "iso", new DateTime("2000")), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3")), + null, + null + ) + ) + ); + + private static final List ROWS = ImmutableList.of( + PARSER.parse(ImmutableMap.of("dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"))), + PARSER.parse(ImmutableMap.of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of())), + PARSER.parse(ImmutableMap.of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""))), + 
PARSER.parse(ImmutableMap.of("dim0", "3", "dim1", "1", "dim2", ImmutableList.of("a"))), + PARSER.parse(ImmutableMap.of("dim0", "4", "dim1", "abdef", "dim2", ImmutableList.of("c"))), + PARSER.parse(ImmutableMap.of("dim0", "5", "dim1", "abc")) + ); + + public SearchQueryFilterTest( + String testName, + IndexBuilder indexBuilder, + Function> finisher, + boolean optimize + ) + { + super(ROWS, indexBuilder, finisher, optimize); + } + + private SearchQuerySpec specForValue(String value) + { + return new ContainsSearchQuerySpec(value, true); + } + + @Test + public void testSingleValueStringColumnWithoutNulls() + { + assertFilterMatches(new SearchQueryDimFilter("dim0", specForValue(""), null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new SearchQueryDimFilter("dim0", specForValue("0"), null), ImmutableList.of("0")); + assertFilterMatches(new SearchQueryDimFilter("dim0", specForValue("5"), null), ImmutableList.of("5")); + } + + @Test + public void testSingleValueStringColumnWithNulls() + { + // SearchQueryFilter always returns false for null row values. 
+ assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue(""), null), ImmutableList.of("1", "2", "3", "4", "5")); + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("10"), null), ImmutableList.of("1")); + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("2"), null), ImmutableList.of("2")); + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("1"), null), ImmutableList.of("1", "3")); + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("def"), null), ImmutableList.of("4")); + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("abc"), null), ImmutableList.of("5")); + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("ab"), null), ImmutableList.of("4", "5")); + } + + @Test + public void testMultiValueStringColumn() + { + assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue(""), null), ImmutableList.of("0", "3", "4")); + assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue("a"), null), ImmutableList.of("0", "3")); + assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue("b"), null), ImmutableList.of("0")); + assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue("c"), null), ImmutableList.of("4")); + assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue("d"), null), ImmutableList.of()); + } + + @Test + public void testMissingColumnSpecifiedInDimensionList() + { + assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue(""), null), ImmutableList.of()); + assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue("a"), null), ImmutableList.of()); + assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue("b"), null), ImmutableList.of()); + assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue("c"), null), ImmutableList.of()); + } + + + @Test + public void testMissingColumnNotSpecifiedInDimensionList() + { + assertFilterMatches(new SearchQueryDimFilter("dim4", 
specForValue(""), null), ImmutableList.of()); + assertFilterMatches(new SearchQueryDimFilter("dim4", specForValue("a"), null), ImmutableList.of()); + assertFilterMatches(new SearchQueryDimFilter("dim4", specForValue("b"), null), ImmutableList.of()); + assertFilterMatches(new SearchQueryDimFilter("dim4", specForValue("c"), null), ImmutableList.of()); + } + + + @Test + public void testSearchQueryWithExtractionFn() + { + String nullJsFn = "function(str) { if (str === null) { return 'NOT_NULL_ANYMORE'; } else { return str;} }"; + ExtractionFn changeNullFn = new JavaScriptExtractionFn(nullJsFn, false); + + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("0")); + assertFilterMatches(new SearchQueryDimFilter("dim1", specForValue("ab"), changeNullFn), ImmutableList.of("4", "5")); + + assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("1", "2", "5")); + assertFilterMatches(new SearchQueryDimFilter("dim2", specForValue("a"), changeNullFn), ImmutableList.of("0", "3")); + + assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new SearchQueryDimFilter("dim3", specForValue("a"), changeNullFn), ImmutableList.of()); + + assertFilterMatches(new SearchQueryDimFilter("dim4", specForValue("ANYMORE"), changeNullFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new SearchQueryDimFilter("dim4", specForValue("a"), changeNullFn), ImmutableList.of()); + } + + private void assertFilterMatches( + final DimFilter filter, + final List expectedRows + ) + { + Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0")); + Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter)); + } +} diff --git 
a/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java index 8dcbdb4f3ee7..94b1bf845a6f 100644 --- a/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java @@ -29,8 +29,14 @@ import io.druid.data.input.impl.MapInputRowParser; import io.druid.data.input.impl.TimeAndDimsParseSpec; import io.druid.data.input.impl.TimestampSpec; +import io.druid.query.extraction.MapLookupExtractor; import io.druid.query.filter.DimFilter; +import io.druid.query.filter.ExtractionDimFilter; +import io.druid.query.filter.InDimFilter; +import io.druid.query.filter.OrDimFilter; import io.druid.query.filter.SelectorDimFilter; +import io.druid.query.lookup.LookupExtractionFn; +import io.druid.query.lookup.LookupExtractor; import io.druid.segment.IndexBuilder; import io.druid.segment.StorageAdapter; import org.joda.time.DateTime; @@ -40,6 +46,7 @@ import org.junit.runners.Parameterized; import java.io.Closeable; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -81,54 +88,146 @@ public SelectorFilterTest( @Test public void testSingleValueStringColumnWithoutNulls() { - assertFilterMatches(new SelectorDimFilter("dim0", null), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim0", ""), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim0", "0"), ImmutableList.of("0")); - assertFilterMatches(new SelectorDimFilter("dim0", "1"), ImmutableList.of("1")); + assertFilterMatches(new SelectorDimFilter("dim0", null, null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim0", "", null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim0", "0", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("dim0", "1", null), ImmutableList.of("1")); } @Test public void testSingleValueStringColumnWithNulls() { - 
assertFilterMatches(new SelectorDimFilter("dim1", null), ImmutableList.of("0")); - assertFilterMatches(new SelectorDimFilter("dim1", ""), ImmutableList.of("0")); - assertFilterMatches(new SelectorDimFilter("dim1", "10"), ImmutableList.of("1")); - assertFilterMatches(new SelectorDimFilter("dim1", "2"), ImmutableList.of("2")); - assertFilterMatches(new SelectorDimFilter("dim1", "1"), ImmutableList.of("3")); - assertFilterMatches(new SelectorDimFilter("dim1", "def"), ImmutableList.of("4")); - assertFilterMatches(new SelectorDimFilter("dim1", "abc"), ImmutableList.of("5")); - assertFilterMatches(new SelectorDimFilter("dim1", "ab"), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim1", null, null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("dim1", "", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("dim1", "10", null), ImmutableList.of("1")); + assertFilterMatches(new SelectorDimFilter("dim1", "2", null), ImmutableList.of("2")); + assertFilterMatches(new SelectorDimFilter("dim1", "1", null), ImmutableList.of("3")); + assertFilterMatches(new SelectorDimFilter("dim1", "def", null), ImmutableList.of("4")); + assertFilterMatches(new SelectorDimFilter("dim1", "abc", null), ImmutableList.of("5")); + assertFilterMatches(new SelectorDimFilter("dim1", "ab", null), ImmutableList.of()); } @Test public void testMultiValueStringColumn() { - assertFilterMatches(new SelectorDimFilter("dim2", null), ImmutableList.of("1", "2", "5")); - assertFilterMatches(new SelectorDimFilter("dim2", ""), ImmutableList.of("1", "2", "5")); - assertFilterMatches(new SelectorDimFilter("dim2", "a"), ImmutableList.of("0", "3")); - assertFilterMatches(new SelectorDimFilter("dim2", "b"), ImmutableList.of("0")); - assertFilterMatches(new SelectorDimFilter("dim2", "c"), ImmutableList.of("4")); - assertFilterMatches(new SelectorDimFilter("dim2", "d"), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim2", null, null), 
ImmutableList.of("1", "2", "5")); + assertFilterMatches(new SelectorDimFilter("dim2", "", null), ImmutableList.of("1", "2", "5")); + assertFilterMatches(new SelectorDimFilter("dim2", "a", null), ImmutableList.of("0", "3")); + assertFilterMatches(new SelectorDimFilter("dim2", "b", null), ImmutableList.of("0")); + assertFilterMatches(new SelectorDimFilter("dim2", "c", null), ImmutableList.of("4")); + assertFilterMatches(new SelectorDimFilter("dim2", "d", null), ImmutableList.of()); } @Test public void testMissingColumnSpecifiedInDimensionList() { - assertFilterMatches(new SelectorDimFilter("dim3", null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("dim3", ""), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("dim3", "a"), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim3", "b"), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim3", "c"), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim3", null, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new SelectorDimFilter("dim3", "", null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new SelectorDimFilter("dim3", "a", null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim3", "b", null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim3", "c", null), ImmutableList.of()); } @Test public void testMissingColumnNotSpecifiedInDimensionList() { - assertFilterMatches(new SelectorDimFilter("dim4", null), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("dim4", ""), ImmutableList.of("0", "1", "2", "3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("dim4", "a"), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim4", "b"), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim4", "c"), 
ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim4", null, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new SelectorDimFilter("dim4", "", null), ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(new SelectorDimFilter("dim4", "a", null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim4", "b", null), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim4", "c", null), ImmutableList.of()); + } + + @Test + public void testSelectorWithLookupExtractionFn() + { + final Map stringMap = ImmutableMap.of( + "1", "HELLO", + "a", "HELLO", + "def", "HELLO", + "abc", "UNKNOWN" + ); + LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false); + LookupExtractionFn lookupFn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true); + + assertFilterMatches(new SelectorDimFilter("dim0", "HELLO", lookupFn), ImmutableList.of("1")); + assertFilterMatches(new SelectorDimFilter("dim0", "UNKNOWN", lookupFn), ImmutableList.of("0", "2", "3", "4", "5")); + + assertFilterMatches(new SelectorDimFilter("dim1", "HELLO", lookupFn), ImmutableList.of("3", "4")); + assertFilterMatches(new SelectorDimFilter("dim1", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "5")); + + assertFilterMatches(new SelectorDimFilter("dim2", "HELLO", lookupFn), ImmutableList.of("0", "3")); + assertFilterMatches(new SelectorDimFilter("dim2", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "4", "5")); + + assertFilterMatches(new SelectorDimFilter("dim3", "HELLO", lookupFn), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim3", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + + assertFilterMatches(new SelectorDimFilter("dim4", "HELLO", lookupFn), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim4", "UNKNOWN", lookupFn), ImmutableList.of("0", "1", "2", "3", "4", "5")); + + final Map stringMap2 = ImmutableMap.of( + 
"2", "5" + ); + LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false); + LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true); + assertFilterMatches(new SelectorDimFilter("dim0", "5", lookupFn2), ImmutableList.of("2", "5")); + + final Map stringMap3 = ImmutableMap.of( + "1", "" + ); + LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false); + LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true); + assertFilterMatches(new SelectorDimFilter("dim0", null, lookupFn3), ImmutableList.of("0", "1", "2", "3", "4", "5")); + + final Map stringMap4 = ImmutableMap.of( + "9", "4" + ); + LookupExtractor mapExtractor4 = new MapLookupExtractor(stringMap4, false); + LookupExtractionFn lookupFn4 = new LookupExtractionFn(mapExtractor4, true, null, false, true); + + final Map stringMap5 = ImmutableMap.of( + "5", "44" + ); + LookupExtractor mapExtractor5 = new MapLookupExtractor(stringMap5, false); + LookupExtractionFn lookupFn5 = new LookupExtractionFn(mapExtractor5, true, null, false, true); + + final Map stringMap6 = ImmutableMap.of( + "5", "5" + ); + LookupExtractor mapExtractor6 = new MapLookupExtractor(stringMap6, false); + LookupExtractionFn lookupFn6 = new LookupExtractionFn(mapExtractor6, true, null, false, true); + + // optimize() tests, check that filter was converted to the proper form + SelectorDimFilter optFilter1 = new SelectorDimFilter("dim1", "UNKNOWN", lookupFn); + SelectorDimFilter optFilter2 = new SelectorDimFilter("dim0", "5", lookupFn2); + SelectorDimFilter optFilter3 = new SelectorDimFilter("dim0", null, lookupFn3); + SelectorDimFilter optFilter4 = new SelectorDimFilter("dim0", "5", lookupFn4); + SelectorDimFilter optFilter5 = new SelectorDimFilter("dim0", "5", lookupFn5); + SelectorDimFilter optFilter6 = new SelectorDimFilter("dim0", "5", lookupFn6); + + InDimFilter optFilter2Optimized = new InDimFilter("dim0", Arrays.asList("2", "5"), null); + 
SelectorDimFilter optFilter4Optimized = new SelectorDimFilter("dim0", "5", null); + SelectorDimFilter optFilter6Optimized = new SelectorDimFilter("dim0", "5", null); + + Assert.assertTrue(optFilter1 == optFilter1.optimize()); + Assert.assertTrue(optFilter2Optimized.equals(optFilter2.optimize())); + Assert.assertTrue(optFilter3 == optFilter3.optimize()); + Assert.assertTrue(optFilter4Optimized.equals(optFilter4.optimize())); + Assert.assertTrue(optFilter5 == optFilter5.optimize()); + Assert.assertTrue(optFilter6Optimized.equals(optFilter6.optimize())); + + assertFilterMatches(optFilter1, ImmutableList.of("0", "1", "2", "5")); + assertFilterMatches(optFilter2, ImmutableList.of("2", "5")); + assertFilterMatches(optFilter3, ImmutableList.of("0", "1", "2", "3", "4", "5")); + assertFilterMatches(optFilter4, ImmutableList.of("5")); + assertFilterMatches(optFilter5, ImmutableList.of()); + assertFilterMatches(optFilter6, ImmutableList.of("5")); + + // tests that ExtractionDimFilter (identical to SelectorDimFilter now) optimize() with lookup works + // remove these when ExtractionDimFilter is removed. 
+ assertFilterMatches(new ExtractionDimFilter("dim1", "UNKNOWN", lookupFn, null), ImmutableList.of("0", "1", "2", "5")); + assertFilterMatches(new ExtractionDimFilter("dim0", "5", lookupFn2, null), ImmutableList.of("2", "5")); + assertFilterMatches(new ExtractionDimFilter("dim0", null, lookupFn3, null), ImmutableList.of("0", "1", "2", "3", "4", "5")); } private void assertFilterMatches( diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java index 4eb5b510177f..4ad6c90a95c9 100644 --- a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexStorageAdapterTest.java @@ -259,7 +259,7 @@ public void testResetSanity() throws IOException for (boolean descending : Arrays.asList(false, true)) { Sequence cursorSequence = adapter.makeCursors( - new SelectorFilter("sally", "bo"), + new SelectorFilter("sally", "bo", null), interval, QueryGranularity.NONE, descending diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java index 899e42b1f59b..4132248803ba 100644 --- a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java @@ -82,7 +82,7 @@ public static Collection constructorFeeder() throws IOException AggregatorFactory[] metrics = { new FilteredAggregatorFactory( new CountAggregatorFactory("cnt"), - new SelectorDimFilter("billy", "A") + new SelectorDimFilter("billy", "A", null) ) }; final IncrementalIndexSchema schema = new IncrementalIndexSchema( diff --git a/server/src/main/antlr4/io/druid/sql/antlr4/DruidSQL.g4 b/server/src/main/antlr4/io/druid/sql/antlr4/DruidSQL.g4 index 
f86a252fdb62..b6e781f64eee 100644 --- a/server/src/main/antlr4/io/druid/sql/antlr4/DruidSQL.g4 +++ b/server/src/main/antlr4/io/druid/sql/antlr4/DruidSQL.g4 @@ -297,9 +297,9 @@ selectorDimFilter returns [DimFilter filter] String dim = $dimension.text; String val = unescape($value.text); switch($op.type) { - case(EQ): $filter = new SelectorDimFilter(dim, val); break; - case(NEQ): $filter = new NotDimFilter(new SelectorDimFilter(dim, val)); break; - case(MATCH): $filter = new RegexDimFilter(dim, val); break; + case(EQ): $filter = new SelectorDimFilter(dim, val, null); break; + case(NEQ): $filter = new NotDimFilter(new SelectorDimFilter(dim, val, null)); break; + case(MATCH): $filter = new RegexDimFilter(dim, val, null); break; } } ; @@ -307,7 +307,7 @@ selectorDimFilter returns [DimFilter filter] inListDimFilter returns [DimFilter filter] : dimension=IDENT 'in' (OPEN! ( (list+=QUOTED_STRING (COMMA! list+=QUOTED_STRING)*) ) CLOSE!) { List filterList = new LinkedList(); - for(Token e : $list) filterList.add(new SelectorDimFilter($dimension.text, unescape(e.getText()))); + for(Token e : $list) filterList.add(new SelectorDimFilter($dimension.text, unescape(e.getText()), null)); $filter = new OrDimFilter(filterList); } ;