diff --git a/docs/content/configuration/broker.md b/docs/content/configuration/broker.md index 4eef1cbac9a1..98fa02f129cd 100644 --- a/docs/content/configuration/broker.md +++ b/docs/content/configuration/broker.md @@ -87,6 +87,7 @@ See [groupBy server configuration](../querying/groupbyquery.html#server-configur |Property|Description|Default| |--------|-----------|-------| |`druid.query.segmentMetadata.defaultHistory`|When no interval is specified in the query, use a default interval of defaultHistory before the end time of the most recent segment, specified in ISO8601 format. This property also controls the duration of the default interval used by GET /druid/v2/datasources/{dataSourceName} interactions for retrieving datasource dimensions/metrics.|P1W| +|`druid.query.segmentMetadata.defaultAnalysisTypes`|This can be used to set the Default Analysis Types for all segment metadata queries, this can be overridden when making the query|["cardinality", "interval", "minmax"]| ### SQL diff --git a/docs/content/querying/segmentmetadataquery.md b/docs/content/querying/segmentmetadataquery.md index 820db6cadfcf..e04edfc17b22 100644 --- a/docs/content/querying/segmentmetadataquery.md +++ b/docs/content/querying/segmentmetadataquery.md @@ -32,7 +32,7 @@ There are several main parts to a segment metadata query: |toInclude|A JSON Object representing what columns should be included in the result. Defaults to "all".|no| |merge|Merge all individual segment metadata results into a single result|no| |context|See [Context](../querying/query-context.html)|no| -|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "interval", "minmax"]. See section [analysisTypes](#analysistypes) for more details.|no| +|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "interval", "minmax"], but can be overridden with using this [BrokerConfig](../configuration/broker.html#segment-metadata-query-config). See section [analysisTypes](#analysistypes) for more details.|no| |lenientAggregatorMerge|If true, and if the "aggregators" analysisType is enabled, aggregators will be merged leniently. See below for details.|no| The format of the result is: @@ -106,7 +106,10 @@ The grammar is as follows: This is a list of properties that determines the amount of information returned about the columns, i.e. analyses to be performed on the columns. -By default, the "cardinality", "size", "interval", and "minmax" types will be used. If a property is not needed, omitting it from this list will result in a more efficient query. +By default, the "cardinality", "interval", and "minmax" types will be used. If a property is not needed, omitting it from this list will result in a more efficient query. + +The default analysis types can be set in the broker configuration via: + `druid.query.segmentMetadata.defaultAnalysisTypes` Types of column analyses are described below: diff --git a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryConfig.java b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryConfig.java index 8b4942c23dec..fd4add3da80b 100644 --- a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryConfig.java +++ b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryConfig.java @@ -20,19 +20,29 @@ package io.druid.query.metadata; import com.fasterxml.jackson.annotation.JsonProperty; +import io.druid.query.metadata.metadata.SegmentMetadataQuery; import org.joda.time.Period; import org.joda.time.format.ISOPeriodFormat; import org.joda.time.format.PeriodFormatter; +import java.util.EnumSet; public class SegmentMetadataQueryConfig { private static final String DEFAULT_PERIOD_STRING = "P1W"; private static final PeriodFormatter ISO_FORMATTER = ISOPeriodFormat.standard(); + private static final EnumSet DEFAULT_ANALYSIS_TYPES = EnumSet.of( + SegmentMetadataQuery.AnalysisType.CARDINALITY, + SegmentMetadataQuery.AnalysisType.INTERVAL, + SegmentMetadataQuery.AnalysisType.MINMAX + ); @JsonProperty private Period defaultHistory = ISO_FORMATTER.parsePeriod(DEFAULT_PERIOD_STRING); + @JsonProperty + private EnumSet defaultAnalysisTypes = DEFAULT_ANALYSIS_TYPES; + public SegmentMetadataQueryConfig(String period) { defaultHistory = ISO_FORMATTER.parsePeriod(period); @@ -46,4 +56,16 @@ public Period getDefaultHistory() { return defaultHistory; } + + public void setDefaultHistory(String period) + { + this.defaultHistory = ISO_FORMATTER.parsePeriod(period); + } + + public EnumSet getDefaultAnalysisTypes() { return defaultAnalysisTypes; } + + public void setDefaultAnalysisTypes(EnumSet defaultAnalysisTypes) + { + this.defaultAnalysisTypes = defaultAnalysisTypes; + } } diff --git a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java index 5e284b01dbb7..071c6e84d3af 100644 --- a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java @@ -38,6 +38,7 @@ import io.druid.java.util.common.guava.MappedSequence; import io.druid.java.util.common.guava.Sequence; import io.druid.java.util.common.guava.nary.BinaryFn; +import io.druid.query.BySegmentSkippingQueryRunner; import io.druid.query.CacheStrategy; import io.druid.query.DefaultGenericQueryMetricsFactory; import io.druid.query.GenericQueryMetricsFactory; @@ -46,7 +47,6 @@ import io.druid.query.QueryPlus; import io.druid.query.QueryRunner; import io.druid.query.QueryToolChest; -import io.druid.query.ResultMergeQueryRunner; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactoryNotMergeableException; import io.druid.query.aggregation.MetricManipulationFn; @@ -89,7 +89,10 @@ public SegmentMetadataQueryQueryToolChest(SegmentMetadataQueryConfig config) } @Inject - public SegmentMetadataQueryQueryToolChest(SegmentMetadataQueryConfig config, GenericQueryMetricsFactory queryMetricsFactory) + public SegmentMetadataQueryQueryToolChest( + SegmentMetadataQueryConfig config, + GenericQueryMetricsFactory queryMetricsFactory + ) { this.config = config; this.queryMetricsFactory = queryMetricsFactory; @@ -98,7 +101,7 @@ public SegmentMetadataQueryQueryToolChest(SegmentMetadataQueryConfig config, Gen @Override public QueryRunner mergeResults(final QueryRunner runner) { - return new ResultMergeQueryRunner(runner) + return new BySegmentSkippingQueryRunner(runner) { @Override public Sequence doRun( @@ -107,21 +110,21 @@ public Sequence doRun( Map context ) { - Query query = queryPlus.getQuery(); + SegmentMetadataQuery updatedQuery = ((SegmentMetadataQuery) queryPlus.getQuery()).withFinalizedAnalysisTypes(config); + QueryPlus updatedQueryPlus = queryPlus.withQuery(updatedQuery); return new MappedSequence<>( CombiningSequence.create( - baseRunner.run(queryPlus, context), - makeOrdering(query), - createMergeFn(query) + baseRunner.run(updatedQueryPlus, context), + makeOrdering(updatedQuery), + createMergeFn(updatedQuery) ), MERGE_TRANSFORM_FN ); } - @Override - protected Ordering makeOrdering(Query query) + private Ordering makeOrdering(SegmentMetadataQuery query) { - if (((SegmentMetadataQuery) query).isMerge()) { + if (query.isMerge()) { // Merge everything always return new Ordering() { @@ -138,17 +141,14 @@ public int compare( return query.getResultOrdering(); // No two elements should be equal, so it should never merge } - @Override - protected BinaryFn createMergeFn(final Query inQ) + private BinaryFn createMergeFn(final SegmentMetadataQuery inQ) { return new BinaryFn() { - private final SegmentMetadataQuery query = (SegmentMetadataQuery) inQ; - @Override public SegmentAnalysis apply(SegmentAnalysis arg1, SegmentAnalysis arg2) { - return mergeAnalyses(arg1, arg2, query.isLenientAggregatorMerge()); + return mergeAnalyses(arg1, arg2, inQ.isLenientAggregatorMerge()); } }; } @@ -189,8 +189,9 @@ public boolean isCacheable(SegmentMetadataQuery query, boolean willMergeRunners) @Override public byte[] computeCacheKey(SegmentMetadataQuery query) { - byte[] includerBytes = query.getToInclude().getCacheKey(); - byte[] analysisTypesBytes = query.getAnalysisTypesCacheKey(); + SegmentMetadataQuery updatedQuery = query.withFinalizedAnalysisTypes(config); + byte[] includerBytes = updatedQuery.getToInclude().getCacheKey(); + byte[] analysisTypesBytes = updatedQuery.getAnalysisTypesCacheKey(); return ByteBuffer.allocate(1 + includerBytes.length + analysisTypesBytes.length) .put(SEGMENT_METADATA_CACHE_PREFIX) .put(includerBytes) @@ -238,7 +239,6 @@ public List filterSegments(SegmentMetadataQuery qu if (!query.isUsingDefaultInterval()) { return segments; } - if (segments.size() <= 1) { return segments; } @@ -406,4 +406,9 @@ public static SegmentAnalysis finalizeAnalysis(SegmentAnalysis analysis) analysis.isRollup() ); } + + public SegmentMetadataQueryConfig getConfig() + { + return this.config; + } } diff --git a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java index a3938574b064..ded0107d0ea5 100644 --- a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java +++ b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryRunnerFactory.java @@ -86,8 +86,9 @@ public QueryRunner createRunner(final Segment segment) @Override public Sequence run(QueryPlus inQ, Map responseContext) { - SegmentMetadataQuery query = (SegmentMetadataQuery) inQ.getQuery(); - final SegmentAnalyzer analyzer = new SegmentAnalyzer(query.getAnalysisTypes()); + SegmentMetadataQuery updatedQuery = ((SegmentMetadataQuery) inQ.getQuery()) + .withFinalizedAnalysisTypes(toolChest.getConfig()); + final SegmentAnalyzer analyzer = new SegmentAnalyzer(updatedQuery.getAnalysisTypes()); final Map analyzedColumns = analyzer.analyze(segment); final long numRows = analyzer.numRows(segment); long totalSize = 0; @@ -98,7 +99,7 @@ public Sequence run(QueryPlus inQ, Map columns = Maps.newTreeMap(); - ColumnIncluderator includerator = query.getToInclude(); + ColumnIncluderator includerator = updatedQuery.getToInclude(); for (Map.Entry entry : analyzedColumns.entrySet()) { final String columnName = entry.getKey(); final ColumnAnalysis column = entry.getValue(); @@ -110,12 +111,12 @@ public Sequence run(QueryPlus inQ, Map retIntervals = query.analyzingInterval() ? + List retIntervals = updatedQuery.analyzingInterval() ? Collections.singletonList(segment.getDataInterval()) : null; final Map aggregators; Metadata metadata = null; - if (query.hasAggregators()) { + if (updatedQuery.hasAggregators()) { metadata = segment.asStorageAdapter().getMetadata(); if (metadata != null && metadata.getAggregators() != null) { aggregators = Maps.newHashMap(); @@ -130,7 +131,7 @@ public Sequence run(QueryPlus inQ, Map run(QueryPlus inQ, Map run(QueryPlus inQ, Map DEFAULT_ANALYSIS_TYPES = EnumSet.of( - AnalysisType.CARDINALITY, - AnalysisType.INTERVAL, - AnalysisType.MINMAX - ); - private final ColumnIncluderator toInclude; private final boolean merge; private final boolean usingDefaultInterval; @@ -125,7 +120,7 @@ public SegmentMetadataQuery( } this.toInclude = toInclude == null ? new AllColumnIncluderator() : toInclude; this.merge = merge == null ? false : merge; - this.analysisTypes = (analysisTypes == null) ? DEFAULT_ANALYSIS_TYPES : analysisTypes; + this.analysisTypes = analysisTypes; Preconditions.checkArgument( dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource, "SegmentMetadataQuery only supports table or union datasource" @@ -254,6 +249,23 @@ public Query withColumns(ColumnIncluderator includerator) return Druids.SegmentMetadataQueryBuilder.copy(this).toInclude(includerator).build(); } + public SegmentMetadataQuery withFinalizedAnalysisTypes(SegmentMetadataQueryConfig config) + { + if (analysisTypes != null) { + return this; + } + return Druids.SegmentMetadataQueryBuilder + .copy(this) + .analysisTypes(config.getDefaultAnalysisTypes()) + .build(); + } + + @Override + public List getIntervals() + { + return this.getQuerySegmentSpec().getIntervals(); + } + @Override public String toString() { diff --git a/processing/src/test/java/io/druid/guice/SegmentMetadataQueryConfigTest.java b/processing/src/test/java/io/druid/guice/SegmentMetadataQueryConfigTest.java new file mode 100644 index 000000000000..68212d8f7081 --- /dev/null +++ b/processing/src/test/java/io/druid/guice/SegmentMetadataQueryConfigTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.guice; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.inject.Binder; +import com.google.inject.Guice; +import com.google.inject.Injector; +import com.google.inject.Module; +import com.google.inject.Provides; +import io.druid.jackson.DefaultObjectMapper; +import io.druid.query.metadata.SegmentMetadataQueryConfig; +import io.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.Properties; + +public class SegmentMetadataQueryConfigTest +{ + @Test + public void testSerdeSegmentMetadataQueryConfig() throws Exception + { + Injector injector = Guice.createInjector( + new Module() + { + @Override + public void configure(Binder binder) + { + binder.install(new PropertiesModule(Arrays.asList("test.runtime.properties"))); + binder.install(new ConfigModule()); + binder.install(new DruidGuiceExtensions()); + JsonConfigProvider.bind(binder, "druid.query.segmentMetadata", SegmentMetadataQueryConfig.class); + } + + @Provides + @LazySingleton + public ObjectMapper jsonMapper() + { + return new DefaultObjectMapper(); + } + } + ); + + + Properties props = injector.getInstance(Properties.class); + SegmentMetadataQueryConfig config = injector.getInstance(SegmentMetadataQueryConfig.class); + + EnumSet expectedDefaultAnalysis = config.getDefaultAnalysisTypes(); + String actualDefaultAnalysis = props.getProperty("druid.query.segmentMetadata.defaultAnalysisTypes"); + + Iterator it = expectedDefaultAnalysis.iterator(); + StringBuilder sb = new StringBuilder(); + sb.append('['); + while (it.hasNext()) { + SegmentMetadataQuery.AnalysisType e = it.next(); + sb.append("\""+e+"\""); + if (it.hasNext()) { + sb.append(',').append(' '); + } + } + sb.append(']'); + + String expectedDefaultAnalysisAsString = sb.toString(); + + Assert.assertEquals( + expectedDefaultAnalysisAsString, + actualDefaultAnalysis + ); + Assert.assertEquals( + props.getProperty("druid.query.segmentMetadata.defaultHistory"), + config.getDefaultHistory().toString() + ); + } +} diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index 681cd3049cd2..18ad069f0bc3 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -60,7 +60,7 @@ public void testCacheStrategy() throws Exception ); CacheStrategy strategy = - new SegmentMetadataQueryQueryToolChest(null).getCacheStrategy(query); + new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()).getCacheStrategy(query); // Test cache key generation byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x02, 0x04}; diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java index fbb8e25cf57d..c2d7518b1cbc 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryTest.java @@ -143,8 +143,12 @@ public SegmentMetadataQueryTest( { final String id1 = differentIds ? "testSegment1" : "testSegment"; final String id2 = differentIds ? "testSegment2" : "testSegment"; - this.runner1 = mmap1 ? makeMMappedQueryRunner(id1, rollup1, FACTORY) : makeIncrementalIndexQueryRunner(id1, rollup1, FACTORY); - this.runner2 = mmap2 ? makeMMappedQueryRunner(id2, rollup2, FACTORY) : makeIncrementalIndexQueryRunner(id2, rollup2, FACTORY); + this.runner1 = mmap1 + ? makeMMappedQueryRunner(id1, rollup1, FACTORY) + : makeIncrementalIndexQueryRunner(id1, rollup1, FACTORY); + this.runner2 = mmap2 + ? makeMMappedQueryRunner(id2, rollup2, FACTORY) + : makeIncrementalIndexQueryRunner(id2, rollup2, FACTORY); this.mmap1 = mmap1; this.mmap2 = mmap2; this.rollup1 = rollup1; @@ -242,7 +246,7 @@ public SegmentMetadataQueryTest( null, null ) - // null_column will be included only for incremental index, which makes a little bigger result than expected + // null_column will be included only for incremental index, which makes a little bigger result than expected ), mmap2 ? 123969 : 124664, 1209, null, @@ -1092,12 +1096,56 @@ public void testCacheKeyWithListColumnIncluderator() .toInclude(new ListColumnIncluderator(Arrays.asList("fo", "o"))) .build(); - final byte[] oneColumnQueryCacheKey = new SegmentMetadataQueryQueryToolChest(null).getCacheStrategy(oneColumnQuery) - .computeCacheKey(oneColumnQuery); + final byte[] oneColumnQueryCacheKey = new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()).getCacheStrategy( + oneColumnQuery) + .computeCacheKey( + oneColumnQuery); - final byte[] twoColumnQueryCacheKey = new SegmentMetadataQueryQueryToolChest(null).getCacheStrategy(twoColumnQuery) - .computeCacheKey(twoColumnQuery); + final byte[] twoColumnQueryCacheKey = new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()).getCacheStrategy( + twoColumnQuery) + .computeCacheKey( + twoColumnQuery); Assert.assertFalse(Arrays.equals(oneColumnQueryCacheKey, twoColumnQueryCacheKey)); } + + @Test + public void testAnanlysisTypesBeingSet() + { + + SegmentMetadataQuery query1 = Druids.newSegmentMetadataQueryBuilder() + .dataSource("testing") + .toInclude(new ListColumnIncluderator(Arrays.asList("foo"))) + .build(); + + SegmentMetadataQuery query2 = Druids.newSegmentMetadataQueryBuilder() + .dataSource("testing") + .toInclude(new ListColumnIncluderator(Arrays.asList("foo"))) + .analysisTypes(SegmentMetadataQuery.AnalysisType.MINMAX) + .build(); + + SegmentMetadataQueryConfig emptyCfg = new SegmentMetadataQueryConfig(); + SegmentMetadataQueryConfig analysisCfg = new SegmentMetadataQueryConfig(); + analysisCfg.setDefaultAnalysisTypes(EnumSet.of(SegmentMetadataQuery.AnalysisType.CARDINALITY)); + + EnumSet analysis1 = query1.withFinalizedAnalysisTypes(emptyCfg) + .getAnalysisTypes(); + EnumSet analysis2 = query2.withFinalizedAnalysisTypes(emptyCfg) + .getAnalysisTypes(); + EnumSet analysisWCfg1 = query1.withFinalizedAnalysisTypes(analysisCfg) + .getAnalysisTypes(); + EnumSet analysisWCfg2 = query2.withFinalizedAnalysisTypes(analysisCfg) + .getAnalysisTypes(); + + EnumSet expectedAnalysis1 = new SegmentMetadataQueryConfig().getDefaultAnalysisTypes(); + EnumSet expectedAnalysis2 = EnumSet.of(SegmentMetadataQuery.AnalysisType.MINMAX); + EnumSet expectedAnalysisWCfg1 = EnumSet.of(SegmentMetadataQuery.AnalysisType.CARDINALITY); + EnumSet expectedAnalysisWCfg2 = EnumSet.of(SegmentMetadataQuery.AnalysisType.MINMAX); + + Assert.assertEquals(analysis1, expectedAnalysis1); + Assert.assertEquals(analysis2, expectedAnalysis2); + Assert.assertEquals(analysisWCfg1, expectedAnalysisWCfg1); + Assert.assertEquals(analysisWCfg2, expectedAnalysisWCfg2); + } + } diff --git a/processing/src/test/resources/test.runtime.properties b/processing/src/test/resources/test.runtime.properties index d84cf995ee1f..f93e3398ac7c 100644 --- a/processing/src/test/resources/test.runtime.properties +++ b/processing/src/test/resources/test.runtime.properties @@ -8,3 +8,5 @@ druid.metadata.storage.tables.taskLock=fff_tasklock druid.metadata.storage.tables.audit=ggg_audit druid.metadata.storage.tables.dataSource=hhh_dataSource druid.metadata.storage.tables.supervisors=iii_supervisors +druid.query.segmentMetadata.defaultAnalysisTypes=["cardinality", "size"] +druid.query.segmentMetadata.defaultHistory=P2W