From d2c28d423ce9f922f6080599798977ca4465f761 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 23 Feb 2024 21:49:47 +0530 Subject: [PATCH 01/26] Add storeCompactionState annotation function --- .../apache/druid/msq/exec/ControllerImpl.java | 74 ++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index d62bcce04ddc..a5d3fca0845e 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -20,6 +20,7 @@ package org.apache.druid.msq.exec; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -41,6 +42,7 @@ import it.unimi.dsi.fastutil.ints.IntSet; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.druid.client.ImmutableSegmentLoadInfo; +import org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.data.input.StringTuple; import org.apache.druid.data.input.impl.DimensionSchema; @@ -62,6 +64,8 @@ import org.apache.druid.frame.write.InvalidNullByteException; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; +import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -188,6 +192,7 @@ import 
org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -204,6 +209,7 @@ import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -238,6 +244,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.StreamSupport; @@ -1715,12 +1722,77 @@ private void publishSegmentsIfNeeded( { if (queryKernel.isSuccess() && MSQControllerTask.isIngestion(task.getQuerySpec())) { final StageId finalStageId = queryKernel.getStageId(queryDef.getFinalStageDefinition().getStageNumber()); + queryDef.getFinalStageDefinition().getClusterBy(); //noinspection unchecked @SuppressWarnings("unchecked") final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); + DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) + .getProcessorFactory()).getDataSchema(); + ClusterBy clusterBy = queryDef.getFinalStageDefinition().getClusterBy(); log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); - publishAllSegments(segments); + Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( + true, + context.jsonMapper(), + dataSchema, + clusterBy + ); + 
publishAllSegments(compactionStateAnnotateFunction.apply(segments)); + } + } + + public Function, Set> compactionStateAnnotateFunction( + boolean storeCompactionState, + ObjectMapper jsonMapper, + DataSchema dataSchema, + ClusterBy clusterBy + ) + { + if (storeCompactionState) { + IndexSpec indexSpec = task().getQuerySpec().getTuningConfig().getIndexSpec(); + GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); + DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); + Map transformSpec = dataSchema.getTransformSpec() == null + || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) + ? null + : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() + .getFilter()).asMap( + jsonMapper); + List metricsSpec = dataSchema.getAggregators() == null + ? null + : jsonMapper.convertValue( + dataSchema.getAggregators(), + new TypeReference>() + { + } + ); + + PartitionsSpec partitionSpec = new DimensionRangePartitionsSpec( + task().getQuerySpec() + .getTuningConfig() + .getRowsPerSegment(), + null, + clusterBy.getColumns() + .stream() + .map(KeyColumn::columnName) + .collect(Collectors.toList()), + false + ); + + final CompactionState compactionState = new CompactionState( + partitionSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec.asMap(jsonMapper), + granularitySpec.asMap(jsonMapper) + ); + return segments -> segments + .stream() + .map(s -> s.withLastCompactionState(compactionState)) + .collect(Collectors.toSet()); + } else { + return Function.identity(); } } From 555d5d5cb60de04d8d7b131874bc13a7e3600564 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 11:19:30 +0530 Subject: [PATCH 02/26] Add flag and change some config sources --- .../apache/druid/msq/exec/ControllerImpl.java | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index a5d3fca0845e..292c720398b0 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -80,6 +80,7 @@ import org.apache.druid.indexing.common.actions.SegmentTransactionalReplaceAction; import org.apache.druid.indexing.common.actions.TaskAction; import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.indexing.common.task.batch.TooManyBucketsException; import org.apache.druid.indexing.common.task.batch.parallel.TombstoneHelper; import org.apache.druid.indexing.overlord.SegmentPublishResult; @@ -1729,27 +1730,40 @@ private void publishSegmentsIfNeeded( final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) .getProcessorFactory()).getDataSchema(); - ClusterBy clusterBy = queryDef.getFinalStageDefinition().getClusterBy(); - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); + + List partitionDimensions = segments.isEmpty() + ? 
Collections.emptyList() + : ((DimensionRangeShardSpec) segments.stream() + .findFirst() + .get() + .getShardSpec()).getDimensions(); + + Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( - true, + task(), context.jsonMapper(), dataSchema, - clusterBy + partitionDimensions ); + log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); publishAllSegments(compactionStateAnnotateFunction.apply(segments)); } } - public Function, Set> compactionStateAnnotateFunction( - boolean storeCompactionState, + public static Function, Set> compactionStateAnnotateFunction( + MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, - ClusterBy clusterBy + List partitionDimensions ) { + final boolean storeCompactionState = task.getContextValue( + Tasks.STORE_COMPACTION_STATE_KEY, + Tasks.DEFAULT_STORE_COMPACTION_STATE + ); + if (storeCompactionState) { - IndexSpec indexSpec = task().getQuerySpec().getTuningConfig().getIndexSpec(); + IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); Map transformSpec = dataSchema.getTransformSpec() == null @@ -1767,15 +1781,15 @@ public Function, Set> compactionStateAnnotateFunct } ); + // Even if partition dimensions is empty, use DimensionRangePartitionsSpec to record other info + // such as rowsPerSegment + PartitionsSpec partitionSpec = new DimensionRangePartitionsSpec( - task().getQuerySpec() - .getTuningConfig() - .getRowsPerSegment(), + task.getQuerySpec() + .getTuningConfig() + .getRowsPerSegment(), null, - clusterBy.getColumns() - .stream() - .map(KeyColumn::columnName) - .collect(Collectors.toList()), + partitionDimensions, false ); From 0ac20e49cd314f1ebe6fcb74dd40ae9b160d4ff2 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 12:56:11 +0530 Subject: [PATCH 03/26] Add type check for shard spec 
before casting --- .../apache/druid/msq/exec/ControllerImpl.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 292c720398b0..a54021239c16 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -66,6 +66,7 @@ import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -234,6 +235,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Queue; import java.util.Set; @@ -1731,13 +1733,18 @@ private void publishSegmentsIfNeeded( DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) .getProcessorFactory()).getDataSchema(); - List partitionDimensions = segments.isEmpty() - ? Collections.emptyList() - : ((DimensionRangeShardSpec) segments.stream() - .findFirst() - .get() - .getShardSpec()).getDimensions(); + ShardSpec shardSpec = segments.isEmpty() + ? 
null + : segments.stream() + .findFirst() + .get() + .getShardSpec(); + List partitionDimensions = Collections.emptyList(); + if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + } Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( task(), From a6d3dc0bcbdd7a7a03f3934e8c9eece5b0d1de56 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 15:37:08 +0530 Subject: [PATCH 04/26] Check if there is a segment granularity in the context and revise the granularity spec accordingly --- .../apache/druid/msq/exec/ControllerImpl.java | 96 +++++++++++-------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index a54021239c16..44322d20fbc5 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -202,10 +202,12 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec; import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DruidServerMetadata; +import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.planner.ColumnMapping; import org.apache.druid.sql.calcite.planner.ColumnMappings; import 
org.apache.druid.sql.calcite.rel.DruidQuery; @@ -1725,7 +1727,6 @@ private void publishSegmentsIfNeeded( { if (queryKernel.isSuccess() && MSQControllerTask.isIngestion(task.getQuerySpec())) { final StageId finalStageId = queryKernel.getStageId(queryDef.getFinalStageDefinition().getStageNumber()); - queryDef.getFinalStageDefinition().getClusterBy(); //noinspection unchecked @SuppressWarnings("unchecked") @@ -1733,26 +1734,36 @@ private void publishSegmentsIfNeeded( DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) .getProcessorFactory()).getDataSchema(); - ShardSpec shardSpec = segments.isEmpty() - ? null - : segments.stream() - .findFirst() - .get() - .getShardSpec(); - List partitionDimensions = Collections.emptyList(); + Function, Set> compactionStateAnnotateFunction = Function.identity(); - if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { - partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + final boolean storeCompactionState = task.getContextValue( + Tasks.STORE_COMPACTION_STATE_KEY, + Tasks.DEFAULT_STORE_COMPACTION_STATE + ); + + if (storeCompactionState) { + ShardSpec shardSpec = segments.isEmpty() + ? 
null + : segments.stream() + .findFirst() + .get() + .getShardSpec(); + List partitionDimensions = Collections.emptyList(); + + if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + } + + compactionStateAnnotateFunction = compactionStateAnnotateFunction( + task(), + context.jsonMapper(), + dataSchema, + partitionDimensions + ); + log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); } - Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( - task(), - context.jsonMapper(), - dataSchema, - partitionDimensions - ); - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); publishAllSegments(compactionStateAnnotateFunction.apply(segments)); } } @@ -1764,25 +1775,37 @@ public static Function, Set> compactionStateAnnota List partitionDimensions ) { - final boolean storeCompactionState = task.getContextValue( - Tasks.STORE_COMPACTION_STATE_KEY, - Tasks.DEFAULT_STORE_COMPACTION_STATE - ); - - if (storeCompactionState) { IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); - Map transformSpec = dataSchema.getTransformSpec() == null - || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) - ? null - : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() - .getFilter()).asMap( - jsonMapper); - List metricsSpec = dataSchema.getAggregators() == null - ? 
null - : jsonMapper.convertValue( - dataSchema.getAggregators(), + + if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { + + // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY + Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( + jsonMapper, + task.getQuerySpec() + .getQuery() + .getContext() + ); + granularitySpec = new UniformGranularitySpec( + segmentGranularity, + granularitySpec.getQueryGranularity(), + granularitySpec.isRollup(), + granularitySpec.inputIntervals() + ); + } + + DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); + Map transformSpec = dataSchema.getTransformSpec() == null + || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) + ? null + : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() + .getFilter()).asMap( + jsonMapper); + List metricsSpec = dataSchema.getAggregators() == null + ? null + : jsonMapper.convertValue( + dataSchema.getAggregators(), new TypeReference>() { } @@ -1812,9 +1835,6 @@ public static Function, Set> compactionStateAnnota .stream() .map(s -> s.withLastCompactionState(compactionState)) .collect(Collectors.toSet()); - } else { - return Function.identity(); - } } /** From b218280e308517154d6b285baba93e32a37a4fd4 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 16:08:17 +0530 Subject: [PATCH 05/26] Check if there is a segment granularity in the context and revise the granularity spec accordingly --- .../main/java/org/apache/druid/msq/exec/ControllerImpl.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 44322d20fbc5..dae65c75a785 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1778,7 +1778,8 @@ public static Function, Set> compactionStateAnnota IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { + if (granularitySpec instanceof ArbitraryGranularitySpec + && task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( From 33b5a82fd22731c057e48bb230b4289ca3128a4b Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 17:38:31 +0530 Subject: [PATCH 06/26] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 144 ++++++++++-------- 1 file changed, 80 insertions(+), 64 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index dae65c75a785..eaaf6f74b3ea 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -65,8 +65,8 @@ import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; -import 
org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -1736,66 +1736,69 @@ private void publishSegmentsIfNeeded( Function, Set> compactionStateAnnotateFunction = Function.identity(); - final boolean storeCompactionState = task.getContextValue( - Tasks.STORE_COMPACTION_STATE_KEY, - Tasks.DEFAULT_STORE_COMPACTION_STATE - ); + + Object storeCompactionStateValue = task.getQuerySpec() + .getQuery() + .getContext() + .get(Tasks.STORE_COMPACTION_STATE_KEY); + + final boolean storeCompactionState = storeCompactionStateValue != null + ? (Boolean) storeCompactionStateValue + : Tasks.DEFAULT_STORE_COMPACTION_STATE; if (storeCompactionState) { - ShardSpec shardSpec = segments.isEmpty() - ? null - : segments.stream() - .findFirst() - .get() - .getShardSpec(); - List partitionDimensions = Collections.emptyList(); - - if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { - partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); - } + + ShardSpec shardSpec = segments.isEmpty() ? 
null : segments.stream().findFirst().get().getShardSpec(); compactionStateAnnotateFunction = compactionStateAnnotateFunction( task(), context.jsonMapper(), dataSchema, - partitionDimensions + shardSpec, + queryDef.getQueryId() ); - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); } + log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); publishAllSegments(compactionStateAnnotateFunction.apply(segments)); } } public static Function, Set> compactionStateAnnotateFunction( - MSQControllerTask task, - ObjectMapper jsonMapper, - DataSchema dataSchema, - List partitionDimensions + MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, ShardSpec shardSpec, String queryId ) { - IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); - GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); + DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); + if (!destination.isReplaceTimeChunks()) { + // Only do this for replace queries, whether originating directly or via compaction + log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE", queryId); + return Function.identity(); + } - if (granularitySpec instanceof ArbitraryGranularitySpec - && task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { + GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY - Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( - jsonMapper, - task.getQuerySpec() - .getQuery() - .getContext() - ); - granularitySpec = new UniformGranularitySpec( - segmentGranularity, - granularitySpec.getQueryGranularity(), - granularitySpec.isRollup(), - granularitySpec.inputIntervals() - ); + if 
(task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) == null) { + // This is a defensive check. Should never enter here. + log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set", queryId); + return Function.identity(); } + // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY + Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( + jsonMapper, + task.getQuerySpec() + .getQuery() + .getContext() + ); + + granularitySpec = new UniformGranularitySpec( + segmentGranularity, + granularitySpec.getQueryGranularity(), + granularitySpec.isRollup(), + granularitySpec.inputIntervals() + ); + + DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); Map transformSpec = dataSchema.getTransformSpec() == null || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) @@ -1806,36 +1809,49 @@ public static Function, Set> compactionStateAnnota List metricsSpec = dataSchema.getAggregators() == null ? 
null : jsonMapper.convertValue( - dataSchema.getAggregators(), - new TypeReference>() - { - } - ); - - // Even if partition dimensions is empty, use DimensionRangePartitionsSpec to record other info - // such as rowsPerSegment - - PartitionsSpec partitionSpec = new DimensionRangePartitionsSpec( - task.getQuerySpec() - .getTuningConfig() - .getRowsPerSegment(), + dataSchema.getAggregators(), new TypeReference>() + { + }); + + PartitionsSpec partitionSpec; + + if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + partitionSpec = new DimensionRangePartitionsSpec( + task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null, partitionDimensions, false ); - final CompactionState compactionState = new CompactionState( - partitionSpec, - dimensionsSpec, - metricsSpec, - transformSpec, - indexSpec.asMap(jsonMapper), - granularitySpec.asMap(jsonMapper) + } else if (shardSpec != null && Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); + } else { + log.error( + "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type", + queryId ); - return segments -> segments - .stream() - .map(s -> s.withLastCompactionState(compactionState)) - .collect(Collectors.toSet()); + return Function.identity(); + } + + IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); + + final CompactionState compactionState = new CompactionState( + partitionSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec.asMap(jsonMapper), + granularitySpec.asMap(jsonMapper) + ); + + log.info("Query [%s] storing compaction state in segments", queryId); + + return segments -> segments + .stream() + .map(s -> 
s.withLastCompactionState(compactionState)) + .collect(Collectors.toSet()); } /** From cf37c65785af6f36d7de8ff765339a4f5187e619 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 5 Mar 2024 10:57:30 +0530 Subject: [PATCH 07/26] Add tests for compaction state --- .run/Coordinator w_ MSQ.run.xml | 19 ++ .../apache/druid/msq/exec/ControllerImpl.java | 40 +-- .../msq/util/MultiStageQueryContext.java | 2 +- .../apache/druid/msq/exec/MSQReplaceTest.java | 244 +++++++++++++++++- .../apache/druid/msq/test/MSQTestBase.java | 16 ++ 5 files changed, 295 insertions(+), 26 deletions(-) create mode 100644 .run/Coordinator w_ MSQ.run.xml diff --git a/.run/Coordinator w_ MSQ.run.xml b/.run/Coordinator w_ MSQ.run.xml new file mode 100644 index 000000000000..074b578ea722 --- /dev/null +++ b/.run/Coordinator w_ MSQ.run.xml @@ -0,0 +1,19 @@ + + + + \ No newline at end of file diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index eaaf6f74b3ea..1dbad6b3e228 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1731,24 +1731,25 @@ private void publishSegmentsIfNeeded( //noinspection unchecked @SuppressWarnings("unchecked") final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) - .getProcessorFactory()).getDataSchema(); Function, Set> compactionStateAnnotateFunction = Function.identity(); + Boolean storeCompactionState = (Boolean) task.getQuerySpec() + .getQuery() + .getContext() + .get(Tasks.STORE_COMPACTION_STATE_KEY); - Object storeCompactionStateValue = task.getQuerySpec() - .getQuery() - .getContext() - 
.get(Tasks.STORE_COMPACTION_STATE_KEY); + if (storeCompactionState == null) { + storeCompactionState = Tasks.DEFAULT_STORE_COMPACTION_STATE; - final boolean storeCompactionState = storeCompactionStateValue != null - ? (Boolean) storeCompactionStateValue - : Tasks.DEFAULT_STORE_COMPACTION_STATE; + } + + if (!segments.isEmpty() && storeCompactionState) { + DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) + .getProcessorFactory()).getDataSchema(); - if (storeCompactionState) { - ShardSpec shardSpec = segments.isEmpty() ? null : segments.stream().findFirst().get().getShardSpec(); + ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); compactionStateAnnotateFunction = compactionStateAnnotateFunction( task(), @@ -1757,6 +1758,7 @@ private void publishSegmentsIfNeeded( shardSpec, queryDef.getQueryId() ); + } log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); @@ -1771,7 +1773,7 @@ public static Function, Set> compactionStateAnnota DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { // Only do this for replace queries, whether originating directly or via compaction - log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE", queryId); + log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE.", queryId); return Function.identity(); } @@ -1779,7 +1781,7 @@ public static Function, Set> compactionStateAnnota if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) == null) { // This is a defensive check. Should never enter here. 
- log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set", queryId); + log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set.", queryId); return Function.identity(); } @@ -1815,8 +1817,8 @@ public static Function, Set> compactionStateAnnota PartitionsSpec partitionSpec; - if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); partitionSpec = new DimensionRangePartitionsSpec( task.getQuerySpec().getTuningConfig().getRowsPerSegment(), @@ -1825,12 +1827,12 @@ public static Function, Set> compactionStateAnnota false ); - } else if (shardSpec != null && Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); } else { log.error( - "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type", - queryId + "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type [%s].", + queryId, shardSpec.getType() ); return Function.identity(); } @@ -1846,7 +1848,7 @@ public static Function, Set> compactionStateAnnota granularitySpec.asMap(jsonMapper) ); - log.info("Query [%s] storing compaction state in segments", queryId); + log.info("Query [%s] storing compaction state in segments.", queryId); return segments -> segments .stream() diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java index b7340343c810..6ab213a06c4d 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java @@ -126,7 +126,7 @@ public class MultiStageQueryContext public static final String DEFAULT_CLUSTER_STATISTICS_MERGE_MODE = ClusterStatisticsMergeMode.SEQUENTIAL.toString(); public static final String CTX_ROWS_PER_SEGMENT = "rowsPerSegment"; - static final int DEFAULT_ROWS_PER_SEGMENT = 3000000; + public static final int DEFAULT_ROWS_PER_SEGMENT = 3000000; public static final String CTX_ROWS_PER_PAGE = "rowsPerPage"; static final int DEFAULT_ROWS_PER_PAGE = 100000; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index ea7adc866ee0..580e2c0c9926 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -23,18 +23,34 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.DoubleDimensionSchema; +import org.apache.druid.data.input.impl.FloatDimensionSchema; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import 
org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.granularity.GranularityType; import org.apache.druid.msq.test.CounterSnapshotMatcher; import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.msq.test.MSQTestFileUtils; import org.apache.druid.msq.test.MSQTestTaskActionClient; +import org.apache.druid.msq.util.MultiStageQueryContext; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -62,14 +78,18 @@ public class MSQReplaceTest extends MSQTestBase { - private static final String WITH_REPLACE_LOCK = "WITH_REPLACE_LOCK"; - private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK = + private static final String WITH_REPLACE_LOCK_AND_COMPACTION_STATE = "WITH_REPLACE_LOCK_AND_COMPACTION_STATE"; + private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE = ImmutableMap.builder() .putAll(DEFAULT_MSQ_CONTEXT) .put( Tasks.TASK_LOCK_TYPE, StringUtils.toLowerCase(TaskLockType.REPLACE.name()) ) + .put( + Tasks.STORE_COMPACTION_STATE_KEY, + true + ) .build(); @Parameterized.Parameters(name = "{index}:with context {0}") @@ -80,8 +100,8 @@ public static Collection data() 
{DURABLE_STORAGE, DURABLE_STORAGE_MSQ_CONTEXT}, {FAULT_TOLERANCE, FAULT_TOLERANCE_MSQ_CONTEXT}, {PARALLEL_MERGE, PARALLEL_MERGE_MSQ_CONTEXT}, - {WITH_REPLACE_LOCK, QUERY_CONTEXT_WITH_REPLACE_LOCK} - }; + {WITH_REPLACE_LOCK_AND_COMPACTION_STATE, QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE}, + }; return Arrays.asList(data); } @@ -169,6 +189,13 @@ public void testReplaceOnFooWithAll() .with().segmentRowsProcessed(6), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -218,6 +245,13 @@ public void testReplaceOnFooWithWhere() .with().segmentRowsProcessed(1), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -293,6 +327,13 @@ public void testReplaceOnFoo1WithAllExtern() throws IOException .with().rows(1, 1, 1).frames(1, 1, 1), 1, 0, "shuffle" ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new LongDimensionSchema("cnt")), + GranularityType.HOUR + ) + ) .verifyResults(); } @@ -359,6 +400,13 @@ public void testReplaceOnFoo1WithWhereExtern() throws IOException .with().segmentRowsProcessed(4), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new StringDimensionSchema("user")), + GranularityType.HOUR + ) + ) .verifyResults(); } @@ -430,6 +478,13 @@ public void testReplaceSegmentEntireTable() .with().segmentRowsProcessed(6), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.ALL + ) + ) .verifyResults(); } @@ -513,6 +568,13 @@ public void testReplaceSegmentsRepartitionTable() 
.with().segmentRowsProcessed(6), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -586,6 +648,13 @@ public void testReplaceWithWhereClause() .with().segmentRowsProcessed(2), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -662,6 +731,13 @@ public void testReplaceWhereClauseLargerThanData() .with().segmentRowsProcessed(2), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -712,6 +788,8 @@ public void testReplaceTimeChunks() .version(MSQTestTaskActionClient.VERSION) .size(1) .build(); + + Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(new RetrieveUsedSegmentsAction( @@ -726,7 +804,6 @@ public void testReplaceTimeChunks() + "WHERE __time >= TIMESTAMP '2000-01-01' AND __time < TIMESTAMP '2000-01-03' " + "PARTITIONED BY MONTH") .setExpectedDataSource("foo") - .setQueryContext(DEFAULT_MSQ_CONTEXT) .setExpectedRowSignature(rowSignature) .setQueryContext(context) .setExpectedDestinationIntervals(Collections.singletonList(Intervals.of("2000-01-01T/2000-03-01T"))) @@ -740,6 +817,12 @@ public void testReplaceTimeChunks() ImmutableList.of( new Object[]{946684800000L, 1.0f}, new Object[]{946771200000L, 2.0f} + )) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH ) ) .verifyResults(); @@ -797,6 +880,13 @@ public void testReplaceTimeChunksLargerThanData() new Object[]{946771200000L, 2.0f} ) ) + .setExpectedLastCompactionState( + 
expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -816,6 +906,8 @@ public void testReplaceAllOverEternitySegment() .dataSource("foo") .build(); + PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); + Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(ArgumentMatchers.isA(RetrieveUsedSegmentsAction.class)); @@ -849,6 +941,14 @@ public void testReplaceAllOverEternitySegment() new Object[]{946771200000L, 2.0f} ) ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema( + "m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -871,6 +971,16 @@ public void testReplaceOnFoo1Range() .setQueryContext(context) .setExpectedSegment(expectedFooSegments()) .setExpectedResultRows(expectedFooRows()) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.singletonList("dim1"), + Arrays.asList( + new StringDimensionSchema("dim1"), + new LongDimensionSchema("cnt") + ), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -903,6 +1013,67 @@ public void testReplaceSegmentsInsertIntoNewTable() new Object[]{978480000000L, 6.0f} ) ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.ALL + ) + ) + .verifyResults(); + } + + @Test + public void testReplaceSegmentsWithQuarterSegmentGranularity() + { + RowSignature rowSignature = RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("m1", ColumnType.FLOAT) + .add("m2", ColumnType.DOUBLE) + .build(); + + testIngestQuery().setSql(" REPLACE INTO foobar " + + "OVERWRITE ALL " + + "SELECT __time, m1, m2 " + + "FROM foo " + + "PARTITIONED by TIME_FLOOR(__time, 'P3M') ") + 
.setExpectedDataSource("foobar") + .setExpectedRowSignature(rowSignature) + .setQueryContext(context) + .setExpectedDestinationIntervals(Intervals.ONLY_ETERNITY) + .setExpectedSegment(ImmutableSet.of(SegmentId.of( + "foobar", + Intervals.of( + "2000-01-01T00:00:00.000Z/2000-04-01T00:00:00.000Z"), + "test", + 0 + ), + SegmentId.of( + "foobar", + Intervals.of( + "2001-01-01T00:00:00.000Z/2001-04-01T00:00:00.000Z"), + "test", + 0 + ) + ) + ) + .setExpectedResultRows( + ImmutableList.of( + new Object[]{946684800000L, 1.0f, 1.0}, + new Object[]{946771200000L, 2.0f, 2.0}, + new Object[]{946857600000L, 3.0f, 3.0}, + new Object[]{978307200000L, 4.0f, 4.0}, + new Object[]{978393600000L, 5.0f, 5.0}, + new Object[]{978480000000L, 6.0f, 6.0} + ) + ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Arrays.asList(new FloatDimensionSchema("m1"), new DoubleDimensionSchema("m2")), + GranularityType.QUARTER + ) + ) .verifyResults(); } @@ -916,7 +1087,7 @@ public void testReplaceWithClusteredByDescendingThrowsException() + "FROM foo " + "PARTITIONED BY ALL TIME " + "CLUSTERED BY m2, m1 DESC" - ) + ) .setExpectedValidationErrorMatcher( invalidSqlIs("Invalid CLUSTERED BY clause [`m1` DESC]: cannot sort in descending order.") ) @@ -979,6 +1150,13 @@ public void testReplaceUnnestSegmentEntireTable() .with().segmentRowsProcessed(8), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new StringDimensionSchema("d")), + GranularityType.ALL + ) + ) .verifyResults(); } @@ -1042,6 +1220,13 @@ public void testReplaceUnnestWithVirtualColumnSegmentEntireTable() .with().segmentRowsProcessed(12), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("d")), + GranularityType.ALL + ) + ) .verifyResults(); } @@ -1116,6 +1301,13 @@ public void testReplaceUnnestSegmentWithTimeFilter() 
.with().segmentRowsProcessed(8), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.singletonList("d"), + Collections.singletonList(new StringDimensionSchema("d")), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -1579,4 +1771,44 @@ private List expectedFooRows() )); return expectedRows; } + private CompactionState expectedCompactionState(List partitionDimensions, List dimensions, + GranularityType segmentGranularity + ){ + if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))){ + return null; + } + PartitionsSpec partitionsSpec; + if (partitionDimensions.isEmpty()) { + partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); + + } else { + partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null, + partitionDimensions, false + ); + } + DimensionsSpec dimensionsSpec = new DimensionsSpec.Builder(). 
+ setDimensions(dimensions) + .setDimensionExclusions(Collections.singletonList( + "__time")) + .build(); + + IndexSpec indexSpec = new IndexSpec(null, null, null, null, null, null, null); + GranularitySpec granularitySpec = new UniformGranularitySpec( + segmentGranularity.getDefaultGranularity(), + GranularityType.NONE.getDefaultGranularity(), + false, + Intervals.ONLY_ETERNITY + ); + List metricsSpec = Collections.emptyList(); + + return new CompactionState( + partitionsSpec, + dimensionsSpec, + metricsSpec, + null, + indexSpec.asMap(objectMapper), + granularitySpec.asMap(objectMapper) + ); + + } } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index f8fc01b9369f..101947fce545 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -181,6 +181,7 @@ import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.storage.local.LocalFileStorageConnector; +import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; import org.apache.druid.timeline.SegmentId; @@ -866,6 +867,7 @@ public abstract class MSQTester> protected MSQSpec expectedMSQSpec = null; protected MSQTuningConfig expectedTuningConfig = null; protected Set expectedSegments = null; + protected CompactionState expectedLastCompactionState = null; protected Set expectedTombstoneIntervals = null; protected List expectedResultRows = null; protected Matcher expectedValidationErrorMatcher = null; @@ -912,6 +914,12 @@ public Builder setExpectedSegment(Set expectedSegments) return asBuilder(); } + public Builder setExpectedLastCompactionState(CompactionState 
expectedLastCompactionState) + { + this.expectedLastCompactionState = expectedLastCompactionState; + return asBuilder(); + } + public Builder setExpectedTombstoneIntervals(Set tombstoneIntervals) { Preconditions.checkArgument(!tombstoneIntervals.isEmpty(), "Segments cannot be empty"); @@ -1279,6 +1287,12 @@ public void verifyResults() // SegmentGeneratorFrameProcessorFactory. We can get the tombstone segment ids published by taking a set // difference of all the segments published with the segments that are created by the SegmentGeneratorFrameProcessorFactory if (!testTaskActionClient.getPublishedSegments().isEmpty()) { + if (expectedLastCompactionState != null){ + CompactionState compactionState = testTaskActionClient.getPublishedSegments().stream().findFirst().get() + .getLastCompactionState(); + Assert.assertEquals(expectedLastCompactionState, compactionState); + + } Set publishedSegmentIds = testTaskActionClient.getPublishedSegments() .stream() .map(DataSegment::getId) @@ -1496,4 +1510,6 @@ private static List resultSignatureFromRowSignat } return retVal; } + } + From f877b91c2addb81845f5b131f94e7ee21b254088 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 5 Mar 2024 11:04:18 +0530 Subject: [PATCH 08/26] Corrections --- .run/Coordinator w_ MSQ.run.xml | 19 ------------------- .../apache/druid/msq/exec/MSQReplaceTest.java | 1 - 2 files changed, 20 deletions(-) delete mode 100644 .run/Coordinator w_ MSQ.run.xml diff --git a/.run/Coordinator w_ MSQ.run.xml b/.run/Coordinator w_ MSQ.run.xml deleted file mode 100644 index 074b578ea722..000000000000 --- a/.run/Coordinator w_ MSQ.run.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - \ No newline at end of file diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 580e2c0c9926..9654fd3c1439 100644 --- 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -44,7 +44,6 @@ import org.apache.druid.msq.test.MSQTestFileUtils; import org.apache.druid.msq.test.MSQTestTaskActionClient; import org.apache.druid.msq.util.MultiStageQueryContext; -import org.apache.druid.query.QueryContexts; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; From 64646059e11552281a53fe35c97a6a90bffb9512 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 10:45:01 +0530 Subject: [PATCH 09/26] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 27 ++++++------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 1dbad6b3e228..18460078b70c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -207,7 +207,6 @@ import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DruidServerMetadata; -import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.planner.ColumnMapping; import org.apache.druid.sql.calcite.planner.ColumnMappings; import org.apache.druid.sql.calcite.rel.DruidQuery; @@ -1745,8 +1744,8 @@ private void publishSegmentsIfNeeded( } if (!segments.isEmpty() && storeCompactionState) { - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) - .getProcessorFactory()).getDataSchema(); + 
DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel + .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); @@ -1773,15 +1772,7 @@ public static Function, Set> compactionStateAnnota DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { // Only do this for replace queries, whether originating directly or via compaction - log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE.", queryId); - return Function.identity(); - } - - GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - - if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) == null) { - // This is a defensive check. Should never enter here. - log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set.", queryId); + log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryId); return Function.identity(); } @@ -1793,17 +1784,15 @@ public static Function, Set> compactionStateAnnota .getContext() ); - granularitySpec = new UniformGranularitySpec( + GranularitySpec granularitySpec = new UniformGranularitySpec( segmentGranularity, - granularitySpec.getQueryGranularity(), - granularitySpec.isRollup(), - granularitySpec.inputIntervals() + dataSchema.getGranularitySpec().getQueryGranularity(), + dataSchema.getGranularitySpec().isRollup(), + dataSchema.getGranularitySpec().inputIntervals() ); - DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); - Map transformSpec = dataSchema.getTransformSpec() == null - || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) + Map transformSpec = TransformSpec.NONE.equals(dataSchema.getTransformSpec()) ? 
null : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() .getFilter()).asMap( From b24a7c9e8a99564dd0f176d11f30e1c0256b7351 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 10:47:33 +0530 Subject: [PATCH 10/26] Remove unused var --- .../src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 9654fd3c1439..b9674ab39d47 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -905,8 +905,6 @@ public void testReplaceAllOverEternitySegment() .dataSource("foo") .build(); - PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); - Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(ArgumentMatchers.isA(RetrieveUsedSegmentsAction.class)); From 1a0517cc02142b9d4fef7346f2178907a9ae5d17 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 11:08:21 +0530 Subject: [PATCH 11/26] Fix compilation errors due to junit5 migration --- .../apache/druid/msq/exec/MSQReplaceTest.java | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 10d34c402a26..3743a42eb073 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -75,7 +75,7 @@ public class MSQReplaceTest extends 
MSQTestBase { - private static final String WITH_REPLACE_LOCK_AND_COMPACTION_STATE = "WITH_REPLACE_LOCK_AND_COMPACTION_STATE"; + private static final String WITH_REPLACE_LOCK_AND_COMPACTION_STATE = "with_replace_lock_and_compaction_state"; private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE = ImmutableMap.builder() .putAll(DEFAULT_MSQ_CONTEXT) @@ -181,7 +181,7 @@ public void testReplaceOnFooWithAll(String contextName, Map cont ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -238,7 +238,7 @@ public void testReplaceOnFooWithWhere(String contextName, Map co ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -321,7 +321,7 @@ public void testReplaceOnFoo1WithAllExtern(String contextName, Map c ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -731,7 +731,7 @@ public void testReplaceWhereClauseLargerThanData(String contextName, Map contex .version(MSQTestTaskActionClient.VERSION) .size(1) .build(); - - Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(new RetrieveUsedSegmentsAction( @@ -821,7 +819,7 @@ public void testReplaceTimeChunks(String contextName, Map contex )) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -884,7 +882,7 @@ public void testReplaceTimeChunksLargerThanData(String contextName, Map conte .setExpectedResultRows(expectedFooRows()) .setExpectedLastCompactionState( 
expectedCompactionState( - Collections.singletonList("dim1"), + context, Collections.singletonList("dim1"), Arrays.asList( new StringDimensionSchema("dim1"), new LongDimensionSchema("cnt") @@ -1057,7 +1055,7 @@ public void testReplaceSegmentsInsertIntoNewTable(String contextName, Map context) { RowSignature rowSignature = RowSignature.builder() .add("__time", ColumnType.LONG) @@ -1111,7 +1110,7 @@ public void testReplaceSegmentsWithQuarterSegmentGranularity() ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Arrays.asList(new FloatDimensionSchema("m1"), new DoubleDimensionSchema("m2")), GranularityType.QUARTER ) @@ -1196,7 +1195,7 @@ public void testReplaceUnnestSegmentEntireTable(String contextName, Map expectedFooRows() )); return expectedRows; } - private CompactionState expectedCompactionState(List partitionDimensions, List dimensions, - GranularityType segmentGranularity + private CompactionState expectedCompactionState( + Map context, List partitionDimensions, List dimensions, + GranularityType segmentGranularity ){ if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))){ return null; From f40252316d02392fe4b00610bef6cd47a2f15b28 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 18:31:03 +0530 Subject: [PATCH 12/26] Separate compactionStateAnnotationFunction to a common place, and other minor changes. 
--- .../apache/druid/msq/exec/ControllerImpl.java | 117 +++++++++--------- .../common/task/AbstractBatchIndexTask.java | 8 +- .../druid/indexing/common/task/IndexTask.java | 2 +- .../parallel/ParallelIndexSupervisorTask.java | 2 +- .../druid/timeline/CompactionState.java | 28 +++++ 5 files changed, 88 insertions(+), 69 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index aba6fb2d1097..d7c92bdc097f 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1729,10 +1729,11 @@ private void publishSegmentsIfNeeded( Function, Set> compactionStateAnnotateFunction = Function.identity(); - Boolean storeCompactionState = (Boolean) task.getQuerySpec() - .getQuery() - .getContext() - .get(Tasks.STORE_COMPACTION_STATE_KEY); + Boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) + .getBoolean( + Tasks.STORE_COMPACTION_STATE_KEY, + Tasks.DEFAULT_STORE_COMPACTION_STATE + ); if (storeCompactionState == null) { storeCompactionState = Tasks.DEFAULT_STORE_COMPACTION_STATE; @@ -1740,20 +1741,26 @@ private void publishSegmentsIfNeeded( } if (!segments.isEmpty() && storeCompactionState) { - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel - .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); + DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); + if (!destination.isReplaceTimeChunks()) { + // Only do this for replace queries, whether originating directly or via compaction + log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryDef.getQueryId()); + } else { - ShardSpec shardSpec = 
segments.stream().findFirst().get().getShardSpec(); + DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel + .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); - compactionStateAnnotateFunction = compactionStateAnnotateFunction( - task(), - context.jsonMapper(), - dataSchema, - shardSpec, - queryDef.getQueryId() - ); + ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); + compactionStateAnnotateFunction = prepareCompactionStateAnnotateFunction( + task(), + context.jsonMapper(), + dataSchema, + shardSpec, + queryDef.getQueryId() + ); + } } log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); @@ -1761,24 +1768,34 @@ private void publishSegmentsIfNeeded( } } - public static Function, Set> compactionStateAnnotateFunction( + public static Function, Set> prepareCompactionStateAnnotateFunction( MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, ShardSpec shardSpec, String queryId ) { - DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); - if (!destination.isReplaceTimeChunks()) { - // Only do this for replace queries, whether originating directly or via compaction - log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryId); + PartitionsSpec partitionSpec; + + if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + partitionSpec = new DimensionRangePartitionsSpec( + task.getQuerySpec().getTuningConfig().getRowsPerSegment(), + null, + partitionDimensions, + false + ); + + } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); + } else { + log.error( + "Query [%s] skipping storing compaction 
state in segments as shard spec of unsupported type [%s].", + queryId, shardSpec.getType() + ); return Function.identity(); } - // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY - Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( - jsonMapper, - task.getQuerySpec() - .getQuery() - .getContext() - ); + Granularity segmentGranularity = ((DataSourceMSQDestination) task.getQuerySpec() + .getDestination()).getSegmentGranularity(); GranularitySpec granularitySpec = new UniformGranularitySpec( segmentGranularity, @@ -1800,31 +1817,12 @@ public static Function, Set> compactionStateAnnota { }); - PartitionsSpec partitionSpec; - - if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { - List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); - partitionSpec = new DimensionRangePartitionsSpec( - task.getQuerySpec().getTuningConfig().getRowsPerSegment(), - null, - partitionDimensions, - false - ); - - } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); - } else { - log.error( - "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type [%s].", - queryId, shardSpec.getType() - ); - return Function.identity(); - } IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); - final CompactionState compactionState = new CompactionState( + log.info("Query [%s] storing compaction state in segments.", queryId); + + return CompactionState.compactionStateAnnotateFunction( partitionSpec, dimensionsSpec, metricsSpec, @@ -1832,13 +1830,6 @@ public static Function, Set> compactionStateAnnota indexSpec.asMap(jsonMapper), granularitySpec.asMap(jsonMapper) ); - - log.info("Query [%s] storing compaction state in segments.", 
queryId); - - return segments -> segments - .stream() - .map(s -> s.withLastCompactionState(compactionState)) - .collect(Collectors.toSet()); } /** @@ -1901,7 +1892,8 @@ private static QueryDefinition makeQueryDefinition( } } else { shuffleSpecFactory = querySpec.getDestination() - .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context())); + .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery() + .context())); queryToPlan = querySpec.getQuery(); } @@ -2003,9 +1995,11 @@ private static QueryDefinition makeQueryDefinition( if (filesIterator.hasNext()) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build("Found files at provided export destination[%s]. Export is only allowed to " - + "an empty path. Please provide an empty path/subdirectory or move the existing files.", - exportStorageProvider.getBasePath()); + .build( + "Found files at provided export destination[%s]. Export is only allowed to " + + "an empty path. Please provide an empty path/subdirectory or move the existing files.", + exportStorageProvider.getBasePath() + ); } } catch (IOException e) { @@ -2037,7 +2031,6 @@ private static QueryDefinition makeQueryDefinition( } - private static DataSchema generateDataSchema( MSQSpec querySpec, RowSignature querySignature, @@ -2486,7 +2479,9 @@ private static MSQStatusReport makeStatusReport( workerStatsMap = taskLauncher.getWorkerStats(); } - SegmentLoadStatusFetcher.SegmentLoadWaiterStatus status = segmentLoadWaiter == null ? null : segmentLoadWaiter.status(); + SegmentLoadStatusFetcher.SegmentLoadWaiterStatus status = segmentLoadWaiter == null + ? 
null + : segmentLoadWaiter.status(); return new MSQStatusReport( taskState, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 4a76e688fb7a..fcad74070cb2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -583,7 +583,7 @@ public static boolean isGuaranteedRollup( return tuningConfig.isForceGuaranteedRollup(); } - public static Function, Set> compactionStateAnnotateFunction( + public static Function, Set> prepareCompactionStateAnnotateFunction( boolean storeCompactionState, TaskToolbox toolbox, IngestionSpec ingestionSpec @@ -604,7 +604,7 @@ public static Function, Set> compactionStateAnnota ? null : toolbox.getJsonMapper().convertValue(ingestionSpec.getDataSchema().getAggregators(), new TypeReference>() {}); - final CompactionState compactionState = new CompactionState( + return CompactionState.compactionStateAnnotateFunction( tuningConfig.getPartitionsSpec(), dimensionsSpec, metricsSpec, @@ -612,10 +612,6 @@ public static Function, Set> compactionStateAnnota tuningConfig.getIndexSpec().asMap(toolbox.getJsonMapper()), granularitySpec.asMap(toolbox.getJsonMapper()) ); - return segments -> segments - .stream() - .map(s -> s.withLastCompactionState(compactionState)) - .collect(Collectors.toSet()); } else { return Function.identity(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 50e13a93c0be..a23e918cbda9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -991,7 
+991,7 @@ private TaskStatus generateAndPublishSegments( Tasks.DEFAULT_STORE_COMPACTION_STATE ); final Function, Set> annotateFunction = - compactionStateAnnotateFunction( + prepareCompactionStateAnnotateFunction( storeCompactionState, toolbox, ingestionSchema diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index db497dff5ecf..45bd9518712e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -1149,7 +1149,7 @@ private void publishSegments( Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE ); - final Function, Set> annotateFunction = compactionStateAnnotateFunction( + final Function, Set> annotateFunction = prepareCompactionStateAnnotateFunction( storeCompactionState, toolbox, ingestionSchema diff --git a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java index cb9ddf1a93b5..43d53ffe3312 100644 --- a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java +++ b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java @@ -27,6 +27,9 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; /** * This class describes what compaction task spec was used to create a given segment. 
@@ -146,4 +149,29 @@ public String toString() ", metricsSpec=" + metricsSpec + '}'; } + + public static Function, Set> compactionStateAnnotateFunction( + PartitionsSpec partitionsSpec, + DimensionsSpec dimensionsSpec, + List metricsSpec, + Map transformSpec, + Map indexSpec, + Map granularitySpec + ) + { + CompactionState compactionState = new CompactionState( + partitionsSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec, + granularitySpec + ); + + return segments -> segments + .stream() + .map(s -> s.withLastCompactionState(compactionState)) + .collect(Collectors.toSet()); + } + } From 13f2c9903e2d85c2ffae46f2df0b9e6c0e5c6539 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 18:43:50 +0530 Subject: [PATCH 13/26] Checkstyle fixes --- .../java/org/apache/druid/msq/exec/MSQReplaceTest.java | 9 ++++++--- .../test/java/org/apache/druid/msq/test/MSQTestBase.java | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 3743a42eb073..ef625cc813e3 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -1829,11 +1829,14 @@ private List expectedFooRows() )); return expectedRows; } + private CompactionState expectedCompactionState( Map context, List partitionDimensions, List dimensions, GranularityType segmentGranularity - ){ - if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))){ + ) + { + if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) + || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))) { return null; } PartitionsSpec partitionsSpec; @@ -1842,7 +1845,7 @@ private CompactionState 
expectedCompactionState( } else { partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null, - partitionDimensions, false + partitionDimensions, false ); } DimensionsSpec dimensionsSpec = new DimensionsSpec.Builder(). diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index ad4035239ccd..7bc2cdda32a6 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -1268,7 +1268,7 @@ public void verifyResults() // SegmentGeneratorFrameProcessorFactory. We can get the tombstone segment ids published by taking a set // difference of all the segments published with the segments that are created by the SegmentGeneratorFrameProcessorFactory if (!testTaskActionClient.getPublishedSegments().isEmpty()) { - if (expectedLastCompactionState != null){ + if (expectedLastCompactionState != null) { CompactionState compactionState = testTaskActionClient.getPublishedSegments().stream().findFirst().get() .getLastCompactionState(); Assert.assertEquals(expectedLastCompactionState, compactionState); From 3b57dfae83cf17f5fbcc9c0059a5a9a7ab7aab4c Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 19:09:36 +0530 Subject: [PATCH 14/26] Try again --- .../apache/druid/msq/exec/ControllerImpl.java | 6 +- .../apache/druid/msq/exec/MSQReplaceTest.java | 55 ++++++++++++------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index d7c92bdc097f..fd8bce7dc3fe 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1769,7 +1769,11 @@ private void publishSegmentsIfNeeded( } public static Function, Set> prepareCompactionStateAnnotateFunction( - MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, ShardSpec shardSpec, String queryId + MSQControllerTask task, + ObjectMapper jsonMapper, + DataSchema dataSchema, + ShardSpec shardSpec, + String queryId ) { PartitionsSpec partitionSpec; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index ef625cc813e3..945c5bc227f0 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -181,7 +181,8 @@ public void testReplaceOnFooWithAll(String contextName, Map cont ) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -238,7 +239,8 @@ public void testReplaceOnFooWithWhere(String contextName, Map co ) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -321,7 +323,8 @@ public void testReplaceOnFoo1WithAllExtern(String contextName, Map c ) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -731,7 +738,8 @@ public void 
testReplaceWhereClauseLargerThanData(String contextName, Map contex )) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -882,7 +891,8 @@ public void testReplaceTimeChunksLargerThanData(String contextName, Map conte .setExpectedResultRows(expectedFooRows()) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.singletonList("dim1"), + context, + Collections.singletonList("dim1"), Arrays.asList( new StringDimensionSchema("dim1"), new LongDimensionSchema("cnt") @@ -1055,7 +1067,8 @@ public void testReplaceSegmentsInsertIntoNewTable(String contextName, Map Date: Fri, 22 Mar 2024 19:49:04 +0530 Subject: [PATCH 15/26] Update doc --- docs/multi-stage-query/reference.md | 39 +++++++++++++++-------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 0b10e14b50f9..1d64d29c34f7 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,26 +346,27 @@ If you're using the web console, you can specify the context parameters through The following table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|---|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | +| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | ## Joins From e59c1bcabe3cbc8b16f7eefff13177957547d29d Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 19:54:46 +0530 Subject: [PATCH 16/26] Revert additional indentation changes --- docs/multi-stage-query/reference.md | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 1d64d29c34f7..505294edefa5 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,27 +346,27 @@ If you're using the web console, you can specify the context parameters through The following table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|---|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | +| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | ## Joins From cbc582d236c256a8949940375619711a08b0b82c Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Sat, 23 Mar 2024 12:39:58 +0530 Subject: [PATCH 17/26] Resolve coverage test for druid-processing --- docs/multi-stage-query/reference.md | 2 +- .../apache/druid/msq/exec/ControllerImpl.java | 7 +-- .../druid/timeline/DataSegmentTest.java | 43 ++++++++++++++++++- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 505294edefa5..9bb4388e208f 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -366,7 +366,7 @@ The following table lists the context parameters for the MSQ task engine: | `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | | `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | | `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | +| `storeCompactionState` | REPLACE

REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [storeCompactionState](../ingestion/tasks.md#context-parameters) task context flag. | `false` | ## Joins diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index fd8bce7dc3fe..eb8e7ba29ea2 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1729,17 +1729,12 @@ private void publishSegmentsIfNeeded( Function, Set> compactionStateAnnotateFunction = Function.identity(); - Boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) + boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) .getBoolean( Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE ); - if (storeCompactionState == null) { - storeCompactionState = Tasks.DEFAULT_STORE_COMPACTION_STATE; - - } - if (!segments.isEmpty() && storeCompactionState) { DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); diff --git a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java index 97c3d7a2aaac..aebdb9493000 100644 --- a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import 
com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.RangeSet; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; @@ -47,6 +48,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.function.Function; /** */ @@ -361,10 +364,48 @@ public void testWithLastCompactionState() .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) .shardSpec(getShardSpec(7)) .size(0) - .build(); + .build(); Assert.assertEquals(segment1, segment2.withLastCompactionState(compactionState)); } + @Test + public void testAnnotateWithLastCompactionState() + { + final CompactionState compactionState = new CompactionState( + new DynamicPartitionsSpec(null, null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), + ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), + ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), + Collections.singletonMap("test", "map"), + Collections.singletonMap("test2", "map2") + ); + + final Function, Set> annotateFn = CompactionState.compactionStateAnnotateFunction( + new DynamicPartitionsSpec(null, null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), + ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), + ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), + Collections.singletonMap("test", "map"), + Collections.singletonMap("test2", "map2") + ); + final DataSegment segment1 = DataSegment.builder() + .dataSource("foo") + .interval(Intervals.of("2012-01-01/2012-01-02")) + .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) + .shardSpec(getShardSpec(7)) + .size(0) + .lastCompactionState(compactionState) + .build(); + final DataSegment segment2 
= DataSegment.builder() + .dataSource("foo") + .interval(Intervals.of("2012-01-01/2012-01-02")) + .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) + .shardSpec(getShardSpec(7)) + .size(0) + .build(); + Assert.assertEquals(ImmutableSet.of(segment1), annotateFn.apply(ImmutableSet.of(segment2))); + } + @Test public void testTombstoneType() { From 316e378376fe4b9eee6c49cf02c966c861a39edc Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 1 Apr 2024 09:35:34 +0530 Subject: [PATCH 18/26] Suppress spelling error --- website/.spelling | 1 + 1 file changed, 1 insertion(+) diff --git a/website/.spelling b/website/.spelling index 37e43c9d0c0b..e013d4eebc56 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1328,6 +1328,7 @@ valueFormat IOConfig compactionTask compactionTasks +storeCompactionState ingestSegmentFirehose numShards IngestSegment From c87ff9fe6aa48673248c4daa7da855965fdf7854 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 2 Apr 2024 11:05:04 +0530 Subject: [PATCH 19/26] Address review comments --- docs/multi-stage-query/reference.md | 40 +++++----- .../apache/druid/msq/exec/ControllerImpl.java | 75 +++++++++++-------- .../apache/druid/msq/exec/MSQReplaceTest.java | 44 +++++------ .../common/task/AbstractBatchIndexTask.java | 4 +- .../indexing/common/task/CompactionTask.java | 5 +- .../druid/indexing/common/task/IndexTask.java | 2 +- .../parallel/ParallelIndexSupervisorTask.java | 2 +- .../partitions/DynamicPartitionsSpec.java | 4 + .../druid/timeline/CompactionState.java | 2 +- .../druid/timeline/DataSegmentTest.java | 47 ++++++++---- website/.spelling | 1 - 11 files changed, 127 insertions(+), 99 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 9bb4388e208f..0220a2ebca8e 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,27 +346,27 @@ If you're using the web console, you can specify the context parameters through The following 
table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|---|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [storeCompactionState](../ingestion/tasks.md#context-parameters) task context flag. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`. If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.<br />

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport` (the default) to write select results to the task report. This is not scalable since the task report size explodes for large results.<br />
Use `durableStorage` to write results to a durable storage location. For large result sets, it's recommended to use `durableStorage`. To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE<br />

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage`. | 100000 | +| `skipTypeVerification` | INSERT or REPLACE<br />

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` field that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [`storeCompactionState`](../ingestion/tasks.md#context-parameters) task context flag. | `false` | ## Joins diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index eb8e7ba29ea2..fd2408b2b4c1 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -67,6 +67,7 @@ import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -219,6 +220,7 @@ import org.apache.druid.timeline.partition.NumberedPartialShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.ShardSpec; +import org.apache.druid.timeline.partition.SingleDimensionShardSpec; import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -1725,9 +1727,7 @@ private void publishSegmentsIfNeeded( //noinspection unchecked @SuppressWarnings("unchecked") - final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); - - Function, Set> compactionStateAnnotateFunction = Function.identity(); + Set segments = (Set) 
queryKernel.getResultObjectForStage(finalStageId); boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) .getBoolean( @@ -1736,34 +1736,35 @@ private void publishSegmentsIfNeeded( ); if (!segments.isEmpty() && storeCompactionState) { - DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { - // Only do this for replace queries, whether originating directly or via compaction - log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryDef.getQueryId()); + // Store compaction state only for replace queries. + log.error( + "storeCompactionState flag set for a non-REPLACE query [%s]. Ignoring the flag for now.", + queryDef.getQueryId() + ); } else { - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); - compactionStateAnnotateFunction = prepareCompactionStateAnnotateFunction( + Function, Set> compactionStateAnnotateFunction = addCompactionStateToSegments( task(), context.jsonMapper(), dataSchema, shardSpec, queryDef.getQueryId() ); + segments = compactionStateAnnotateFunction.apply(segments); } } - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); - publishAllSegments(compactionStateAnnotateFunction.apply(segments)); + publishAllSegments(segments); } } - public static Function, Set> prepareCompactionStateAnnotateFunction( + private static Function, Set> addCompactionStateToSegments( MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, @@ -1771,30 +1772,41 @@ public static Function, Set> prepareCompactionStat String queryId ) { + final MSQTuningConfig tuningConfig = task.getQuerySpec().getTuningConfig(); PartitionsSpec partitionSpec; - if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) 
- || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. + if (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE)) { + String partitionDimension = ((SingleDimensionShardSpec) shardSpec).getDimension(); + partitionSpec = new SingleDimensionPartitionsSpec( + tuningConfig.getRowsPerSegment(), + null, + partitionDimension, + false + ); + } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) { List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); partitionSpec = new DimensionRangePartitionsSpec( - task.getQuerySpec().getTuningConfig().getRowsPerSegment(), + tuningConfig.getRowsPerSegment(), null, partitionDimensions, false ); - } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); + // Using Long.MAX_VALUE for MaxTotalRows as that is the default used by a compaction task. 
+ partitionSpec = new DynamicPartitionsSpec(null, DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS); } else { - log.error( - "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type [%s].", - queryId, shardSpec.getType() - ); - return Function.identity(); + throw new MSQException( + UnknownFault.forMessage( + StringUtils.format( + "Query[%s] cannot store compaction state in segments as shard spec of unsupported type[%s].", + queryId, + shardSpec.getType() + ))); } - Granularity segmentGranularity = ((DataSourceMSQDestination) task.getQuerySpec() - .getDestination()).getSegmentGranularity(); + Granularity segmentGranularity = ((DataSourceMSQDestination) task.getQuerySpec().getDestination()) + .getSegmentGranularity(); GranularitySpec granularitySpec = new UniformGranularitySpec( segmentGranularity, @@ -1806,9 +1818,9 @@ public static Function, Set> prepareCompactionStat DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); Map transformSpec = TransformSpec.NONE.equals(dataSchema.getTransformSpec()) ? null - : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() - .getFilter()).asMap( - jsonMapper); + : new ClientCompactionTaskTransformSpec( + dataSchema.getTransformSpec().getFilter() + ).asMap(jsonMapper); List metricsSpec = dataSchema.getAggregators() == null ? 
null : jsonMapper.convertValue( @@ -1817,11 +1829,11 @@ public static Function, Set> prepareCompactionStat }); - IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); + IndexSpec indexSpec = tuningConfig.getIndexSpec(); - log.info("Query [%s] storing compaction state in segments.", queryId); + log.info("Query[%s] storing compaction state in segments.", queryId); - return CompactionState.compactionStateAnnotateFunction( + return CompactionState.addCompactionStateToSegments( partitionSpec, dimensionsSpec, metricsSpec, @@ -1891,8 +1903,9 @@ private static QueryDefinition makeQueryDefinition( } } else { shuffleSpecFactory = querySpec.getDestination() - .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery() - .context())); + .getShuffleSpecFactory( + MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context()) + ); queryToPlan = querySpec.getQuery(); } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 945c5bc227f0..be15bfdcd996 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -83,10 +83,7 @@ public class MSQReplaceTest extends MSQTestBase Tasks.TASK_LOCK_TYPE, StringUtils.toLowerCase(TaskLockType.REPLACE.name()) ) - .put( - Tasks.STORE_COMPACTION_STATE_KEY, - true - ) + .put( Tasks.STORE_COMPACTION_STATE_KEY, true) .build(); public static Collection data() @@ -954,8 +951,7 @@ public void testReplaceAllOverEternitySegment(String contextName, Map expectedFooRows() } private CompactionState expectedCompactionState( - Map context, List partitionDimensions, List dimensions, + Map context, + List partitionDimensions, + List dimensions, GranularityType segmentGranularity ) { diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index fcad74070cb2..c4ffcb8aae41 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -583,7 +583,7 @@ public static boolean isGuaranteedRollup( return tuningConfig.isForceGuaranteedRollup(); } - public static Function, Set> prepareCompactionStateAnnotateFunction( + public static Function, Set> addCompactionStateToSegments( boolean storeCompactionState, TaskToolbox toolbox, IngestionSpec ingestionSpec @@ -604,7 +604,7 @@ public static Function, Set> prepareCompactionStat ? null : toolbox.getJsonMapper().convertValue(ingestionSpec.getDataSchema().getAggregators(), new TypeReference>() {}); - return CompactionState.compactionStateAnnotateFunction( + return CompactionState.addCompactionStateToSegments( tuningConfig.getPartitionsSpec(), dimensionsSpec, metricsSpec, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index 59a0a499f917..72be46785ab8 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -1219,10 +1219,7 @@ CompactionTuningConfig computeTuningConfig() final DynamicPartitionsSpec dynamicPartitionsSpec = (DynamicPartitionsSpec) partitionsSpec; partitionsSpec = new DynamicPartitionsSpec( dynamicPartitionsSpec.getMaxRowsPerSegment(), - // Setting maxTotalRows to Long.MAX_VALUE to respect the computed maxRowsPerSegment. 
- // If this is set to something too small, compactionTask can generate small segments - // which need to be compacted again, which in turn making auto compaction stuck in the same interval. - dynamicPartitionsSpec.getMaxTotalRowsOr(Long.MAX_VALUE) + dynamicPartitionsSpec.getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS) ); } return newTuningConfig.withPartitionsSpec(partitionsSpec); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index a23e918cbda9..7822e58f40cf 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -991,7 +991,7 @@ private TaskStatus generateAndPublishSegments( Tasks.DEFAULT_STORE_COMPACTION_STATE ); final Function, Set> annotateFunction = - prepareCompactionStateAnnotateFunction( + addCompactionStateToSegments( storeCompactionState, toolbox, ingestionSchema diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 45bd9518712e..9929f12a6c71 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -1149,7 +1149,7 @@ private void publishSegments( Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE ); - final Function, Set> annotateFunction = prepareCompactionStateAnnotateFunction( + final Function, Set> annotateFunction = addCompactionStateToSegments( storeCompactionState, toolbox, ingestionSchema diff --git 
a/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java b/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java index 05dec7cb58e7..2c5d294f3c1c 100644 --- a/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java +++ b/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java @@ -34,6 +34,10 @@ public class DynamicPartitionsSpec implements PartitionsSpec * Default maxTotalRows for most task types except compaction task. */ public static final long DEFAULT_MAX_TOTAL_ROWS = 20_000_000; + // Using MAX_VALUE as the default for setting maxTotalRows for compaction to respect the computed maxRowsPerSegment. + // If this is set to something too small, compactionTask can generate small segments + // which need to be compacted again, which in turn making auto compaction stuck in the same interval. + public static final long DEFAULT_COMPACTION_MAX_TOTAL_ROWS = Long.MAX_VALUE; static final String NAME = "dynamic"; private final int maxRowsPerSegment; diff --git a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java index 43d53ffe3312..2c6e0d96c397 100644 --- a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java +++ b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java @@ -150,7 +150,7 @@ public String toString() '}'; } - public static Function, Set> compactionStateAnnotateFunction( + public static Function, Set> addCompactionStateToSegments( PartitionsSpec partitionsSpec, DimensionsSpec dimensionsSpec, List metricsSpec, diff --git a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java index aebdb9493000..afb743d7a1af 100644 --- a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java +++ 
b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java @@ -364,30 +364,45 @@ public void testWithLastCompactionState() .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) .shardSpec(getShardSpec(7)) .size(0) - .build(); + .build(); Assert.assertEquals(segment1, segment2.withLastCompactionState(compactionState)); } @Test public void testAnnotateWithLastCompactionState() { - final CompactionState compactionState = new CompactionState( - new DynamicPartitionsSpec(null, null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), - ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), - ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), - Collections.singletonMap("test", "map"), - Collections.singletonMap("test2", "map2") + DynamicPartitionsSpec dynamicPartitionsSpec = new DynamicPartitionsSpec(null, null); + DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of( + "bar", + "foo" + ))); + List metricsSpec = ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")); + Map transformSpec = ImmutableMap.of( + "filter", + ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo" ) ); + Map indexSpec = Collections.singletonMap("test", "map"); + Map granularitySpec = Collections.singletonMap("test2", "map"); - final Function, Set> annotateFn = CompactionState.compactionStateAnnotateFunction( - new DynamicPartitionsSpec(null, null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), - ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), - ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), - Collections.singletonMap("test", "map"), - Collections.singletonMap("test2", "map2") + final CompactionState compactionState = new CompactionState( + dynamicPartitionsSpec, + dimensionsSpec, + 
metricsSpec, + transformSpec, + indexSpec, + granularitySpec ); + + final Function, Set> addCompactionStateFunction = + CompactionState.addCompactionStateToSegments( + dynamicPartitionsSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec, + granularitySpec + ); + final DataSegment segment1 = DataSegment.builder() .dataSource("foo") .interval(Intervals.of("2012-01-01/2012-01-02")) @@ -403,7 +418,7 @@ public void testAnnotateWithLastCompactionState() .shardSpec(getShardSpec(7)) .size(0) .build(); - Assert.assertEquals(ImmutableSet.of(segment1), annotateFn.apply(ImmutableSet.of(segment2))); + Assert.assertEquals(ImmutableSet.of(segment1), addCompactionStateFunction.apply(ImmutableSet.of(segment2))); } @Test diff --git a/website/.spelling b/website/.spelling index e013d4eebc56..37e43c9d0c0b 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1328,7 +1328,6 @@ valueFormat IOConfig compactionTask compactionTasks -storeCompactionState ingestSegmentFirehose numShards IngestSegment From f18650d923e7b57922629f076ccf7edf09eb6b35 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 2 Apr 2024 11:19:25 +0530 Subject: [PATCH 20/26] Resolve checkstyle errors --- .../java/org/apache/druid/msq/exec/ControllerImpl.java | 2 +- .../java/org/apache/druid/msq/exec/MSQReplaceTest.java | 7 ++----- .../java/org/apache/druid/timeline/DataSegmentTest.java | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index fd2408b2b4c1..cc150e9f4d38 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1739,7 +1739,7 @@ private void publishSegmentsIfNeeded( DataSourceMSQDestination destination = (DataSourceMSQDestination) 
task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { // Store compaction state only for replace queries. - log.error( + log.warn( "storeCompactionState flag set for a non-REPLACE query [%s]. Ignoring the flag for now.", queryDef.getQueryId() ); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index be15bfdcd996..9d8196bd50a0 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -79,11 +79,8 @@ public class MSQReplaceTest extends MSQTestBase private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE = ImmutableMap.builder() .putAll(DEFAULT_MSQ_CONTEXT) - .put( - Tasks.TASK_LOCK_TYPE, - StringUtils.toLowerCase(TaskLockType.REPLACE.name()) - ) - .put( Tasks.STORE_COMPACTION_STATE_KEY, true) + .put(Tasks.TASK_LOCK_TYPE, StringUtils.toLowerCase(TaskLockType.REPLACE.name())) + .put(Tasks.STORE_COMPACTION_STATE_KEY, true) .build(); public static Collection data() diff --git a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java index afb743d7a1af..3f0667b870c9 100644 --- a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java @@ -379,7 +379,7 @@ public void testAnnotateWithLastCompactionState() List metricsSpec = ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")); Map transformSpec = ImmutableMap.of( "filter", - ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo" ) + ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo") ); Map indexSpec = Collections.singletonMap("test", "map"); Map 
granularitySpec = Collections.singletonMap("test2", "map"); From 49053db00c94b70005a79d04a5bf8aee41ec0506 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 2 Apr 2024 11:40:49 +0530 Subject: [PATCH 21/26] Remove redundant comment --- .../src/main/java/org/apache/druid/msq/exec/ControllerImpl.java | 1 - 1 file changed, 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index cc150e9f4d38..93b665c32bce 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1793,7 +1793,6 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - // Using Long.MAX_VALUE for MaxTotalRows as that is the default used by a compaction task. 
partitionSpec = new DynamicPartitionsSpec(null, DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS); } else { throw new MSQException( From 29ea760b83a6ae9787e5228aadf629c8009c3d30 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Thu, 4 Apr 2024 09:37:10 +0530 Subject: [PATCH 22/26] Revert maxTotalRows to null --- .../src/main/java/org/apache/druid/msq/exec/ControllerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 93b665c32bce..3373c1efd520 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1793,7 +1793,7 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - partitionSpec = new DynamicPartitionsSpec(null, DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS); + partitionSpec = new DynamicPartitionsSpec(null, null); } else { throw new MSQException( UnknownFault.forMessage( From 7e43b5deb113b73c47454325d83fb37c503102ff Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 5 Apr 2024 15:28:34 +0530 Subject: [PATCH 23/26] Address review comments and fix tests --- docs/multi-stage-query/reference.md | 40 +++++++++---------- .../apache/druid/msq/exec/ControllerImpl.java | 3 +- .../apache/druid/msq/exec/MSQReplaceTest.java | 2 +- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 0220a2ebca8e..7320edfc6200 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,27 +346,27 @@ If you're using the web console, you can specify the context parameters through The following 
table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|---|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for the shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`. If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport` (the default) to write select results to the task report. This is not scalable, since the task report size explodes for large results.
Use `durableStorage` to write results to a durable storage location. For large result sets, it's recommended to use `durableStorage`. To configure durable storage, see [this](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall until the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage`. | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` field that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [`storeCompactionState`](../ingestion/tasks.md#context-parameters) task context flag. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for the shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`. If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport` (the default) to write select results to the task report. This is not scalable, since the task report size explodes for large results.
Use `durableStorage` to write results to a durable storage location. For large result sets, it's recommended to use `durableStorage`. To configure durable storage, see [this](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall until the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage`. | 100000 | +| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` field that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [`storeCompactionState`](../ingestion/tasks.md#context-parameters) task context flag. | `false` | ## Joins diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 3373c1efd520..9a66153f8305 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1775,7 +1775,6 @@ private static Function, Set> addCompactionStateTo final MSQTuningConfig tuningConfig = task.getQuerySpec().getTuningConfig(); PartitionsSpec partitionSpec; - // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. if (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE)) { String partitionDimension = ((SingleDimensionShardSpec) shardSpec).getDimension(); partitionSpec = new SingleDimensionPartitionsSpec( @@ -1793,6 +1792,8 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. + // Hence using null for both which ends up translating to DEFAULT_MAX_ROWS_PER_SEGMENT for maxRowsPerSegment. 
partitionSpec = new DynamicPartitionsSpec(null, null); } else { throw new MSQException( diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 9d8196bd50a0..6e0ad4e2bf47 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -1855,7 +1855,7 @@ private CompactionState expectedCompactionState( } PartitionsSpec partitionsSpec; if (partitionDimensions.isEmpty()) { - partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); + partitionsSpec = new DynamicPartitionsSpec(null, null); } else { partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null, From a282e32e6782f060d939d05640ca8c5652dd8ff5 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 8 Apr 2024 16:36:59 +0530 Subject: [PATCH 24/26] Correct values in DynamicPartitionSpec. 
--- .../org/apache/druid/msq/exec/ControllerImpl.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 9a66153f8305..8d0d006d9606 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1775,15 +1775,7 @@ private static Function, Set> addCompactionStateTo final MSQTuningConfig tuningConfig = task.getQuerySpec().getTuningConfig(); PartitionsSpec partitionSpec; - if (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE)) { - String partitionDimension = ((SingleDimensionShardSpec) shardSpec).getDimension(); - partitionSpec = new SingleDimensionPartitionsSpec( - tuningConfig.getRowsPerSegment(), - null, - partitionDimension, - false - ); - } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) { + if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) { List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); partitionSpec = new DimensionRangePartitionsSpec( tuningConfig.getRowsPerSegment(), @@ -1794,8 +1786,9 @@ private static Function, Set> addCompactionStateTo } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. // Hence using null for both which ends up translating to DEFAULT_MAX_ROWS_PER_SEGMENT for maxRowsPerSegment. - partitionSpec = new DynamicPartitionsSpec(null, null); + partitionSpec = new DynamicPartitionsSpec(tuningConfig.getRowsPerSegment(), Long.MAX_VALUE); } else { + // SingleDimenionShardSpec and other shard specs are never created in MSQ. 
throw new MSQException( UnknownFault.forMessage( StringUtils.format( From b6bc0a594981af85763436b521ba00b87bbcb1fd Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 8 Apr 2024 17:11:57 +0530 Subject: [PATCH 25/26] Fix checkstyle --- .../src/main/java/org/apache/druid/msq/exec/ControllerImpl.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 8d0d006d9606..87a00e3a9af4 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -67,7 +67,6 @@ import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; -import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -220,7 +219,6 @@ import org.apache.druid.timeline.partition.NumberedPartialShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.ShardSpec; -import org.apache.druid.timeline.partition.SingleDimensionShardSpec; import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; From 6a4edc9789dace8456537649811ab676abd81dc0 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 9 Apr 2024 10:27:55 +0530 Subject: [PATCH 26/26] Fix tests --- .../main/java/org/apache/druid/msq/exec/ControllerImpl.java | 3 +-- .../test/java/org/apache/druid/msq/exec/MSQReplaceTest.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 87a00e3a9af4..fe10b3509a60 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1782,8 +1782,7 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. - // Hence using null for both which ends up translating to DEFAULT_MAX_ROWS_PER_SEGMENT for maxRowsPerSegment. + // MSQ tasks don't use maxTotalRows. Hence using LONG.MAX_VALUE. partitionSpec = new DynamicPartitionsSpec(tuningConfig.getRowsPerSegment(), Long.MAX_VALUE); } else { // SingleDimenionShardSpec and other shard specs are never created in MSQ. diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 6e0ad4e2bf47..fed19b7132da 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -1855,7 +1855,7 @@ private CompactionState expectedCompactionState( } PartitionsSpec partitionsSpec; if (partitionDimensions.isEmpty()) { - partitionsSpec = new DynamicPartitionsSpec(null, null); + partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, Long.MAX_VALUE); } else { partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null,