From d2c28d423ce9f922f6080599798977ca4465f761 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 23 Feb 2024 21:49:47 +0530 Subject: [PATCH 01/26] Add storeCompactionState annotation function --- .../apache/druid/msq/exec/ControllerImpl.java | 74 ++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index d62bcce04ddc..a5d3fca0845e 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -20,6 +20,7 @@ package org.apache.druid.msq.exec; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -41,6 +42,7 @@ import it.unimi.dsi.fastutil.ints.IntSet; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.druid.client.ImmutableSegmentLoadInfo; +import org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec; import org.apache.druid.common.guava.FutureUtils; import org.apache.druid.data.input.StringTuple; import org.apache.druid.data.input.impl.DimensionSchema; @@ -62,6 +64,8 @@ import org.apache.druid.frame.write.InvalidNullByteException; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; +import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -188,6 +192,7 @@ import 
org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.DimensionHandlerUtils; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; @@ -204,6 +209,7 @@ import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.http.ResultFormat; import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -238,6 +244,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.StreamSupport; @@ -1715,12 +1722,77 @@ private void publishSegmentsIfNeeded( { if (queryKernel.isSuccess() && MSQControllerTask.isIngestion(task.getQuerySpec())) { final StageId finalStageId = queryKernel.getStageId(queryDef.getFinalStageDefinition().getStageNumber()); + queryDef.getFinalStageDefinition().getClusterBy(); //noinspection unchecked @SuppressWarnings("unchecked") final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); + DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) + .getProcessorFactory()).getDataSchema(); + ClusterBy clusterBy = queryDef.getFinalStageDefinition().getClusterBy(); log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); - publishAllSegments(segments); + Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( + true, + context.jsonMapper(), + dataSchema, + clusterBy + ); + 
publishAllSegments(compactionStateAnnotateFunction.apply(segments)); + } + } + + public Function, Set> compactionStateAnnotateFunction( + boolean storeCompactionState, + ObjectMapper jsonMapper, + DataSchema dataSchema, + ClusterBy clusterBy + ) + { + if (storeCompactionState) { + IndexSpec indexSpec = task().getQuerySpec().getTuningConfig().getIndexSpec(); + GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); + DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); + Map transformSpec = dataSchema.getTransformSpec() == null + || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) + ? null + : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() + .getFilter()).asMap( + jsonMapper); + List metricsSpec = dataSchema.getAggregators() == null + ? null + : jsonMapper.convertValue( + dataSchema.getAggregators(), + new TypeReference>() + { + } + ); + + PartitionsSpec partitionSpec = new DimensionRangePartitionsSpec( + task().getQuerySpec() + .getTuningConfig() + .getRowsPerSegment(), + null, + clusterBy.getColumns() + .stream() + .map(KeyColumn::columnName) + .collect(Collectors.toList()), + false + ); + + final CompactionState compactionState = new CompactionState( + partitionSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec.asMap(jsonMapper), + granularitySpec.asMap(jsonMapper) + ); + return segments -> segments + .stream() + .map(s -> s.withLastCompactionState(compactionState)) + .collect(Collectors.toSet()); + } else { + return Function.identity(); } } From 555d5d5cb60de04d8d7b131874bc13a7e3600564 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 11:19:30 +0530 Subject: [PATCH 02/26] Add flag and change some config sources --- .../apache/druid/msq/exec/ControllerImpl.java | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index a5d3fca0845e..292c720398b0 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -80,6 +80,7 @@ import org.apache.druid.indexing.common.actions.SegmentTransactionalReplaceAction; import org.apache.druid.indexing.common.actions.TaskAction; import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.indexing.common.task.batch.TooManyBucketsException; import org.apache.druid.indexing.common.task.batch.parallel.TombstoneHelper; import org.apache.druid.indexing.overlord.SegmentPublishResult; @@ -1729,27 +1730,40 @@ private void publishSegmentsIfNeeded( final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) .getProcessorFactory()).getDataSchema(); - ClusterBy clusterBy = queryDef.getFinalStageDefinition().getClusterBy(); - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); + + List partitionDimensions = segments.isEmpty() + ? 
Collections.emptyList() + : ((DimensionRangeShardSpec) segments.stream() + .findFirst() + .get() + .getShardSpec()).getDimensions(); + + Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( - true, + task(), context.jsonMapper(), dataSchema, - clusterBy + partitionDimensions ); + log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); publishAllSegments(compactionStateAnnotateFunction.apply(segments)); } } - public Function, Set> compactionStateAnnotateFunction( - boolean storeCompactionState, + public static Function, Set> compactionStateAnnotateFunction( + MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, - ClusterBy clusterBy + List partitionDimensions ) { + final boolean storeCompactionState = task.getContextValue( + Tasks.STORE_COMPACTION_STATE_KEY, + Tasks.DEFAULT_STORE_COMPACTION_STATE + ); + if (storeCompactionState) { - IndexSpec indexSpec = task().getQuerySpec().getTuningConfig().getIndexSpec(); + IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); Map transformSpec = dataSchema.getTransformSpec() == null @@ -1767,15 +1781,15 @@ public Function, Set> compactionStateAnnotateFunct } ); + // Even if partition dimensions is empty, use DimensionRangePartitionsSpec to record other info + // such as rowsPerSegment + PartitionsSpec partitionSpec = new DimensionRangePartitionsSpec( - task().getQuerySpec() - .getTuningConfig() - .getRowsPerSegment(), + task.getQuerySpec() + .getTuningConfig() + .getRowsPerSegment(), null, - clusterBy.getColumns() - .stream() - .map(KeyColumn::columnName) - .collect(Collectors.toList()), + partitionDimensions, false ); From 0ac20e49cd314f1ebe6fcb74dd40ae9b160d4ff2 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 12:56:11 +0530 Subject: [PATCH 03/26] Add type check for shard spec 
before casting --- .../apache/druid/msq/exec/ControllerImpl.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 292c720398b0..a54021239c16 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -66,6 +66,7 @@ import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -234,6 +235,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Queue; import java.util.Set; @@ -1731,13 +1733,18 @@ private void publishSegmentsIfNeeded( DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) .getProcessorFactory()).getDataSchema(); - List partitionDimensions = segments.isEmpty() - ? Collections.emptyList() - : ((DimensionRangeShardSpec) segments.stream() - .findFirst() - .get() - .getShardSpec()).getDimensions(); + ShardSpec shardSpec = segments.isEmpty() + ? 
null + : segments.stream() + .findFirst() + .get() + .getShardSpec(); + List partitionDimensions = Collections.emptyList(); + if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + } Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( task(), From a6d3dc0bcbdd7a7a03f3934e8c9eece5b0d1de56 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 15:37:08 +0530 Subject: [PATCH 04/26] Check if there is a segment granularity in the context and revise the granularity spec accordingly --- .../apache/druid/msq/exec/ControllerImpl.java | 96 +++++++++++-------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index a54021239c16..44322d20fbc5 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -202,10 +202,12 @@ import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec; import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DruidServerMetadata; +import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.planner.ColumnMapping; import org.apache.druid.sql.calcite.planner.ColumnMappings; import 
org.apache.druid.sql.calcite.rel.DruidQuery; @@ -1725,7 +1727,6 @@ private void publishSegmentsIfNeeded( { if (queryKernel.isSuccess() && MSQControllerTask.isIngestion(task.getQuerySpec())) { final StageId finalStageId = queryKernel.getStageId(queryDef.getFinalStageDefinition().getStageNumber()); - queryDef.getFinalStageDefinition().getClusterBy(); //noinspection unchecked @SuppressWarnings("unchecked") @@ -1733,26 +1734,36 @@ private void publishSegmentsIfNeeded( DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) .getProcessorFactory()).getDataSchema(); - ShardSpec shardSpec = segments.isEmpty() - ? null - : segments.stream() - .findFirst() - .get() - .getShardSpec(); - List partitionDimensions = Collections.emptyList(); + Function, Set> compactionStateAnnotateFunction = Function.identity(); - if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { - partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + final boolean storeCompactionState = task.getContextValue( + Tasks.STORE_COMPACTION_STATE_KEY, + Tasks.DEFAULT_STORE_COMPACTION_STATE + ); + + if (storeCompactionState) { + ShardSpec shardSpec = segments.isEmpty() + ? 
null + : segments.stream() + .findFirst() + .get() + .getShardSpec(); + List partitionDimensions = Collections.emptyList(); + + if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + } + + compactionStateAnnotateFunction = compactionStateAnnotateFunction( + task(), + context.jsonMapper(), + dataSchema, + partitionDimensions + ); + log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); } - Function, Set> compactionStateAnnotateFunction = compactionStateAnnotateFunction( - task(), - context.jsonMapper(), - dataSchema, - partitionDimensions - ); - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); publishAllSegments(compactionStateAnnotateFunction.apply(segments)); } } @@ -1764,25 +1775,37 @@ public static Function, Set> compactionStateAnnota List partitionDimensions ) { - final boolean storeCompactionState = task.getContextValue( - Tasks.STORE_COMPACTION_STATE_KEY, - Tasks.DEFAULT_STORE_COMPACTION_STATE - ); - - if (storeCompactionState) { IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); - Map transformSpec = dataSchema.getTransformSpec() == null - || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) - ? null - : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() - .getFilter()).asMap( - jsonMapper); - List metricsSpec = dataSchema.getAggregators() == null - ? 
null - : jsonMapper.convertValue( - dataSchema.getAggregators(), + + if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { + + // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY + Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( + jsonMapper, + task.getQuerySpec() + .getQuery() + .getContext() + ); + granularitySpec = new UniformGranularitySpec( + segmentGranularity, + granularitySpec.getQueryGranularity(), + granularitySpec.isRollup(), + granularitySpec.inputIntervals() + ); + } + + DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); + Map transformSpec = dataSchema.getTransformSpec() == null + || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) + ? null + : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() + .getFilter()).asMap( + jsonMapper); + List metricsSpec = dataSchema.getAggregators() == null + ? null + : jsonMapper.convertValue( + dataSchema.getAggregators(), new TypeReference>() { } @@ -1812,9 +1835,6 @@ public static Function, Set> compactionStateAnnota .stream() .map(s -> s.withLastCompactionState(compactionState)) .collect(Collectors.toSet()); - } else { - return Function.identity(); - } } /** From b218280e308517154d6b285baba93e32a37a4fd4 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 16:08:17 +0530 Subject: [PATCH 05/26] Check if there is a segment granularity in the context and revise the granularity spec accordingly --- .../main/java/org/apache/druid/msq/exec/ControllerImpl.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 44322d20fbc5..dae65c75a785 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1778,7 +1778,8 @@ public static Function, Set> compactionStateAnnota IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { + if (granularitySpec instanceof ArbitraryGranularitySpec + && task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( From 33b5a82fd22731c057e48bb230b4289ca3128a4b Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 26 Feb 2024 17:38:31 +0530 Subject: [PATCH 06/26] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 144 ++++++++++-------- 1 file changed, 80 insertions(+), 64 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index dae65c75a785..eaaf6f74b3ea 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -65,8 +65,8 @@ import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; -import 
org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -1736,66 +1736,69 @@ private void publishSegmentsIfNeeded( Function, Set> compactionStateAnnotateFunction = Function.identity(); - final boolean storeCompactionState = task.getContextValue( - Tasks.STORE_COMPACTION_STATE_KEY, - Tasks.DEFAULT_STORE_COMPACTION_STATE - ); + + Object storeCompactionStateValue = task.getQuerySpec() + .getQuery() + .getContext() + .get(Tasks.STORE_COMPACTION_STATE_KEY); + + final boolean storeCompactionState = storeCompactionStateValue != null + ? (Boolean) storeCompactionStateValue + : Tasks.DEFAULT_STORE_COMPACTION_STATE; if (storeCompactionState) { - ShardSpec shardSpec = segments.isEmpty() - ? null - : segments.stream() - .findFirst() - .get() - .getShardSpec(); - List partitionDimensions = Collections.emptyList(); - - if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { - partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); - } + + ShardSpec shardSpec = segments.isEmpty() ? 
null : segments.stream().findFirst().get().getShardSpec(); compactionStateAnnotateFunction = compactionStateAnnotateFunction( task(), context.jsonMapper(), dataSchema, - partitionDimensions + shardSpec, + queryDef.getQueryId() ); - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); } + log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); publishAllSegments(compactionStateAnnotateFunction.apply(segments)); } } public static Function, Set> compactionStateAnnotateFunction( - MSQControllerTask task, - ObjectMapper jsonMapper, - DataSchema dataSchema, - List partitionDimensions + MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, ShardSpec shardSpec, String queryId ) { - IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); - GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); + DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); + if (!destination.isReplaceTimeChunks()) { + // Only do this for replace queries, whether originating directly or via compaction + log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE", queryId); + return Function.identity(); + } - if (granularitySpec instanceof ArbitraryGranularitySpec - && task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) != null) { + GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY - Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( - jsonMapper, - task.getQuerySpec() - .getQuery() - .getContext() - ); - granularitySpec = new UniformGranularitySpec( - segmentGranularity, - granularitySpec.getQueryGranularity(), - granularitySpec.isRollup(), - granularitySpec.inputIntervals() - ); + if 
(task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) == null) { + // This is a defensive check. Should never enter here. + log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set", queryId); + return Function.identity(); } + // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY + Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( + jsonMapper, + task.getQuerySpec() + .getQuery() + .getContext() + ); + + granularitySpec = new UniformGranularitySpec( + segmentGranularity, + granularitySpec.getQueryGranularity(), + granularitySpec.isRollup(), + granularitySpec.inputIntervals() + ); + + DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); Map transformSpec = dataSchema.getTransformSpec() == null || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) @@ -1806,36 +1809,49 @@ public static Function, Set> compactionStateAnnota List metricsSpec = dataSchema.getAggregators() == null ? 
null : jsonMapper.convertValue( - dataSchema.getAggregators(), - new TypeReference>() - { - } - ); - - // Even if partition dimensions is empty, use DimensionRangePartitionsSpec to record other info - // such as rowsPerSegment - - PartitionsSpec partitionSpec = new DimensionRangePartitionsSpec( - task.getQuerySpec() - .getTuningConfig() - .getRowsPerSegment(), + dataSchema.getAggregators(), new TypeReference>() + { + }); + + PartitionsSpec partitionSpec; + + if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + partitionSpec = new DimensionRangePartitionsSpec( + task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null, partitionDimensions, false ); - final CompactionState compactionState = new CompactionState( - partitionSpec, - dimensionsSpec, - metricsSpec, - transformSpec, - indexSpec.asMap(jsonMapper), - granularitySpec.asMap(jsonMapper) + } else if (shardSpec != null && Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); + } else { + log.error( + "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type", + queryId ); - return segments -> segments - .stream() - .map(s -> s.withLastCompactionState(compactionState)) - .collect(Collectors.toSet()); + return Function.identity(); + } + + IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); + + final CompactionState compactionState = new CompactionState( + partitionSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec.asMap(jsonMapper), + granularitySpec.asMap(jsonMapper) + ); + + log.info("Query [%s] storing compaction state in segments", queryId); + + return segments -> segments + .stream() + .map(s -> 
s.withLastCompactionState(compactionState)) + .collect(Collectors.toSet()); } /** From cf37c65785af6f36d7de8ff765339a4f5187e619 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 5 Mar 2024 10:57:30 +0530 Subject: [PATCH 07/26] Add tests for compaction state --- .run/Coordinator w_ MSQ.run.xml | 19 ++ .../apache/druid/msq/exec/ControllerImpl.java | 40 +-- .../msq/util/MultiStageQueryContext.java | 2 +- .../apache/druid/msq/exec/MSQReplaceTest.java | 244 +++++++++++++++++- .../apache/druid/msq/test/MSQTestBase.java | 16 ++ 5 files changed, 295 insertions(+), 26 deletions(-) create mode 100644 .run/Coordinator w_ MSQ.run.xml diff --git a/.run/Coordinator w_ MSQ.run.xml b/.run/Coordinator w_ MSQ.run.xml new file mode 100644 index 000000000000..074b578ea722 --- /dev/null +++ b/.run/Coordinator w_ MSQ.run.xml @@ -0,0 +1,19 @@ + + + + \ No newline at end of file diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index eaaf6f74b3ea..1dbad6b3e228 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1731,24 +1731,25 @@ private void publishSegmentsIfNeeded( //noinspection unchecked @SuppressWarnings("unchecked") final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) - .getProcessorFactory()).getDataSchema(); Function, Set> compactionStateAnnotateFunction = Function.identity(); + Boolean storeCompactionState = (Boolean) task.getQuerySpec() + .getQuery() + .getContext() + .get(Tasks.STORE_COMPACTION_STATE_KEY); - Object storeCompactionStateValue = task.getQuerySpec() - .getQuery() - .getContext() - 
.get(Tasks.STORE_COMPACTION_STATE_KEY); + if (storeCompactionState == null) { + storeCompactionState = Tasks.DEFAULT_STORE_COMPACTION_STATE; - final boolean storeCompactionState = storeCompactionStateValue != null - ? (Boolean) storeCompactionStateValue - : Tasks.DEFAULT_STORE_COMPACTION_STATE; + } + + if (!segments.isEmpty() && storeCompactionState) { + DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) + .getProcessorFactory()).getDataSchema(); - if (storeCompactionState) { - ShardSpec shardSpec = segments.isEmpty() ? null : segments.stream().findFirst().get().getShardSpec(); + ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); compactionStateAnnotateFunction = compactionStateAnnotateFunction( task(), @@ -1757,6 +1758,7 @@ private void publishSegmentsIfNeeded( shardSpec, queryDef.getQueryId() ); + } log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); @@ -1771,7 +1773,7 @@ public static Function, Set> compactionStateAnnota DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { // Only do this for replace queries, whether originating directly or via compaction - log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE", queryId); + log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE.", queryId); return Function.identity(); } @@ -1779,7 +1781,7 @@ public static Function, Set> compactionStateAnnota if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) == null) { // This is a defensive check. Should never enter here. 
- log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set", queryId); + log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set.", queryId); return Function.identity(); } @@ -1815,8 +1817,8 @@ public static Function, Set> compactionStateAnnota PartitionsSpec partitionSpec; - if (shardSpec != null && (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); partitionSpec = new DimensionRangePartitionsSpec( task.getQuerySpec().getTuningConfig().getRowsPerSegment(), @@ -1825,12 +1827,12 @@ public static Function, Set> compactionStateAnnota false ); - } else if (shardSpec != null && Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); } else { log.error( - "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type", - queryId + "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type [%s].", + queryId, shardSpec.getType() ); return Function.identity(); } @@ -1846,7 +1848,7 @@ public static Function, Set> compactionStateAnnota granularitySpec.asMap(jsonMapper) ); - log.info("Query [%s] storing compaction state in segments", queryId); + log.info("Query [%s] storing compaction state in segments.", queryId); return segments -> segments .stream() diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java 
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java index b7340343c810..6ab213a06c4d 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/util/MultiStageQueryContext.java @@ -126,7 +126,7 @@ public class MultiStageQueryContext public static final String DEFAULT_CLUSTER_STATISTICS_MERGE_MODE = ClusterStatisticsMergeMode.SEQUENTIAL.toString(); public static final String CTX_ROWS_PER_SEGMENT = "rowsPerSegment"; - static final int DEFAULT_ROWS_PER_SEGMENT = 3000000; + public static final int DEFAULT_ROWS_PER_SEGMENT = 3000000; public static final String CTX_ROWS_PER_PAGE = "rowsPerPage"; static final int DEFAULT_ROWS_PER_PAGE = 100000; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index ea7adc866ee0..580e2c0c9926 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -23,18 +23,34 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.data.input.impl.DimensionSchema; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.DoubleDimensionSchema; +import org.apache.druid.data.input.impl.FloatDimensionSchema; +import org.apache.druid.data.input.impl.LongDimensionSchema; +import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import 
org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexing.common.TaskLockType; import org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction; import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.granularity.GranularityType; import org.apache.druid.msq.test.CounterSnapshotMatcher; import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.msq.test.MSQTestFileUtils; import org.apache.druid.msq.test.MSQTestTaskActionClient; +import org.apache.druid.msq.util.MultiStageQueryContext; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec; +import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.partition.DimensionRangeShardSpec; @@ -62,14 +78,18 @@ public class MSQReplaceTest extends MSQTestBase { - private static final String WITH_REPLACE_LOCK = "WITH_REPLACE_LOCK"; - private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK = + private static final String WITH_REPLACE_LOCK_AND_COMPACTION_STATE = "WITH_REPLACE_LOCK_AND_COMPACTION_STATE"; + private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE = ImmutableMap.builder() .putAll(DEFAULT_MSQ_CONTEXT) .put( Tasks.TASK_LOCK_TYPE, StringUtils.toLowerCase(TaskLockType.REPLACE.name()) ) + .put( + Tasks.STORE_COMPACTION_STATE_KEY, + true + ) .build(); @Parameterized.Parameters(name = "{index}:with context {0}") @@ -80,8 +100,8 @@ public static Collection data() 
{DURABLE_STORAGE, DURABLE_STORAGE_MSQ_CONTEXT}, {FAULT_TOLERANCE, FAULT_TOLERANCE_MSQ_CONTEXT}, {PARALLEL_MERGE, PARALLEL_MERGE_MSQ_CONTEXT}, - {WITH_REPLACE_LOCK, QUERY_CONTEXT_WITH_REPLACE_LOCK} - }; + {WITH_REPLACE_LOCK_AND_COMPACTION_STATE, QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE}, + }; return Arrays.asList(data); } @@ -169,6 +189,13 @@ public void testReplaceOnFooWithAll() .with().segmentRowsProcessed(6), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -218,6 +245,13 @@ public void testReplaceOnFooWithWhere() .with().segmentRowsProcessed(1), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -293,6 +327,13 @@ public void testReplaceOnFoo1WithAllExtern() throws IOException .with().rows(1, 1, 1).frames(1, 1, 1), 1, 0, "shuffle" ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new LongDimensionSchema("cnt")), + GranularityType.HOUR + ) + ) .verifyResults(); } @@ -359,6 +400,13 @@ public void testReplaceOnFoo1WithWhereExtern() throws IOException .with().segmentRowsProcessed(4), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new StringDimensionSchema("user")), + GranularityType.HOUR + ) + ) .verifyResults(); } @@ -430,6 +478,13 @@ public void testReplaceSegmentEntireTable() .with().segmentRowsProcessed(6), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.ALL + ) + ) .verifyResults(); } @@ -513,6 +568,13 @@ public void testReplaceSegmentsRepartitionTable() 
.with().segmentRowsProcessed(6), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -586,6 +648,13 @@ public void testReplaceWithWhereClause() .with().segmentRowsProcessed(2), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -662,6 +731,13 @@ public void testReplaceWhereClauseLargerThanData() .with().segmentRowsProcessed(2), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -712,6 +788,8 @@ public void testReplaceTimeChunks() .version(MSQTestTaskActionClient.VERSION) .size(1) .build(); + + Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(new RetrieveUsedSegmentsAction( @@ -726,7 +804,6 @@ public void testReplaceTimeChunks() + "WHERE __time >= TIMESTAMP '2000-01-01' AND __time < TIMESTAMP '2000-01-03' " + "PARTITIONED BY MONTH") .setExpectedDataSource("foo") - .setQueryContext(DEFAULT_MSQ_CONTEXT) .setExpectedRowSignature(rowSignature) .setQueryContext(context) .setExpectedDestinationIntervals(Collections.singletonList(Intervals.of("2000-01-01T/2000-03-01T"))) @@ -740,6 +817,12 @@ public void testReplaceTimeChunks() ImmutableList.of( new Object[]{946684800000L, 1.0f}, new Object[]{946771200000L, 2.0f} + )) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH ) ) .verifyResults(); @@ -797,6 +880,13 @@ public void testReplaceTimeChunksLargerThanData() new Object[]{946771200000L, 2.0f} ) ) + .setExpectedLastCompactionState( + 
expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -816,6 +906,8 @@ public void testReplaceAllOverEternitySegment() .dataSource("foo") .build(); + PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); + Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(ArgumentMatchers.isA(RetrieveUsedSegmentsAction.class)); @@ -849,6 +941,14 @@ public void testReplaceAllOverEternitySegment() new Object[]{946771200000L, 2.0f} ) ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema( + "m1")), + GranularityType.MONTH + ) + ) .verifyResults(); } @@ -871,6 +971,16 @@ public void testReplaceOnFoo1Range() .setQueryContext(context) .setExpectedSegment(expectedFooSegments()) .setExpectedResultRows(expectedFooRows()) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.singletonList("dim1"), + Arrays.asList( + new StringDimensionSchema("dim1"), + new LongDimensionSchema("cnt") + ), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -903,6 +1013,67 @@ public void testReplaceSegmentsInsertIntoNewTable() new Object[]{978480000000L, 6.0f} ) ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("m1")), + GranularityType.ALL + ) + ) + .verifyResults(); + } + + @Test + public void testReplaceSegmentsWithQuarterSegmentGranularity() + { + RowSignature rowSignature = RowSignature.builder() + .add("__time", ColumnType.LONG) + .add("m1", ColumnType.FLOAT) + .add("m2", ColumnType.DOUBLE) + .build(); + + testIngestQuery().setSql(" REPLACE INTO foobar " + + "OVERWRITE ALL " + + "SELECT __time, m1, m2 " + + "FROM foo " + + "PARTITIONED by TIME_FLOOR(__time, 'P3M') ") + 
.setExpectedDataSource("foobar") + .setExpectedRowSignature(rowSignature) + .setQueryContext(context) + .setExpectedDestinationIntervals(Intervals.ONLY_ETERNITY) + .setExpectedSegment(ImmutableSet.of(SegmentId.of( + "foobar", + Intervals.of( + "2000-01-01T00:00:00.000Z/2000-04-01T00:00:00.000Z"), + "test", + 0 + ), + SegmentId.of( + "foobar", + Intervals.of( + "2001-01-01T00:00:00.000Z/2001-04-01T00:00:00.000Z"), + "test", + 0 + ) + ) + ) + .setExpectedResultRows( + ImmutableList.of( + new Object[]{946684800000L, 1.0f, 1.0}, + new Object[]{946771200000L, 2.0f, 2.0}, + new Object[]{946857600000L, 3.0f, 3.0}, + new Object[]{978307200000L, 4.0f, 4.0}, + new Object[]{978393600000L, 5.0f, 5.0}, + new Object[]{978480000000L, 6.0f, 6.0} + ) + ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Arrays.asList(new FloatDimensionSchema("m1"), new DoubleDimensionSchema("m2")), + GranularityType.QUARTER + ) + ) .verifyResults(); } @@ -916,7 +1087,7 @@ public void testReplaceWithClusteredByDescendingThrowsException() + "FROM foo " + "PARTITIONED BY ALL TIME " + "CLUSTERED BY m2, m1 DESC" - ) + ) .setExpectedValidationErrorMatcher( invalidSqlIs("Invalid CLUSTERED BY clause [`m1` DESC]: cannot sort in descending order.") ) @@ -979,6 +1150,13 @@ public void testReplaceUnnestSegmentEntireTable() .with().segmentRowsProcessed(8), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new StringDimensionSchema("d")), + GranularityType.ALL + ) + ) .verifyResults(); } @@ -1042,6 +1220,13 @@ public void testReplaceUnnestWithVirtualColumnSegmentEntireTable() .with().segmentRowsProcessed(12), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.emptyList(), + Collections.singletonList(new FloatDimensionSchema("d")), + GranularityType.ALL + ) + ) .verifyResults(); } @@ -1116,6 +1301,13 @@ public void testReplaceUnnestSegmentWithTimeFilter() 
.with().segmentRowsProcessed(8), 1, 0 ) + .setExpectedLastCompactionState( + expectedCompactionState( + Collections.singletonList("d"), + Collections.singletonList(new StringDimensionSchema("d")), + GranularityType.DAY + ) + ) .verifyResults(); } @@ -1579,4 +1771,44 @@ private List expectedFooRows() )); return expectedRows; } + private CompactionState expectedCompactionState(List partitionDimensions, List dimensions, + GranularityType segmentGranularity + ){ + if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))){ + return null; + } + PartitionsSpec partitionsSpec; + if (partitionDimensions.isEmpty()) { + partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); + + } else { + partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null, + partitionDimensions, false + ); + } + DimensionsSpec dimensionsSpec = new DimensionsSpec.Builder(). 
+ setDimensions(dimensions) + .setDimensionExclusions(Collections.singletonList( + "__time")) + .build(); + + IndexSpec indexSpec = new IndexSpec(null, null, null, null, null, null, null); + GranularitySpec granularitySpec = new UniformGranularitySpec( + segmentGranularity.getDefaultGranularity(), + GranularityType.NONE.getDefaultGranularity(), + false, + Intervals.ONLY_ETERNITY + ); + List metricsSpec = Collections.emptyList(); + + return new CompactionState( + partitionsSpec, + dimensionsSpec, + metricsSpec, + null, + indexSpec.asMap(objectMapper), + granularitySpec.asMap(objectMapper) + ); + + } } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index f8fc01b9369f..101947fce545 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -181,6 +181,7 @@ import org.apache.druid.storage.StorageConnectorModule; import org.apache.druid.storage.StorageConnectorProvider; import org.apache.druid.storage.local.LocalFileStorageConnector; +import org.apache.druid.timeline.CompactionState; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.PruneLoadSpec; import org.apache.druid.timeline.SegmentId; @@ -866,6 +867,7 @@ public abstract class MSQTester> protected MSQSpec expectedMSQSpec = null; protected MSQTuningConfig expectedTuningConfig = null; protected Set expectedSegments = null; + protected CompactionState expectedLastCompactionState = null; protected Set expectedTombstoneIntervals = null; protected List expectedResultRows = null; protected Matcher expectedValidationErrorMatcher = null; @@ -912,6 +914,12 @@ public Builder setExpectedSegment(Set expectedSegments) return asBuilder(); } + public Builder setExpectedLastCompactionState(CompactionState 
expectedLastCompactionState) + { + this.expectedLastCompactionState = expectedLastCompactionState; + return asBuilder(); + } + public Builder setExpectedTombstoneIntervals(Set tombstoneIntervals) { Preconditions.checkArgument(!tombstoneIntervals.isEmpty(), "Segments cannot be empty"); @@ -1279,6 +1287,12 @@ public void verifyResults() // SegmentGeneratorFrameProcessorFactory. We can get the tombstone segment ids published by taking a set // difference of all the segments published with the segments that are created by the SegmentGeneratorFrameProcessorFactory if (!testTaskActionClient.getPublishedSegments().isEmpty()) { + if (expectedLastCompactionState != null){ + CompactionState compactionState = testTaskActionClient.getPublishedSegments().stream().findFirst().get() + .getLastCompactionState(); + Assert.assertEquals(expectedLastCompactionState, compactionState); + + } Set publishedSegmentIds = testTaskActionClient.getPublishedSegments() .stream() .map(DataSegment::getId) @@ -1496,4 +1510,6 @@ private static List resultSignatureFromRowSignat } return retVal; } + } + From f877b91c2addb81845f5b131f94e7ee21b254088 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 5 Mar 2024 11:04:18 +0530 Subject: [PATCH 08/26] Corrections --- .run/Coordinator w_ MSQ.run.xml | 19 ------------------- .../apache/druid/msq/exec/MSQReplaceTest.java | 1 - 2 files changed, 20 deletions(-) delete mode 100644 .run/Coordinator w_ MSQ.run.xml diff --git a/.run/Coordinator w_ MSQ.run.xml b/.run/Coordinator w_ MSQ.run.xml deleted file mode 100644 index 074b578ea722..000000000000 --- a/.run/Coordinator w_ MSQ.run.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - \ No newline at end of file diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 580e2c0c9926..9654fd3c1439 100644 --- 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -44,7 +44,6 @@ import org.apache.druid.msq.test.MSQTestFileUtils; import org.apache.druid.msq.test.MSQTestTaskActionClient; import org.apache.druid.msq.util.MultiStageQueryContext; -import org.apache.druid.query.QueryContexts; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; From 64646059e11552281a53fe35c97a6a90bffb9512 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 10:45:01 +0530 Subject: [PATCH 09/26] Address review comments --- .../apache/druid/msq/exec/ControllerImpl.java | 27 ++++++------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 1dbad6b3e228..18460078b70c 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -207,7 +207,6 @@ import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.DruidServerMetadata; -import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.planner.ColumnMapping; import org.apache.druid.sql.calcite.planner.ColumnMappings; import org.apache.druid.sql.calcite.rel.DruidQuery; @@ -1745,8 +1744,8 @@ private void publishSegmentsIfNeeded( } if (!segments.isEmpty() && storeCompactionState) { - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel.getStageDefinition(finalStageId) - .getProcessorFactory()).getDataSchema(); + 
DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel + .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); @@ -1773,15 +1772,7 @@ public static Function, Set> compactionStateAnnota DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { // Only do this for replace queries, whether originating directly or via compaction - log.error("Query [%s] skipping storing compaction state in segments as query not of type REPLACE.", queryId); - return Function.identity(); - } - - GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); - - if (task.getQuerySpec().getQuery().getContext().get(DruidSqlInsert.SQL_INSERT_SEGMENT_GRANULARITY) == null) { - // This is a defensive check. Should never enter here. - log.error("Query [%s] skipping storing compaction state in segments as segment granularity not set.", queryId); + log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryId); return Function.identity(); } @@ -1793,17 +1784,15 @@ public static Function, Set> compactionStateAnnota .getContext() ); - granularitySpec = new UniformGranularitySpec( + GranularitySpec granularitySpec = new UniformGranularitySpec( segmentGranularity, - granularitySpec.getQueryGranularity(), - granularitySpec.isRollup(), - granularitySpec.inputIntervals() + dataSchema.getGranularitySpec().getQueryGranularity(), + dataSchema.getGranularitySpec().isRollup(), + dataSchema.getGranularitySpec().inputIntervals() ); - DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); - Map transformSpec = dataSchema.getTransformSpec() == null - || TransformSpec.NONE.equals(dataSchema.getTransformSpec()) + Map transformSpec = TransformSpec.NONE.equals(dataSchema.getTransformSpec()) ? 
null : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() .getFilter()).asMap( From b24a7c9e8a99564dd0f176d11f30e1c0256b7351 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 10:47:33 +0530 Subject: [PATCH 10/26] Remove unused var --- .../src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 9654fd3c1439..b9674ab39d47 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -905,8 +905,6 @@ public void testReplaceAllOverEternitySegment() .dataSource("foo") .build(); - PartitionsSpec partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); - Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(ArgumentMatchers.isA(RetrieveUsedSegmentsAction.class)); From 1a0517cc02142b9d4fef7346f2178907a9ae5d17 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 11:08:21 +0530 Subject: [PATCH 11/26] Fix compilation errors due to junit5 migration --- .../apache/druid/msq/exec/MSQReplaceTest.java | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 10d34c402a26..3743a42eb073 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -75,7 +75,7 @@ public class MSQReplaceTest extends 
MSQTestBase { - private static final String WITH_REPLACE_LOCK_AND_COMPACTION_STATE = "WITH_REPLACE_LOCK_AND_COMPACTION_STATE"; + private static final String WITH_REPLACE_LOCK_AND_COMPACTION_STATE = "with_replace_lock_and_compaction_state"; private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE = ImmutableMap.builder() .putAll(DEFAULT_MSQ_CONTEXT) @@ -181,7 +181,7 @@ public void testReplaceOnFooWithAll(String contextName, Map cont ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -238,7 +238,7 @@ public void testReplaceOnFooWithWhere(String contextName, Map co ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -321,7 +321,7 @@ public void testReplaceOnFoo1WithAllExtern(String contextName, Map c ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -731,7 +731,7 @@ public void testReplaceWhereClauseLargerThanData(String contextName, Map contex .version(MSQTestTaskActionClient.VERSION) .size(1) .build(); - - Mockito.doReturn(ImmutableSet.of(existingDataSegment)) .when(testTaskActionClient) .submit(new RetrieveUsedSegmentsAction( @@ -821,7 +819,7 @@ public void testReplaceTimeChunks(String contextName, Map contex )) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -884,7 +882,7 @@ public void testReplaceTimeChunksLargerThanData(String contextName, Map conte .setExpectedResultRows(expectedFooRows()) .setExpectedLastCompactionState( 
expectedCompactionState( - Collections.singletonList("dim1"), + context, Collections.singletonList("dim1"), Arrays.asList( new StringDimensionSchema("dim1"), new LongDimensionSchema("cnt") @@ -1057,7 +1055,7 @@ public void testReplaceSegmentsInsertIntoNewTable(String contextName, Map context) { RowSignature rowSignature = RowSignature.builder() .add("__time", ColumnType.LONG) @@ -1111,7 +1110,7 @@ public void testReplaceSegmentsWithQuarterSegmentGranularity() ) .setExpectedLastCompactionState( expectedCompactionState( - Collections.emptyList(), + context, Collections.emptyList(), Arrays.asList(new FloatDimensionSchema("m1"), new DoubleDimensionSchema("m2")), GranularityType.QUARTER ) @@ -1196,7 +1195,7 @@ public void testReplaceUnnestSegmentEntireTable(String contextName, Map expectedFooRows() )); return expectedRows; } - private CompactionState expectedCompactionState(List partitionDimensions, List dimensions, - GranularityType segmentGranularity + private CompactionState expectedCompactionState( + Map context, List partitionDimensions, List dimensions, + GranularityType segmentGranularity ){ if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))){ return null; From f40252316d02392fe4b00610bef6cd47a2f15b28 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 18:31:03 +0530 Subject: [PATCH 12/26] Separate compactionStateAnnotationFunction to a common place, and other minor changes. 
--- .../apache/druid/msq/exec/ControllerImpl.java | 117 +++++++++--------- .../common/task/AbstractBatchIndexTask.java | 8 +- .../druid/indexing/common/task/IndexTask.java | 2 +- .../parallel/ParallelIndexSupervisorTask.java | 2 +- .../druid/timeline/CompactionState.java | 28 +++++ 5 files changed, 88 insertions(+), 69 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index aba6fb2d1097..d7c92bdc097f 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1729,10 +1729,11 @@ private void publishSegmentsIfNeeded( Function, Set> compactionStateAnnotateFunction = Function.identity(); - Boolean storeCompactionState = (Boolean) task.getQuerySpec() - .getQuery() - .getContext() - .get(Tasks.STORE_COMPACTION_STATE_KEY); + Boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) + .getBoolean( + Tasks.STORE_COMPACTION_STATE_KEY, + Tasks.DEFAULT_STORE_COMPACTION_STATE + ); if (storeCompactionState == null) { storeCompactionState = Tasks.DEFAULT_STORE_COMPACTION_STATE; @@ -1740,20 +1741,26 @@ private void publishSegmentsIfNeeded( } if (!segments.isEmpty() && storeCompactionState) { - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel - .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); + DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); + if (!destination.isReplaceTimeChunks()) { + // Only do this for replace queries, whether originating directly or via compaction + log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryDef.getQueryId()); + } else { - ShardSpec shardSpec = 
segments.stream().findFirst().get().getShardSpec(); + DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel + .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); - compactionStateAnnotateFunction = compactionStateAnnotateFunction( - task(), - context.jsonMapper(), - dataSchema, - shardSpec, - queryDef.getQueryId() - ); + ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); + compactionStateAnnotateFunction = prepareCompactionStateAnnotateFunction( + task(), + context.jsonMapper(), + dataSchema, + shardSpec, + queryDef.getQueryId() + ); + } } log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); @@ -1761,24 +1768,34 @@ private void publishSegmentsIfNeeded( } } - public static Function, Set> compactionStateAnnotateFunction( + public static Function, Set> prepareCompactionStateAnnotateFunction( MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, ShardSpec shardSpec, String queryId ) { - DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); - if (!destination.isReplaceTimeChunks()) { - // Only do this for replace queries, whether originating directly or via compaction - log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryId); + PartitionsSpec partitionSpec; + + if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) + || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); + partitionSpec = new DimensionRangePartitionsSpec( + task.getQuerySpec().getTuningConfig().getRowsPerSegment(), + null, + partitionDimensions, + false + ); + + } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); + } else { + log.error( + "Query [%s] skipping storing compaction 
state in segments as shard spec of unsupported type [%s].", + queryId, shardSpec.getType() + ); return Function.identity(); } - // In case of MSQ, the segment granularity comes as the context parameter SQL_INSERT_SEGMENT_GRANULARITY - Granularity segmentGranularity = QueryKitUtils.getSegmentGranularityFromContext( - jsonMapper, - task.getQuerySpec() - .getQuery() - .getContext() - ); + Granularity segmentGranularity = ((DataSourceMSQDestination) task.getQuerySpec() + .getDestination()).getSegmentGranularity(); GranularitySpec granularitySpec = new UniformGranularitySpec( segmentGranularity, @@ -1800,31 +1817,12 @@ public static Function, Set> compactionStateAnnota { }); - PartitionsSpec partitionSpec; - - if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) - || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { - List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); - partitionSpec = new DimensionRangePartitionsSpec( - task.getQuerySpec().getTuningConfig().getRowsPerSegment(), - null, - partitionDimensions, - false - ); - - } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); - } else { - log.error( - "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type [%s].", - queryId, shardSpec.getType() - ); - return Function.identity(); - } IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); - final CompactionState compactionState = new CompactionState( + log.info("Query [%s] storing compaction state in segments.", queryId); + + return CompactionState.compactionStateAnnotateFunction( partitionSpec, dimensionsSpec, metricsSpec, @@ -1832,13 +1830,6 @@ public static Function, Set> compactionStateAnnota indexSpec.asMap(jsonMapper), granularitySpec.asMap(jsonMapper) ); - - log.info("Query [%s] storing compaction state in segments.", 
queryId); - - return segments -> segments - .stream() - .map(s -> s.withLastCompactionState(compactionState)) - .collect(Collectors.toSet()); } /** @@ -1901,7 +1892,8 @@ private static QueryDefinition makeQueryDefinition( } } else { shuffleSpecFactory = querySpec.getDestination() - .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context())); + .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery() + .context())); queryToPlan = querySpec.getQuery(); } @@ -2003,9 +1995,11 @@ private static QueryDefinition makeQueryDefinition( if (filesIterator.hasNext()) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.RUNTIME_FAILURE) - .build("Found files at provided export destination[%s]. Export is only allowed to " - + "an empty path. Please provide an empty path/subdirectory or move the existing files.", - exportStorageProvider.getBasePath()); + .build( + "Found files at provided export destination[%s]. Export is only allowed to " + + "an empty path. Please provide an empty path/subdirectory or move the existing files.", + exportStorageProvider.getBasePath() + ); } } catch (IOException e) { @@ -2037,7 +2031,6 @@ private static QueryDefinition makeQueryDefinition( } - private static DataSchema generateDataSchema( MSQSpec querySpec, RowSignature querySignature, @@ -2486,7 +2479,9 @@ private static MSQStatusReport makeStatusReport( workerStatsMap = taskLauncher.getWorkerStats(); } - SegmentLoadStatusFetcher.SegmentLoadWaiterStatus status = segmentLoadWaiter == null ? null : segmentLoadWaiter.status(); + SegmentLoadStatusFetcher.SegmentLoadWaiterStatus status = segmentLoadWaiter == null + ? 
null + : segmentLoadWaiter.status(); return new MSQStatusReport( taskState, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index 4a76e688fb7a..fcad74070cb2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -583,7 +583,7 @@ public static boolean isGuaranteedRollup( return tuningConfig.isForceGuaranteedRollup(); } - public static Function, Set> compactionStateAnnotateFunction( + public static Function, Set> prepareCompactionStateAnnotateFunction( boolean storeCompactionState, TaskToolbox toolbox, IngestionSpec ingestionSpec @@ -604,7 +604,7 @@ public static Function, Set> compactionStateAnnota ? null : toolbox.getJsonMapper().convertValue(ingestionSpec.getDataSchema().getAggregators(), new TypeReference>() {}); - final CompactionState compactionState = new CompactionState( + return CompactionState.compactionStateAnnotateFunction( tuningConfig.getPartitionsSpec(), dimensionsSpec, metricsSpec, @@ -612,10 +612,6 @@ public static Function, Set> compactionStateAnnota tuningConfig.getIndexSpec().asMap(toolbox.getJsonMapper()), granularitySpec.asMap(toolbox.getJsonMapper()) ); - return segments -> segments - .stream() - .map(s -> s.withLastCompactionState(compactionState)) - .collect(Collectors.toSet()); } else { return Function.identity(); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 50e13a93c0be..a23e918cbda9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -991,7 
+991,7 @@ private TaskStatus generateAndPublishSegments( Tasks.DEFAULT_STORE_COMPACTION_STATE ); final Function, Set> annotateFunction = - compactionStateAnnotateFunction( + prepareCompactionStateAnnotateFunction( storeCompactionState, toolbox, ingestionSchema diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index db497dff5ecf..45bd9518712e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -1149,7 +1149,7 @@ private void publishSegments( Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE ); - final Function, Set> annotateFunction = compactionStateAnnotateFunction( + final Function, Set> annotateFunction = prepareCompactionStateAnnotateFunction( storeCompactionState, toolbox, ingestionSchema diff --git a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java index cb9ddf1a93b5..43d53ffe3312 100644 --- a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java +++ b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java @@ -27,6 +27,9 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; /** * This class describes what compaction task spec was used to create a given segment. 
@@ -146,4 +149,29 @@ public String toString() ", metricsSpec=" + metricsSpec + '}'; } + + public static Function, Set> compactionStateAnnotateFunction( + PartitionsSpec partitionsSpec, + DimensionsSpec dimensionsSpec, + List metricsSpec, + Map transformSpec, + Map indexSpec, + Map granularitySpec + ) + { + CompactionState compactionState = new CompactionState( + partitionsSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec, + granularitySpec + ); + + return segments -> segments + .stream() + .map(s -> s.withLastCompactionState(compactionState)) + .collect(Collectors.toSet()); + } + } From 13f2c9903e2d85c2ffae46f2df0b9e6c0e5c6539 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 18:43:50 +0530 Subject: [PATCH 13/26] Checkstyle fixes --- .../java/org/apache/druid/msq/exec/MSQReplaceTest.java | 9 ++++++--- .../test/java/org/apache/druid/msq/test/MSQTestBase.java | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 3743a42eb073..ef625cc813e3 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -1829,11 +1829,14 @@ private List expectedFooRows() )); return expectedRows; } + private CompactionState expectedCompactionState( Map context, List partitionDimensions, List dimensions, GranularityType segmentGranularity - ){ - if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))){ + ) + { + if (!context.containsKey(Tasks.STORE_COMPACTION_STATE_KEY) + || !((Boolean) context.get(Tasks.STORE_COMPACTION_STATE_KEY))) { return null; } PartitionsSpec partitionsSpec; @@ -1842,7 +1845,7 @@ private CompactionState 
expectedCompactionState( } else { partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null, - partitionDimensions, false + partitionDimensions, false ); } DimensionsSpec dimensionsSpec = new DimensionsSpec.Builder(). diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java index ad4035239ccd..7bc2cdda32a6 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestBase.java @@ -1268,7 +1268,7 @@ public void verifyResults() // SegmentGeneratorFrameProcessorFactory. We can get the tombstone segment ids published by taking a set // difference of all the segments published with the segments that are created by the SegmentGeneratorFrameProcessorFactory if (!testTaskActionClient.getPublishedSegments().isEmpty()) { - if (expectedLastCompactionState != null){ + if (expectedLastCompactionState != null) { CompactionState compactionState = testTaskActionClient.getPublishedSegments().stream().findFirst().get() .getLastCompactionState(); Assert.assertEquals(expectedLastCompactionState, compactionState); From 3b57dfae83cf17f5fbcc9c0059a5a9a7ab7aab4c Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 19:09:36 +0530 Subject: [PATCH 14/26] Try again --- .../apache/druid/msq/exec/ControllerImpl.java | 6 +- .../apache/druid/msq/exec/MSQReplaceTest.java | 55 ++++++++++++------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index d7c92bdc097f..fd8bce7dc3fe 100644 --- 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1769,7 +1769,11 @@ private void publishSegmentsIfNeeded( } public static Function, Set> prepareCompactionStateAnnotateFunction( - MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, ShardSpec shardSpec, String queryId + MSQControllerTask task, + ObjectMapper jsonMapper, + DataSchema dataSchema, + ShardSpec shardSpec, + String queryId ) { PartitionsSpec partitionSpec; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index ef625cc813e3..945c5bc227f0 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -181,7 +181,8 @@ public void testReplaceOnFooWithAll(String contextName, Map cont ) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -238,7 +239,8 @@ public void testReplaceOnFooWithWhere(String contextName, Map co ) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.DAY ) @@ -321,7 +323,8 @@ public void testReplaceOnFoo1WithAllExtern(String contextName, Map c ) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -731,7 +738,8 @@ public void 
testReplaceWhereClauseLargerThanData(String contextName, Map contex )) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.emptyList(), + context, + Collections.emptyList(), Collections.singletonList(new FloatDimensionSchema("m1")), GranularityType.MONTH ) @@ -882,7 +891,8 @@ public void testReplaceTimeChunksLargerThanData(String contextName, Map conte .setExpectedResultRows(expectedFooRows()) .setExpectedLastCompactionState( expectedCompactionState( - context, Collections.singletonList("dim1"), + context, + Collections.singletonList("dim1"), Arrays.asList( new StringDimensionSchema("dim1"), new LongDimensionSchema("cnt") @@ -1055,7 +1067,8 @@ public void testReplaceSegmentsInsertIntoNewTable(String contextName, Map Date: Fri, 22 Mar 2024 19:49:04 +0530 Subject: [PATCH 15/26] Update doc --- docs/multi-stage-query/reference.md | 39 +++++++++++++++-------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 0b10e14b50f9..1d64d29c34f7 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,26 +346,27 @@ If you're using the web console, you can specify the context parameters through The following table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|---|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | +| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | ## Joins From e59c1bcabe3cbc8b16f7eefff13177957547d29d Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 22 Mar 2024 19:54:46 +0530 Subject: [PATCH 16/26] Revert additional indentation changes --- docs/multi-stage-query/reference.md | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 1d64d29c34f7..505294edefa5 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,27 +346,27 @@ If you're using the web console, you can specify the context parameters through The following table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|---|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | +| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | ## Joins From cbc582d236c256a8949940375619711a08b0b82c Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Sat, 23 Mar 2024 12:39:58 +0530 Subject: [PATCH 17/26] Resolve coverage test for druid-processing --- docs/multi-stage-query/reference.md | 2 +- .../apache/druid/msq/exec/ControllerImpl.java | 7 +-- .../druid/timeline/DataSegmentTest.java | 43 ++++++++++++++++++- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 505294edefa5..9bb4388e208f 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -366,7 +366,7 @@ The following table lists the context parameters for the MSQ task engine: | `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | | `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | | `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. | `false` | +| `storeCompactionState` | REPLACE

REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [storeCompactionState](../ingestion/tasks.md#context-parameters) task context flag. | `false` | ## Joins diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index fd8bce7dc3fe..eb8e7ba29ea2 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1729,17 +1729,12 @@ private void publishSegmentsIfNeeded( Function, Set> compactionStateAnnotateFunction = Function.identity(); - Boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) + boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) .getBoolean( Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE ); - if (storeCompactionState == null) { - storeCompactionState = Tasks.DEFAULT_STORE_COMPACTION_STATE; - - } - if (!segments.isEmpty() && storeCompactionState) { DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); diff --git a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java index 97c3d7a2aaac..aebdb9493000 100644 --- a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import 
com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.RangeSet; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; @@ -47,6 +48,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.function.Function; /** */ @@ -361,10 +364,48 @@ public void testWithLastCompactionState() .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) .shardSpec(getShardSpec(7)) .size(0) - .build(); + .build(); Assert.assertEquals(segment1, segment2.withLastCompactionState(compactionState)); } + @Test + public void testAnnotateWithLastCompactionState() + { + final CompactionState compactionState = new CompactionState( + new DynamicPartitionsSpec(null, null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), + ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), + ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), + Collections.singletonMap("test", "map"), + Collections.singletonMap("test2", "map2") + ); + + final Function, Set> annotateFn = CompactionState.compactionStateAnnotateFunction( + new DynamicPartitionsSpec(null, null), + new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), + ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), + ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), + Collections.singletonMap("test", "map"), + Collections.singletonMap("test2", "map2") + ); + final DataSegment segment1 = DataSegment.builder() + .dataSource("foo") + .interval(Intervals.of("2012-01-01/2012-01-02")) + .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) + .shardSpec(getShardSpec(7)) + .size(0) + .lastCompactionState(compactionState) + .build(); + final DataSegment segment2 
= DataSegment.builder() + .dataSource("foo") + .interval(Intervals.of("2012-01-01/2012-01-02")) + .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) + .shardSpec(getShardSpec(7)) + .size(0) + .build(); + Assert.assertEquals(ImmutableSet.of(segment1), annotateFn.apply(ImmutableSet.of(segment2))); + } + @Test public void testTombstoneType() { From 316e378376fe4b9eee6c49cf02c966c861a39edc Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 1 Apr 2024 09:35:34 +0530 Subject: [PATCH 18/26] Suppress spelling error --- website/.spelling | 1 + 1 file changed, 1 insertion(+) diff --git a/website/.spelling b/website/.spelling index 37e43c9d0c0b..e013d4eebc56 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1328,6 +1328,7 @@ valueFormat IOConfig compactionTask compactionTasks +storeCompactionState ingestSegmentFirehose numShards IngestSegment From c87ff9fe6aa48673248c4daa7da855965fdf7854 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 2 Apr 2024 11:05:04 +0530 Subject: [PATCH 19/26] Address review comments --- docs/multi-stage-query/reference.md | 40 +++++----- .../apache/druid/msq/exec/ControllerImpl.java | 75 +++++++++++-------- .../apache/druid/msq/exec/MSQReplaceTest.java | 44 +++++------ .../common/task/AbstractBatchIndexTask.java | 4 +- .../indexing/common/task/CompactionTask.java | 5 +- .../druid/indexing/common/task/IndexTask.java | 2 +- .../parallel/ParallelIndexSupervisorTask.java | 2 +- .../partitions/DynamicPartitionsSpec.java | 4 + .../druid/timeline/CompactionState.java | 2 +- .../druid/timeline/DataSegmentTest.java | 47 ++++++++---- website/.spelling | 1 - 11 files changed, 127 insertions(+), 99 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 9bb4388e208f..0220a2ebca8e 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,27 +346,27 @@ If you're using the web console, you can specify the context parameters through The following 
table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|---|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`). If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport`(the default) to write select results to the task report. This is not scalable since task reports size explodes for large results
Use `durableStorage` to write results to durable storage location. For large results sets, its recommended to use `durableStorage` . To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form.| empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [storeCompactionState](../ingestion/tasks.md#context-parameters) task context flag. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`. If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.<br />

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport` (the default) to write select results to the task report. This is not scalable since the task report size explodes for large results.<br />
Use `durableStorage` to write results to a durable storage location. For large result sets, it's recommended to use `durableStorage`. To configure durable storage see [`this`](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE<br />

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage`. | 100000 | +| `skipTypeVerification` | INSERT or REPLACE<br />

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` field that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [`storeCompactionState`](../ingestion/tasks.md#context-parameters) task context flag. | `false` | ## Joins diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index eb8e7ba29ea2..fd2408b2b4c1 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -67,6 +67,7 @@ import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -219,6 +220,7 @@ import org.apache.druid.timeline.partition.NumberedPartialShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.ShardSpec; +import org.apache.druid.timeline.partition.SingleDimensionShardSpec; import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -1725,9 +1727,7 @@ private void publishSegmentsIfNeeded( //noinspection unchecked @SuppressWarnings("unchecked") - final Set segments = (Set) queryKernel.getResultObjectForStage(finalStageId); - - Function, Set> compactionStateAnnotateFunction = Function.identity(); + Set segments = (Set) 
queryKernel.getResultObjectForStage(finalStageId); boolean storeCompactionState = QueryContext.of(task.getQuerySpec().getQuery().getContext()) .getBoolean( @@ -1736,34 +1736,35 @@ private void publishSegmentsIfNeeded( ); if (!segments.isEmpty() && storeCompactionState) { - DataSourceMSQDestination destination = (DataSourceMSQDestination) task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { - // Only do this for replace queries, whether originating directly or via compaction - log.error("storeCompactionState flag set for a non-REPLACE query [%s]", queryDef.getQueryId()); + // Store compaction state only for replace queries. + log.error( + "storeCompactionState flag set for a non-REPLACE query [%s]. Ignoring the flag for now.", + queryDef.getQueryId() + ); } else { - DataSchema dataSchema = ((SegmentGeneratorFrameProcessorFactory) queryKernel .getStageDefinition(finalStageId).getProcessorFactory()).getDataSchema(); ShardSpec shardSpec = segments.stream().findFirst().get().getShardSpec(); - compactionStateAnnotateFunction = prepareCompactionStateAnnotateFunction( + Function, Set> compactionStateAnnotateFunction = addCompactionStateToSegments( task(), context.jsonMapper(), dataSchema, shardSpec, queryDef.getQueryId() ); + segments = compactionStateAnnotateFunction.apply(segments); } } - log.info("Query [%s] publishing %d segments.", queryDef.getQueryId(), segments.size()); - publishAllSegments(compactionStateAnnotateFunction.apply(segments)); + publishAllSegments(segments); } } - public static Function, Set> prepareCompactionStateAnnotateFunction( + private static Function, Set> addCompactionStateToSegments( MSQControllerTask task, ObjectMapper jsonMapper, DataSchema dataSchema, @@ -1771,30 +1772,41 @@ public static Function, Set> prepareCompactionStat String queryId ) { + final MSQTuningConfig tuningConfig = task.getQuerySpec().getTuningConfig(); PartitionsSpec partitionSpec; - if ((Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE) 
- || Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE))) { + // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. + if (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE)) { + String partitionDimension = ((SingleDimensionShardSpec) shardSpec).getDimension(); + partitionSpec = new SingleDimensionPartitionsSpec( + tuningConfig.getRowsPerSegment(), + null, + partitionDimension, + false + ); + } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) { List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); partitionSpec = new DimensionRangePartitionsSpec( - task.getQuerySpec().getTuningConfig().getRowsPerSegment(), + tuningConfig.getRowsPerSegment(), null, partitionDimensions, false ); - } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - partitionSpec = new DynamicPartitionsSpec(task.getQuerySpec().getTuningConfig().getRowsPerSegment(), null); + // Using Long.MAX_VALUE for MaxTotalRows as that is the default used by a compaction task. 
+ partitionSpec = new DynamicPartitionsSpec(null, DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS); } else { - log.error( - "Query [%s] skipping storing compaction state in segments as shard spec of unsupported type [%s].", - queryId, shardSpec.getType() - ); - return Function.identity(); + throw new MSQException( + UnknownFault.forMessage( + StringUtils.format( + "Query[%s] cannot store compaction state in segments as shard spec of unsupported type[%s].", + queryId, + shardSpec.getType() + ))); } - Granularity segmentGranularity = ((DataSourceMSQDestination) task.getQuerySpec() - .getDestination()).getSegmentGranularity(); + Granularity segmentGranularity = ((DataSourceMSQDestination) task.getQuerySpec().getDestination()) + .getSegmentGranularity(); GranularitySpec granularitySpec = new UniformGranularitySpec( segmentGranularity, @@ -1806,9 +1818,9 @@ public static Function, Set> prepareCompactionStat DimensionsSpec dimensionsSpec = dataSchema.getDimensionsSpec(); Map transformSpec = TransformSpec.NONE.equals(dataSchema.getTransformSpec()) ? null - : new ClientCompactionTaskTransformSpec(dataSchema.getTransformSpec() - .getFilter()).asMap( - jsonMapper); + : new ClientCompactionTaskTransformSpec( + dataSchema.getTransformSpec().getFilter() + ).asMap(jsonMapper); List metricsSpec = dataSchema.getAggregators() == null ? 
null : jsonMapper.convertValue( @@ -1817,11 +1829,11 @@ public static Function, Set> prepareCompactionStat }); - IndexSpec indexSpec = task.getQuerySpec().getTuningConfig().getIndexSpec(); + IndexSpec indexSpec = tuningConfig.getIndexSpec(); - log.info("Query [%s] storing compaction state in segments.", queryId); + log.info("Query[%s] storing compaction state in segments.", queryId); - return CompactionState.compactionStateAnnotateFunction( + return CompactionState.addCompactionStateToSegments( partitionSpec, dimensionsSpec, metricsSpec, @@ -1891,8 +1903,9 @@ private static QueryDefinition makeQueryDefinition( } } else { shuffleSpecFactory = querySpec.getDestination() - .getShuffleSpecFactory(MultiStageQueryContext.getRowsPerPage(querySpec.getQuery() - .context())); + .getShuffleSpecFactory( + MultiStageQueryContext.getRowsPerPage(querySpec.getQuery().context()) + ); queryToPlan = querySpec.getQuery(); } diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 945c5bc227f0..be15bfdcd996 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -83,10 +83,7 @@ public class MSQReplaceTest extends MSQTestBase Tasks.TASK_LOCK_TYPE, StringUtils.toLowerCase(TaskLockType.REPLACE.name()) ) - .put( - Tasks.STORE_COMPACTION_STATE_KEY, - true - ) + .put( Tasks.STORE_COMPACTION_STATE_KEY, true) .build(); public static Collection data() @@ -954,8 +951,7 @@ public void testReplaceAllOverEternitySegment(String contextName, Map expectedFooRows() } private CompactionState expectedCompactionState( - Map context, List partitionDimensions, List dimensions, + Map context, + List partitionDimensions, + List dimensions, GranularityType segmentGranularity ) { diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java index fcad74070cb2..c4ffcb8aae41 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/AbstractBatchIndexTask.java @@ -583,7 +583,7 @@ public static boolean isGuaranteedRollup( return tuningConfig.isForceGuaranteedRollup(); } - public static Function, Set> prepareCompactionStateAnnotateFunction( + public static Function, Set> addCompactionStateToSegments( boolean storeCompactionState, TaskToolbox toolbox, IngestionSpec ingestionSpec @@ -604,7 +604,7 @@ public static Function, Set> prepareCompactionStat ? null : toolbox.getJsonMapper().convertValue(ingestionSpec.getDataSchema().getAggregators(), new TypeReference>() {}); - return CompactionState.compactionStateAnnotateFunction( + return CompactionState.addCompactionStateToSegments( tuningConfig.getPartitionsSpec(), dimensionsSpec, metricsSpec, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java index 59a0a499f917..72be46785ab8 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CompactionTask.java @@ -1219,10 +1219,7 @@ CompactionTuningConfig computeTuningConfig() final DynamicPartitionsSpec dynamicPartitionsSpec = (DynamicPartitionsSpec) partitionsSpec; partitionsSpec = new DynamicPartitionsSpec( dynamicPartitionsSpec.getMaxRowsPerSegment(), - // Setting maxTotalRows to Long.MAX_VALUE to respect the computed maxRowsPerSegment. 
- // If this is set to something too small, compactionTask can generate small segments - // which need to be compacted again, which in turn making auto compaction stuck in the same interval. - dynamicPartitionsSpec.getMaxTotalRowsOr(Long.MAX_VALUE) + dynamicPartitionsSpec.getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS) ); } return newTuningConfig.withPartitionsSpec(partitionsSpec); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index a23e918cbda9..7822e58f40cf 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -991,7 +991,7 @@ private TaskStatus generateAndPublishSegments( Tasks.DEFAULT_STORE_COMPACTION_STATE ); final Function, Set> annotateFunction = - prepareCompactionStateAnnotateFunction( + addCompactionStateToSegments( storeCompactionState, toolbox, ingestionSchema diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 45bd9518712e..9929f12a6c71 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -1149,7 +1149,7 @@ private void publishSegments( Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE ); - final Function, Set> annotateFunction = prepareCompactionStateAnnotateFunction( + final Function, Set> annotateFunction = addCompactionStateToSegments( storeCompactionState, toolbox, ingestionSchema diff --git 
a/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java b/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java index 05dec7cb58e7..2c5d294f3c1c 100644 --- a/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java +++ b/processing/src/main/java/org/apache/druid/indexer/partitions/DynamicPartitionsSpec.java @@ -34,6 +34,10 @@ public class DynamicPartitionsSpec implements PartitionsSpec * Default maxTotalRows for most task types except compaction task. */ public static final long DEFAULT_MAX_TOTAL_ROWS = 20_000_000; + // Using MAX_VALUE as the default for setting maxTotalRows for compaction to respect the computed maxRowsPerSegment. + // If this is set to something too small, compactionTask can generate small segments + // which need to be compacted again, which in turn making auto compaction stuck in the same interval. + public static final long DEFAULT_COMPACTION_MAX_TOTAL_ROWS = Long.MAX_VALUE; static final String NAME = "dynamic"; private final int maxRowsPerSegment; diff --git a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java index 43d53ffe3312..2c6e0d96c397 100644 --- a/processing/src/main/java/org/apache/druid/timeline/CompactionState.java +++ b/processing/src/main/java/org/apache/druid/timeline/CompactionState.java @@ -150,7 +150,7 @@ public String toString() '}'; } - public static Function, Set> compactionStateAnnotateFunction( + public static Function, Set> addCompactionStateToSegments( PartitionsSpec partitionsSpec, DimensionsSpec dimensionsSpec, List metricsSpec, diff --git a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java index aebdb9493000..afb743d7a1af 100644 --- a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java +++ 
b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java @@ -364,30 +364,45 @@ public void testWithLastCompactionState() .version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()) .shardSpec(getShardSpec(7)) .size(0) - .build(); + .build(); Assert.assertEquals(segment1, segment2.withLastCompactionState(compactionState)); } @Test public void testAnnotateWithLastCompactionState() { - final CompactionState compactionState = new CompactionState( - new DynamicPartitionsSpec(null, null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), - ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), - ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), - Collections.singletonMap("test", "map"), - Collections.singletonMap("test2", "map2") + DynamicPartitionsSpec dynamicPartitionsSpec = new DynamicPartitionsSpec(null, null); + DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of( + "bar", + "foo" + ))); + List metricsSpec = ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")); + Map transformSpec = ImmutableMap.of( + "filter", + ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo" ) ); + Map indexSpec = Collections.singletonMap("test", "map"); + Map granularitySpec = Collections.singletonMap("test2", "map"); - final Function, Set> annotateFn = CompactionState.compactionStateAnnotateFunction( - new DynamicPartitionsSpec(null, null), - new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), - ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), - ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), - Collections.singletonMap("test", "map"), - Collections.singletonMap("test2", "map2") + final CompactionState compactionState = new CompactionState( + dynamicPartitionsSpec, + dimensionsSpec, + 
metricsSpec, + transformSpec, + indexSpec, + granularitySpec ); + + final Function, Set> addCompactionStateFunction = + CompactionState.addCompactionStateToSegments( + dynamicPartitionsSpec, + dimensionsSpec, + metricsSpec, + transformSpec, + indexSpec, + granularitySpec + ); + final DataSegment segment1 = DataSegment.builder() .dataSource("foo") .interval(Intervals.of("2012-01-01/2012-01-02")) @@ -403,7 +418,7 @@ public void testAnnotateWithLastCompactionState() .shardSpec(getShardSpec(7)) .size(0) .build(); - Assert.assertEquals(ImmutableSet.of(segment1), annotateFn.apply(ImmutableSet.of(segment2))); + Assert.assertEquals(ImmutableSet.of(segment1), addCompactionStateFunction.apply(ImmutableSet.of(segment2))); } @Test diff --git a/website/.spelling b/website/.spelling index e013d4eebc56..37e43c9d0c0b 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1328,7 +1328,6 @@ valueFormat IOConfig compactionTask compactionTasks -storeCompactionState ingestSegmentFirehose numShards IngestSegment From f18650d923e7b57922629f076ccf7edf09eb6b35 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 2 Apr 2024 11:19:25 +0530 Subject: [PATCH 20/26] Resolve checkstyle errors --- .../java/org/apache/druid/msq/exec/ControllerImpl.java | 2 +- .../java/org/apache/druid/msq/exec/MSQReplaceTest.java | 7 ++----- .../java/org/apache/druid/timeline/DataSegmentTest.java | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index fd2408b2b4c1..cc150e9f4d38 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1739,7 +1739,7 @@ private void publishSegmentsIfNeeded( DataSourceMSQDestination destination = (DataSourceMSQDestination) 
task.getQuerySpec().getDestination(); if (!destination.isReplaceTimeChunks()) { // Store compaction state only for replace queries. - log.error( + log.warn( "storeCompactionState flag set for a non-REPLACE query [%s]. Ignoring the flag for now.", queryDef.getQueryId() ); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index be15bfdcd996..9d8196bd50a0 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -79,11 +79,8 @@ public class MSQReplaceTest extends MSQTestBase private static final Map QUERY_CONTEXT_WITH_REPLACE_LOCK_AND_COMPACTION_STATE = ImmutableMap.builder() .putAll(DEFAULT_MSQ_CONTEXT) - .put( - Tasks.TASK_LOCK_TYPE, - StringUtils.toLowerCase(TaskLockType.REPLACE.name()) - ) - .put( Tasks.STORE_COMPACTION_STATE_KEY, true) + .put(Tasks.TASK_LOCK_TYPE, StringUtils.toLowerCase(TaskLockType.REPLACE.name())) + .put(Tasks.STORE_COMPACTION_STATE_KEY, true) .build(); public static Collection data() diff --git a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java index afb743d7a1af..3f0667b870c9 100644 --- a/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java +++ b/processing/src/test/java/org/apache/druid/timeline/DataSegmentTest.java @@ -379,7 +379,7 @@ public void testAnnotateWithLastCompactionState() List metricsSpec = ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")); Map transformSpec = ImmutableMap.of( "filter", - ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo" ) + ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo") ); Map indexSpec = Collections.singletonMap("test", "map"); Map 
granularitySpec = Collections.singletonMap("test2", "map"); From 49053db00c94b70005a79d04a5bf8aee41ec0506 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 2 Apr 2024 11:40:49 +0530 Subject: [PATCH 21/26] Remove redundant comment --- .../src/main/java/org/apache/druid/msq/exec/ControllerImpl.java | 1 - 1 file changed, 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index cc150e9f4d38..93b665c32bce 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1793,7 +1793,6 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - // Using Long.MAX_VALUE for MaxTotalRows as that is the default used by a compaction task. 
partitionSpec = new DynamicPartitionsSpec(null, DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS); } else { throw new MSQException( From 29ea760b83a6ae9787e5228aadf629c8009c3d30 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Thu, 4 Apr 2024 09:37:10 +0530 Subject: [PATCH 22/26] Revert maxTotalRows to null --- .../src/main/java/org/apache/druid/msq/exec/ControllerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 93b665c32bce..3373c1efd520 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1793,7 +1793,7 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - partitionSpec = new DynamicPartitionsSpec(null, DynamicPartitionsSpec.DEFAULT_COMPACTION_MAX_TOTAL_ROWS); + partitionSpec = new DynamicPartitionsSpec(null, null); } else { throw new MSQException( UnknownFault.forMessage( From 7e43b5deb113b73c47454325d83fb37c503102ff Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Fri, 5 Apr 2024 15:28:34 +0530 Subject: [PATCH 23/26] Address review comments and fix tests --- docs/multi-stage-query/reference.md | 40 +++++++++---------- .../apache/druid/msq/exec/ControllerImpl.java | 3 +- .../apache/druid/msq/exec/MSQReplaceTest.java | 2 +- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md index 0220a2ebca8e..7320edfc6200 100644 --- a/docs/multi-stage-query/reference.md +++ b/docs/multi-stage-query/reference.md @@ -346,27 +346,27 @@ If you're using the web console, you can specify the context parameters through The following 
table lists the context parameters for the MSQ task engine: -| Parameter | Description | Default value | -|---|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---| -| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | -| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | -| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | -| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| -| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | -| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | +| Parameter | Description | Default value | +|---|---|---| +| `maxNumTasks` | SELECT, INSERT, REPLACE

The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.

May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 | +| `taskAssignment` | SELECT, INSERT, REPLACE

Determines how many tasks to use. Possible values include:
  • `max`: Uses as many tasks as possible, up to `maxNumTasks`.
  • `auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`.
| `max` | +| `finalizeAggregations` | SELECT, INSERT, REPLACE

Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` | +| `arrayIngestMode` | INSERT, REPLACE

Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`] in the [Arrays](../querying/arrays.md) page for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)| +| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE

Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` | +| `rowsInMemory` | INSERT or REPLACE

Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 | | `segmentSortOrder` | INSERT or REPLACE

Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.

You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list | -| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | -| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | -| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | -| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for the shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`. If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | -| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | -| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport` (the default) to write select results to the task report. This is not scalable, since the task report size explodes for large results.
Use `durableStorage` to write results to a durable storage location. For large result sets, it's recommended to use `durableStorage`. To configure durable storage, see [this](#durable-storage) section. | `taskReport` | -| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall until the segments are loaded. | `false` | -| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | -| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage`. | 100000 | -| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | -| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | -| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` field that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [`storeCompactionState`](../ingestion/tasks.md#context-parameters) task context flag. | `false` | +| `maxParseExceptions`| SELECT, INSERT, REPLACE

Maximum number of parse exceptions that are ignored while executing the query before it stops with `TooManyWarningsFault`. To ignore all the parse exceptions, set the value to -1. | 0 | +| `rowsPerSegment` | INSERT or REPLACE

The number of rows per segment to target. The actual number of rows per segment may be somewhat higher or lower than this number. In most cases, use the default. For general information about sizing rows per segment, see [Segment Size Optimization](../operations/segment-optimization.md). | 3,000,000 | +| `indexSpec` | INSERT or REPLACE

An [`indexSpec`](../ingestion/ingestion-spec.md#indexspec) to use when generating segments. May be a JSON string or object. See [Front coding](../ingestion/ingestion-spec.md#front-coding) for details on configuring an `indexSpec` with front coding. | See [`indexSpec`](../ingestion/ingestion-spec.md#indexspec). | +| `durableShuffleStorage` | SELECT, INSERT, REPLACE

Whether to use durable storage for the shuffle mesh. To use this feature, configure the durable storage at the server level using `druid.msq.intermediate.storage.enable=true`. If these properties are not configured, any query with the context variable `durableShuffleStorage=true` fails with a configuration error.

| `false` | +| `faultTolerance` | SELECT, INSERT, REPLACE

Whether to turn on fault tolerance mode or not. Failed workers are retried based on [Limits](#limits). Cannot be used when `durableShuffleStorage` is explicitly set to false. | `false` | +| `selectDestination` | SELECT

Controls where the final result of the select query is written.
Use `taskReport` (the default) to write select results to the task report. This is not scalable, since the task report size explodes for large results.
Use `durableStorage` to write results to a durable storage location. For large result sets, it's recommended to use `durableStorage`. To configure durable storage, see [this](#durable-storage) section. | `taskReport` | +| `waitUntilSegmentsLoad` | INSERT, REPLACE

If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall until the segments are loaded. | `false` | +| `includeSegmentSource` | SELECT, INSERT, REPLACE

Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` | +| `rowsPerPage` | SELECT

The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default.
This property comes into effect only when `selectDestination` is set to `durableStorage`. | 100000 | +| `skipTypeVerification` | INSERT or REPLACE

During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.

Provide the column list as comma-separated values or as a JSON array in string form. | empty list | +| `failOnEmptyInsert` | INSERT or REPLACE

When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` | +| `storeCompactionState` | REPLACE

When set to true, a REPLACE query stores as part of each segment's metadata a `lastCompactionState` field that captures the various specs used to create the segment. Future compaction jobs skip segments whose `lastCompactionState` matches the desired compaction state. Works the same as [`storeCompactionState`](../ingestion/tasks.md#context-parameters) task context flag. | `false` | ## Joins diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 3373c1efd520..9a66153f8305 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1775,7 +1775,6 @@ private static Function, Set> addCompactionStateTo final MSQTuningConfig tuningConfig = task.getQuerySpec().getTuningConfig(); PartitionsSpec partitionSpec; - // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. if (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE)) { String partitionDimension = ((SingleDimensionShardSpec) shardSpec).getDimension(); partitionSpec = new SingleDimensionPartitionsSpec( @@ -1793,6 +1792,8 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { + // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. + // Hence using null for both which ends up translating to DEFAULT_MAX_ROWS_PER_SEGMENT for maxRowsPerSegment. 
partitionSpec = new DynamicPartitionsSpec(null, null); } else { throw new MSQException( diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 9d8196bd50a0..6e0ad4e2bf47 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -1855,7 +1855,7 @@ private CompactionState expectedCompactionState( } PartitionsSpec partitionsSpec; if (partitionDimensions.isEmpty()) { - partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null); + partitionsSpec = new DynamicPartitionsSpec(null, null); } else { partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null, From a282e32e6782f060d939d05640ca8c5652dd8ff5 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 8 Apr 2024 16:36:59 +0530 Subject: [PATCH 24/26] Correct values in DynamicPartitionSpec. 
--- .../org/apache/druid/msq/exec/ControllerImpl.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 9a66153f8305..8d0d006d9606 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1775,15 +1775,7 @@ private static Function, Set> addCompactionStateTo final MSQTuningConfig tuningConfig = task.getQuerySpec().getTuningConfig(); PartitionsSpec partitionSpec; - if (Objects.equals(shardSpec.getType(), ShardSpec.Type.SINGLE)) { - String partitionDimension = ((SingleDimensionShardSpec) shardSpec).getDimension(); - partitionSpec = new SingleDimensionPartitionsSpec( - tuningConfig.getRowsPerSegment(), - null, - partitionDimension, - false - ); - } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) { + if (Objects.equals(shardSpec.getType(), ShardSpec.Type.RANGE)) { List partitionDimensions = ((DimensionRangeShardSpec) shardSpec).getDimensions(); partitionSpec = new DimensionRangePartitionsSpec( tuningConfig.getRowsPerSegment(), @@ -1794,8 +1786,9 @@ private static Function, Set> addCompactionStateTo } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. // Hence using null for both which ends up translating to DEFAULT_MAX_ROWS_PER_SEGMENT for maxRowsPerSegment. - partitionSpec = new DynamicPartitionsSpec(null, null); + partitionSpec = new DynamicPartitionsSpec(tuningConfig.getRowsPerSegment(), Long.MAX_VALUE); } else { + // SingleDimenionShardSpec and other shard specs are never created in MSQ. 
throw new MSQException( UnknownFault.forMessage( StringUtils.format( From b6bc0a594981af85763436b521ba00b87bbcb1fd Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Mon, 8 Apr 2024 17:11:57 +0530 Subject: [PATCH 25/26] Fix checkstyle --- .../src/main/java/org/apache/druid/msq/exec/ControllerImpl.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 8d0d006d9606..87a00e3a9af4 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -67,7 +67,6 @@ import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; -import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.LockGranularity; import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskLockType; @@ -220,7 +219,6 @@ import org.apache.druid.timeline.partition.NumberedPartialShardSpec; import org.apache.druid.timeline.partition.NumberedShardSpec; import org.apache.druid.timeline.partition.ShardSpec; -import org.apache.druid.timeline.partition.SingleDimensionShardSpec; import org.apache.druid.utils.CollectionUtils; import org.joda.time.DateTime; import org.joda.time.Interval; From 6a4edc9789dace8456537649811ab676abd81dc0 Mon Sep 17 00:00:00 2001 From: Vishesh Garg Date: Tue, 9 Apr 2024 10:27:55 +0530 Subject: [PATCH 26/26] Fix tests --- .../main/java/org/apache/druid/msq/exec/ControllerImpl.java | 3 +-- .../test/java/org/apache/druid/msq/exec/MSQReplaceTest.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git 
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java index 87a00e3a9af4..fe10b3509a60 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java @@ -1782,8 +1782,7 @@ private static Function, Set> addCompactionStateTo false ); } else if (Objects.equals(shardSpec.getType(), ShardSpec.Type.NUMBERED)) { - // There is currently no way of specifying either maxRowsPerSegment or maxTotalRows for an MSQ task. - // Hence using null for both which ends up translating to DEFAULT_MAX_ROWS_PER_SEGMENT for maxRowsPerSegment. + // MSQ tasks don't use maxTotalRows. Hence using LONG.MAX_VALUE. partitionSpec = new DynamicPartitionsSpec(tuningConfig.getRowsPerSegment(), Long.MAX_VALUE); } else { // SingleDimenionShardSpec and other shard specs are never created in MSQ. diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java index 6e0ad4e2bf47..fed19b7132da 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQReplaceTest.java @@ -1855,7 +1855,7 @@ private CompactionState expectedCompactionState( } PartitionsSpec partitionsSpec; if (partitionDimensions.isEmpty()) { - partitionsSpec = new DynamicPartitionsSpec(null, null); + partitionsSpec = new DynamicPartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, Long.MAX_VALUE); } else { partitionsSpec = new DimensionRangePartitionsSpec(MultiStageQueryContext.DEFAULT_ROWS_PER_SEGMENT, null,