From a166b7e18dacc3b6ad40a24fc68065ff66f23cf4 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Wed, 20 Nov 2019 17:57:18 -0800 Subject: [PATCH 01/17] Parallel indexing single dim partitions Implements single dimension range partitioning for native parallel batch indexing as described in #8769. This initial version requires the druid-datasketches extension to be loaded. The algorithm has 5 phases that are orchestrated by the supervisor in `ParallelIndexSupervisorTask#runRangePartitionMultiPhaseParallel()`. These phases and the main classes involved are described below: 1) In parallel, determine the distribution of dimension values for each input source split. `PartialDimensionDistributionTask` uses `StringSketch` to generate the approximate distribution of dimension values for each input source split. If the rows are ungrouped, `PartialDimensionDistributionTask.UngroupedRowDimensionValueFilter` uses a Bloom filter to skip rows that would be grouped. The final distribution is sent back to the supervisor via `DimensionDistributionReport`. 2) The range partitions are determined. In `ParallelIndexSupervisorTask#determineAllRangePartitions()`, the supervisor uses `StringSketchMerger` to merge the individual `StringSketch`es created in the preceding phase. The merged sketch is then used to create the range partitions. 3) In parallel, generate partial range-partitioned segments. `PartialRangeSegmentGenerateTask` uses the range partitions determined in the preceding phase and `RangePartitionCachingLocalSegmentAllocator` to generate `SingleDimensionShardSpec`s. The partition information is sent back to the supervisor via `GeneratedGenericPartitionsReport`. 4) The partial range segments are grouped. In `ParallelIndexSupervisorTask#groupGenericPartitionLocationsPerPartition()`, the supervisor creates the `PartialGenericSegmentMergeIOConfig`s necessary for the next phase. 5) In parallel, merge partial range-partitioned segments. 
`PartialGenericSegmentMergeTask` uses `GenericPartitionLocation` to retrieve the partial range-partitioned segments generated earlier and then merges and publishes them. --- .../SingleDimensionPartitionsSpec.java | 6 +- docs/ingestion/hadoop.md | 2 +- docs/ingestion/index.md | 2 +- docs/ingestion/native-batch.md | 39 +- indexing-service/pom.xml | 5 + .../task/CachingLocalSegmentAllocator.java | 1 + ...PartitionCachingLocalSegmentAllocator.java | 193 +++++++ .../druid/indexing/common/task/Task.java | 6 + .../parallel/DimensionDistributionReport.java | 68 +++ .../GeneratedGenericPartitionsReport.java | 44 ++ .../parallel/GenericPartitionLocation.java | 62 +++ .../batch/parallel/GenericPartitionStat.java | 91 ++++ .../parallel/ParallelIndexSupervisorTask.java | 190 ++++++- ...onDistributionParallelIndexTaskRunner.java | 124 +++++ .../PartialDimensionDistributionTask.java | 448 +++++++++++++++++ .../PartialGenericSegmentMergeIOConfig.java | 40 ++ ...rtialGenericSegmentMergeIngestionSpec.java | 38 ++ ...icSegmentMergeParallelIndexTaskRunner.java | 111 ++++ .../PartialGenericSegmentMergeTask.java | 116 +++++ ...egmentGenerateParallelIndexTaskRunner.java | 130 +++++ .../PartialRangeSegmentGenerateTask.java | 183 +++++++ .../task/batch/parallel/SubTaskReport.java | 4 +- .../distribution/StringDistribution.java | 66 +++ .../StringDistributionMerger.java | 36 ++ .../parallel/distribution/StringSketch.java | 194 +++++++ .../distribution/StringSketchMerger.java | 52 ++ .../parallel/distribution/TimeDimTuple.java | 86 ++++ .../distribution/TimeDimTupleFactory.java | 47 ++ .../distribution/TimeDimTupleFunnel.java | 38 ++ ...faultIndexTaskInputRowIteratorBuilder.java | 2 + ...itionIndexTaskInputRowIteratorBuilder.java | 102 ++++ .../common/task/IngestionTestBase.java | 5 + ...itionCachingLocalSegmentAllocatorTest.java | 233 +++++++++ .../DimensionDistributionReportTest.java | 55 ++ .../GenericPartitionLocationTest.java | 58 +++ .../parallel/GenericPartitionStatTest.java | 59 +++ 
.../ParallelIndexSupervisorTaskSerdeTest.java | 21 +- .../parallel/ParallelIndexTestingFactory.java | 16 +- .../PartialDimensionDistributionTaskTest.java | 470 +++++++++++++++++ ...artialGenericSegmentMergeIOConfigTest.java | 54 ++ ...lGenericSegmentMergeIngestionSpecTest.java | 68 +++ .../PartialGenericSegmentMergeTaskTest.java | 90 ++++ .../PartialRangeSegmentGenerateTaskTest.java | 151 ++++++ ...rtitionMultiPhaseParallelIndexingTest.java | 472 ++++++++++++++++++ .../distribution/StringSketchMergerTest.java | 80 +++ .../distribution/StringSketchTest.java | 379 ++++++++++++++ .../distribution/TimeDimTupleFactoryTest.java | 66 +++ .../distribution/TimeDimTupleFunnelTest.java | 37 ++ .../distribution/TimeDimTupleTest.java | 71 +++ ...InputRowIteratorBuilderTestingFactory.java | 1 + ...titionTaskInputRowIteratorBuilderTest.java | 74 +++ .../indexer/AbstractITBatchIndexTest.java | 8 +- .../tests/indexer/ITParallelIndexTest.java | 4 +- 53 files changed, 4974 insertions(+), 24 deletions(-) create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReport.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedGenericPartitionsReport.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocation.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionParallelIndexTaskRunner.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java create mode 100644 
indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfig.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIngestionSpec.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistributionMerger.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMerger.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTuple.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFactory.java create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnel.java create mode 100644 
indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReportTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocationTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfigTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIngestionSpecTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java create mode 100644 
indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFactoryTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnelTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleTest.java create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java diff --git a/core/src/main/java/org/apache/druid/indexer/partitions/SingleDimensionPartitionsSpec.java b/core/src/main/java/org/apache/druid/indexer/partitions/SingleDimensionPartitionsSpec.java index d193b30b2cf1..031f160d7655 100644 --- a/core/src/main/java/org/apache/druid/indexer/partitions/SingleDimensionPartitionsSpec.java +++ b/core/src/main/java/org/apache/druid/indexer/partitions/SingleDimensionPartitionsSpec.java @@ -175,7 +175,11 @@ public List getPartitionDimensions() @Override public String getForceGuaranteedRollupIncompatiblityReason() { - return NAME + " partitions unsupported"; + if (getPartitionDimension() == null) { + return PARITION_DIMENSION + " must be specified"; + } + + return FORCE_GUARANTEED_ROLLUP_COMPATIBLE; } @Override diff --git a/docs/ingestion/hadoop.md b/docs/ingestion/hadoop.md index cb86355d4189..81a5ce2e844e 100644 --- a/docs/ingestion/hadoop.md +++ b/docs/ingestion/hadoop.md @@ -366,7 +366,7 @@ The configuration options are: |type|Type of partitionSpec to be used.|"single_dim"| |targetRowsPerSegment|Target number of rows to include in a partition, should be a number that targets segments of 500MB\~1GB.|yes| |targetPartitionSize|Deprecated. Renamed to `targetRowsPerSegment`. Target number of rows to include in a partition, should be a number that targets segments of 500MB\~1GB.|no| -|maxRowsPerSegment|Maximum number of rows to include in a partition. 
Defaults to 50% larger than the `targetPartitionSize`.|no| +|maxRowsPerSegment|Maximum number of rows to include in a partition. Defaults to 50% larger than the `targetRowsPerSegment`.|no| |maxPartitionSize|Deprecated. Use `maxRowsPerSegment` instead. Maximum number of rows to include in a partition. Defaults to 50% larger than the `targetPartitionSize`.|no| |partitionDimension|The dimension to partition on. Leave blank to select a dimension automatically.|no| |assumeGrouped|Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.|no| diff --git a/docs/ingestion/index.md b/docs/ingestion/index.md index 23b240922e93..54324ab65eea 100644 --- a/docs/ingestion/index.md +++ b/docs/ingestion/index.md @@ -88,7 +88,7 @@ This table compares the three available options: | **Input locations** | Any [firehose](native-batch.md#firehoses). | Any [firehose](native-batch.md#firehoses). | Any Hadoop FileSystem or Druid datasource. | | **File formats** | Text file formats (CSV, TSV, JSON). Support for binary formats is coming in a future release. | Text file formats (CSV, TSV, JSON). Support for binary formats is coming in a future release. | Any Hadoop InputFormat. | | **[Rollup modes](#rollup)** | Perfect if `forceGuaranteedRollup` = true in the [`tuningConfig`](native-batch.md#tuningconfig).| Perfect if `forceGuaranteedRollup` = true in the [`tuningConfig`](native-batch.md#tuningconfig). | Always perfect. | -| **Partitioning options** | Hash-based partitioning is supported when `forceGuaranteedRollup` = true in the [`tuningConfig`](native-batch.md#tuningconfig). | Hash-based partitioning (when `forceGuaranteedRollup` = true). | Hash-based or range-based partitioning via [`partitionsSpec`](hadoop.md#partitionsspec). 
| +| **Partitioning options** | Hash-based partitioning is supported when `forceGuaranteedRollup` = true in the [`tuningConfig`](native-batch.md#tuningconfig). | Hash-based or range-based partitioning (when `forceGuaranteedRollup` = true). | Hash-based or range-based partitioning via [`partitionsSpec`](hadoop.md#partitionsspec). | diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 428df3eff2b1..e11986ed7a49 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -54,7 +54,7 @@ each sub task creates segments individually and reports them to the supervisor t If `forceGuaranteedRollup` = true, it's executed in two phases with data shuffle which is similar to [MapReduce](https://en.wikipedia.org/wiki/MapReduce). In the first phase, each sub task partitions input data based on `segmentGranularity` (primary partition key) in `granularitySpec` -and `partitionDimensions` (secondary partition key) in `partitionsSpec`. The partitioned data is served by +and `partitionDimension` or `partitionDimensions` (secondary partition key) in `partitionsSpec`. The partitioned data is served by the [middleManager](../design/middlemanager.md) or the [indexer](../design/indexer.md) where the first phase tasks ran. In the second phase, each sub task fetches partitioned data from MiddleManagers or indexers and merges them to create the final segments. @@ -205,13 +205,13 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |maxRowsInMemory|Used in determining when intermediate persists to disk should occur. Normally user does not need to set this, but depending on the nature of data, if rows are short in terms of bytes, user may not want to store a million rows in memory and this value should be set.|1000000|no| |maxBytesInMemory|Used in determining when intermediate persists to disk should occur. Normally this is computed internally and user does not need to set it. 
This value represents number of bytes to aggregate in heap memory before persisting. This is based on a rough estimate of memory usage and not actual usage. The maximum heap memory usage for indexing is maxBytesInMemory * (2 + maxPendingPersists)|1/6 of max JVM memory|no| |maxTotalRows|Deprecated. Use `partitionsSpec` instead. Total number of rows in segments waiting for being pushed. Used in determining when intermediate pushing should occur.|20000000|no| -|numShards|Deprecated. Use `partitionsSpec` instead. Directly specify the number of shards to create. If this is specified and `intervals` is specified in the `granularitySpec`, the index task can skip the determine intervals/partitions pass through the data. `numShards` cannot be specified if `maxRowsPerSegment` is set.|null|no| +|numShards|Deprecated. Use `partitionsSpec` instead. Directly specify the number of shards to create when using a `hashed` `partitionsSpec`. If this is specified and `intervals` is specified in the `granularitySpec`, the index task can skip the determine intervals/partitions pass through the data. `numShards` cannot be specified if `maxRowsPerSegment` is set.|null|no| |splitHintSpec|Used to give a hint to control the amount of data that each first phase task reads. This hint could be ignored depending on the implementation of firehose. 
See [SplitHintSpec](#splithintspec) for more details.|null|no| -|partitionsSpec|Defines how to partition data in each timeChunk, see [PartitionsSpec](#partitionsspec)|`dynamic` if `forceGuaranteedRollup` = false, `hashed` if `forceGuaranteedRollup` = true|no| +|partitionsSpec|Defines how to partition data in each timeChunk, see [PartitionsSpec](#partitionsspec)|`dynamic` if `forceGuaranteedRollup` = false, `hashed` or `single_dim` if `forceGuaranteedRollup` = true|no| |indexSpec|Defines segment storage format options to be used at indexing time, see [IndexSpec](index.md#indexspec)|null|no| |indexSpecForIntermediatePersists|Defines segment storage format options to be used at indexing time for intermediate persisted temporary segments. this can be used to disable dimension/metric compression on intermediate segments to reduce memory required for final merging. however, disabling compression on intermediate segments might increase page cache use while they are used before getting merged into final segment published, see [IndexSpec](index.md#indexspec) for possible values.|same as indexSpec|no| |maxPendingPersists|Maximum number of persists that can be pending but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|0 (meaning one persist can be running concurrently with ingestion, and none can be queued up)|no| -|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../ingestion/index.md#rollup). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. If this is set to true, `numShards` in `tuningConfig` and `intervals` in `granularitySpec` must be set. Note that the result segments would be hash-partitioned. This flag cannot be used with `appendToExisting` of IOConfig. 
For more details, see the below __Segment pushing modes__ section.|false|no| +|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../ingestion/index.md#rollup). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. If this is set to true, `intervals` in `granularitySpec` must be set and `hashed` or `single_dim` must be used for `partitionsSpec`. This flag cannot be used with `appendToExisting` of IOConfig. For more details, see the below __Segment pushing modes__ section.|false|no| +|reportParseExceptions|If true, exceptions encountered during parsing will be thrown and will halt ingestion; if false, unparseable rows and fields will be skipped.|false|no| +|pushTimeout|Milliseconds to wait for pushing segments. It must be >= 0, where 0 means to wait forever.|0|no| +|segmentWriteOutMediumFactory|Segment write-out medium to use when creating segments. See [SegmentWriteOutMediumFactory](#segmentwriteoutmediumfactory).|Not specified, the value from `druid.peon.defaultSegmentWriteOutMediumFactory.type` is used|no| @@ -241,18 +241,37 @@ Currently only one splitHintSpec, i.e., `segments`, is available. ### `partitionsSpec` -PartitionsSpec is to describe the secondary partitioning method. +PartitionsSpec is used to describe the secondary partitioning method. You should use different partitionsSpec depending on the [rollup mode](../ingestion/index.md#rollup) you want. -For perfect rollup, you should use `hashed`. +For perfect rollup, you should use either `hashed` (partitioning based on the hash of dimensions in each row) or +`single_dim` (based on ranges of a single dimension). For best-effort rollup, you should use `dynamic`. + +Hashed partitioning is recommended in most cases, as it will improve indexing performance and create more uniformly +sized data segments relative to single-dimension or dynamic partitioning. 
+ +#### Hash-based partitioning |property|description|default|required?| |--------|-----------|-------|---------| |type|This should always be `hashed`|none|yes| -|targetRowsPerSegment|Target number of rows to include in a partition, should be a number that targets segments of 500MB\~1GB.|5000000 (if `numShards` is not set)|either this or `numShards`| -|numShards|Directly specify the number of shards to create. If this is specified and `intervals` is specified in the `granularitySpec`, the index task can skip the determine intervals/partitions pass through the data. `numShards` cannot be specified if `targetRowsPerSegment` is set.|null|no| -|partitionDimensions|The dimensions to partition on. Leave blank to select all dimensions. Only used with `numShards`, will be ignored when `targetRowsPerSegment` is set.|null|no| +|numShards|Directly specify the number of shards to create. If this is specified and `intervals` is specified in the `granularitySpec`, the index task can skip the determine intervals/partitions pass through the data. `numShards` cannot be specified if `targetRowsPerSegment` is set.|null|yes| +|partitionDimensions|The dimensions to partition on. Leave blank to select all dimensions.|null|no| -For best-effort rollup, you should use `dynamic`. +#### Single-dimension range partitioning + +> Single-dimension range partitioning currently requires the +> [druid-datasketches](../development/extensions-core/datasketches-extension.md) +> extension to be loaded. + +|property|description|default|required?| +|--------|-----------|-------|---------| +|type|This should always be `single_dim`|none|yes| +|targetRowsPerSegment|Target number of rows to include in a partition, should be a number that targets segments of 500MB\~1GB.|none|either this or `maxRowsPerSegment`| +|maxRowsPerSegment|Maximum number of rows to include in a partition. 
Defaults to 50% larger than the `targetRowsPerSegment`.|none|either this or `targetRowsPerSegment`| +|partitionDimension|The dimension to partition on.|none|yes| +|assumeGrouped|Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.|false|no| + +#### Dynamic partitioning |property|description|default|required?| |--------|-----------|-------|---------| diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 2a71a9a72c56..c55e2de46827 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -199,6 +199,11 @@ it.unimi.dsi fastutil + + com.yahoo.datasketches + sketches-core + provided + diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java index 21157bf13957..fbb9081aafa3 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java @@ -43,6 +43,7 @@ * Allocates all necessary segments locally at the beginning and reuses them. 
* * @see HashPartitionCachingLocalSegmentAllocator + * @see RangePartitionCachingLocalSegmentAllocator */ class CachingLocalSegmentAllocator implements IndexTaskSegmentAllocator { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java new file mode 100644 index 000000000000..15c9b56c60d5 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task; + +import com.google.common.collect.Maps; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.timeline.partition.SingleDimensionShardSpec; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * Allocates all necessary range-partitioned segments locally at the beginning and reuses them. + * + * @see CachingLocalSegmentAllocator + */ +public class RangePartitionCachingLocalSegmentAllocator implements IndexTaskSegmentAllocator +{ + private final String dataSource; + private final String partitionDimension; + private final Map intervalsToPartitions; + private final IndexTaskSegmentAllocator delegate; + + public RangePartitionCachingLocalSegmentAllocator( + TaskToolbox toolbox, + String taskId, + String dataSource, + String partitionDimension, + Map intervalsToPartitions + ) throws IOException + { + this.dataSource = dataSource; + this.partitionDimension = partitionDimension; + this.intervalsToPartitions = intervalsToPartitions; + + this.delegate = new CachingLocalSegmentAllocator( + toolbox, + taskId, + this::getIntervalToSegmentIds + ); + } + + private Map> getIntervalToSegmentIds(Function versionFinder) + { + Map> intervalToSegmentIds = + Maps.newHashMapWithExpectedSize(intervalsToPartitions.size()); + + intervalsToPartitions.forEach( + (interval, partitions) -> + intervalToSegmentIds.put( + interval, + translatePartitions(interval, partitions, versionFinder) + ) + ); + + return intervalToSegmentIds; + } + + private List translatePartitions( + Interval interval, + String[] 
partitions, + Function versionFinder + ) + { + if (partitions.length == 0) { + return Collections.emptyList(); + } + + String[] uniquePartitions = Arrays.stream(partitions).distinct().toArray(String[]::new); + int numUniquePartition = uniquePartitions.length; + + if (numUniquePartition == 1) { + return Collections.singletonList( + createLastSegmentIdWithShardSpec( + interval, + versionFinder.apply(interval), + uniquePartitions[0], + 0 + ) + ); + } + + if (isLastPartitionOnlyMaxValue(partitions)) { + // The last partition only contains the max value. A shard that just contains the max value is likely to be + // small, so combine it with the second to last one. + numUniquePartition -= 1; + } + + List segmentIds = + IntStream.range(0, numUniquePartition - 1) + .mapToObj(i -> createSegmentIdWithShardSpec( + interval, + versionFinder.apply(interval), + uniquePartitions[i], + uniquePartitions[i + 1], + i + )) + .collect(Collectors.toCollection(ArrayList::new)); + segmentIds.add( + createLastSegmentIdWithShardSpec( + interval, + versionFinder.apply(interval), + uniquePartitions[numUniquePartition - 1], + segmentIds.size() + ) + ); + + return segmentIds; + } + + private boolean isLastPartitionOnlyMaxValue(String[] partitions) + { + String lastPartition = partitions[partitions.length - 1]; + String secondToLastPartition = partitions[partitions.length - 2]; + return !lastPartition.equals(secondToLastPartition); + } + + private SegmentIdWithShardSpec createLastSegmentIdWithShardSpec( + Interval interval, + String version, + String partitionStart, + int partitionNum + ) + { + return createSegmentIdWithShardSpec(interval, version, partitionStart, null, partitionNum); + } + + private SegmentIdWithShardSpec createSegmentIdWithShardSpec( + Interval interval, + String version, + String partitionStart, + @Nullable String partitionEnd, + int partitionNum + ) + { + return new SegmentIdWithShardSpec( + dataSource, + interval, + version, + new SingleDimensionShardSpec( + 
partitionDimension, + partitionStart, + partitionEnd, + partitionNum + ) + ); + } + + @Override + public String getSequenceName(Interval interval, InputRow inputRow) + { + return delegate.getSequenceName(interval, inputRow); + } + + @Override + public SegmentIdWithShardSpec allocate( + InputRow row, + String sequenceName, + String previousSegmentId, + boolean skipSegmentLineageCheck + ) throws IOException + { + return delegate.allocate(row, sequenceName, previousSegmentId, skipSegmentLineageCheck); + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java index e2857d1fc553..a5db7586439b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/Task.java @@ -28,8 +28,11 @@ import org.apache.druid.indexing.common.config.TaskConfig; import org.apache.druid.indexing.common.task.batch.parallel.LegacySinglePhaseSubTask; import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask; +import org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask; +import org.apache.druid.indexing.common.task.batch.parallel.PartialGenericSegmentMergeTask; import org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentGenerateTask; import org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentMergeTask; +import org.apache.druid.indexing.common.task.batch.parallel.PartialRangeSegmentGenerateTask; import org.apache.druid.indexing.common.task.batch.parallel.SinglePhaseSubTask; import org.apache.druid.query.Query; import org.apache.druid.query.QueryRunner; @@ -60,6 +63,9 @@ @Type(name = SinglePhaseSubTask.OLD_TYPE_NAME, value = LegacySinglePhaseSubTask.class), // for backward compatibility @Type(name = PartialHashSegmentGenerateTask.TYPE, value = 
PartialHashSegmentGenerateTask.class), @Type(name = PartialHashSegmentMergeTask.TYPE, value = PartialHashSegmentMergeTask.class), + @Type(name = PartialRangeSegmentGenerateTask.TYPE, value = PartialRangeSegmentGenerateTask.class), + @Type(name = PartialDimensionDistributionTask.TYPE, value = PartialDimensionDistributionTask.class), + @Type(name = PartialGenericSegmentMergeTask.TYPE, value = PartialGenericSegmentMergeTask.class), @Type(name = "index_hadoop", value = HadoopIndexTask.class), @Type(name = "index_realtime", value = RealtimeIndexTask.class), @Type(name = "index_realtime_appenderator", value = AppenderatorDriverRealtimeIndexTask.class), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReport.java new file mode 100644 index 000000000000..a2e6dd0c476d --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReport.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; +import org.joda.time.Interval; + +import java.util.Map; + +public class DimensionDistributionReport implements SubTaskReport +{ + static final String TYPE = "dimension_distribution"; + private static final String PROP_DISTRIBUTIONS = "distributions"; + + private final String taskId; + private final Map intervalToDistribution; + + @JsonCreator + public DimensionDistributionReport( + @JsonProperty("taskId") String taskId, + @JsonProperty(PROP_DISTRIBUTIONS) Map intervalToDistribution + ) + { + this.taskId = taskId; + this.intervalToDistribution = intervalToDistribution; + } + + @Override + @JsonProperty + public String getTaskId() + { + return taskId; + } + + @JsonProperty(PROP_DISTRIBUTIONS) + public Map getIntervalToDistribution() + { + return intervalToDistribution; + } + + @Override + public String toString() + { + return "DimensionDistributionReport{" + + "taskId='" + taskId + '\'' + + ", intervalToDistribution=" + intervalToDistribution + + '}'; + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedGenericPartitionsReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedGenericPartitionsReport.java new file mode 100644 index 000000000000..0f6570505003 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedGenericPartitionsReport.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.List; + +/** + * Report containing the {@link GenericPartitionStat}s created by a {@link PartialSegmentGenerateTask}. + * This report is collected by {@link ParallelIndexSupervisorTask} and + * used to generate {@link PartialGenericSegmentMergeIOConfig}. + */ +class GeneratedGenericPartitionsReport extends GeneratedPartitionsReport implements SubTaskReport +{ + public static final String TYPE = "generated_generic_partitions"; + + @JsonCreator + GeneratedGenericPartitionsReport( + @JsonProperty("taskId") String taskId, + @JsonProperty("partitionStats") List partitionStats + ) + { + super(taskId, partitionStats); + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocation.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocation.java new file mode 100644 index 000000000000..23bb69a3d525 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocation.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.timeline.partition.ShardSpec; +import org.joda.time.Interval; + +/** + * This class represents the intermediary data server where the partition of {@link #interval} and {@link #shardSpec} + * is stored. 
+ */ +public class GenericPartitionLocation extends PartitionLocation +{ + private final ShardSpec shardSpec; + + @JsonCreator + public GenericPartitionLocation( + @JsonProperty("host") String host, + @JsonProperty("port") int port, + @JsonProperty("useHttps") boolean useHttps, + @JsonProperty("subTaskId") String subTaskId, + @JsonProperty("interval") Interval interval, + @JsonProperty("shardSpec") ShardSpec shardSpec + ) + { + super(host, port, useHttps, subTaskId, interval, shardSpec); + this.shardSpec = shardSpec; + } + + @JsonIgnore + @Override + public int getPartitionId() + { + return shardSpec.getPartitionNum(); + } + + @JsonProperty + ShardSpec getShardSpec() + { + return shardSpec; + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java new file mode 100644 index 000000000000..04a98c284476 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.timeline.partition.ShardSpec; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.util.Objects; + +/** + * Statistics about a partition created by {@link PartialSegmentGenerateTask}. Each partition is a set of data + * of the same time chunk (primary partition key) and the same {@link ShardSpec} (secondary partition key). This class + * holds the statistics of a single partition created by a task. + */ +public class GenericPartitionStat extends PartitionStat +{ + private static final String PROP_SHARD_SPEC = "shardSpec"; + + // Secondary partition key + private final ShardSpec shardSpec; + + @JsonCreator + public GenericPartitionStat( + @JsonProperty("taskExecutorHost") String taskExecutorHost, + @JsonProperty("taskExecutorPort") int taskExecutorPort, + @JsonProperty("useHttps") boolean useHttps, + @JsonProperty("interval") Interval interval, + @JsonProperty(PROP_SHARD_SPEC) ShardSpec shardSpec, + @JsonProperty("numRows") @Nullable Integer numRows, + @JsonProperty("sizeBytes") @Nullable Long sizeBytes + ) + { + super(taskExecutorHost, taskExecutorPort, useHttps, interval, numRows, sizeBytes); + this.shardSpec = shardSpec; + } + + @Override + public int getPartitionId() + { + return shardSpec.getPartitionNum(); + } + + @JsonProperty(PROP_SHARD_SPEC) + @Override + ShardSpec getSecondaryPartition() + { + return shardSpec; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + GenericPartitionStat that = (GenericPartitionStat) o; + return Objects.equals(shardSpec, that.shardSpec); + } + + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), shardSpec); + } +} 
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 28bfc7c421b1..d2e94a675e9b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -27,6 +27,8 @@ import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; import org.apache.druid.client.indexing.IndexingServiceClient; import org.apache.druid.data.input.FiniteFirehoseFactory; import org.apache.druid.data.input.InputFormat; @@ -36,6 +38,7 @@ import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; import org.apache.druid.indexing.common.Counters; import org.apache.druid.indexing.common.TaskLock; @@ -56,6 +59,10 @@ import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketchMerger; import 
org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Pair; @@ -75,6 +82,7 @@ import org.apache.druid.server.security.AuthorizerMapper; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.NumberedShardSpec; +import org.apache.druid.utils.CollectionUtils; import org.checkerframework.checker.nullness.qual.MonotonicNonNull; import org.joda.time.DateTime; import org.joda.time.Interval; @@ -93,6 +101,7 @@ import javax.ws.rs.core.Response.Status; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -298,6 +307,36 @@ PartialHashSegmentGenerateParallelIndexTaskRunner createPartialHashSegmentGenera ); } + @VisibleForTesting + PartialDimensionDistributionParallelIndexTaskRunner createPartialDimensionDistributionRunner(TaskToolbox toolbox) + { + return new PartialDimensionDistributionParallelIndexTaskRunner( + toolbox, + getId(), + getGroupId(), + ingestionSchema, + getContext(), + indexingServiceClient + ); + } + + @VisibleForTesting + PartialRangeSegmentGenerateParallelIndexTaskRunner createPartialRangeSegmentGenerateRunner( + TaskToolbox toolbox, + Map intervalToPartitions + ) + { + return new PartialRangeSegmentGenerateParallelIndexTaskRunner( + toolbox, + getId(), + getGroupId(), + ingestionSchema, + getContext(), + indexingServiceClient, + intervalToPartitions + ); + } + @VisibleForTesting PartialHashSegmentMergeParallelIndexTaskRunner createPartialHashSegmentMergeRunner( TaskToolbox toolbox, @@ -316,6 +355,24 @@ PartialHashSegmentMergeParallelIndexTaskRunner createPartialHashSegmentMergeRunn ); } + @VisibleForTesting + PartialGenericSegmentMergeParallelIndexTaskRunner createPartialGenericSegmentMergeRunner( + TaskToolbox toolbox, + List ioConfigs + ) + { + return new PartialGenericSegmentMergeParallelIndexTaskRunner( + toolbox, + getId(), + 
getGroupId(), + getIngestionSchema().getDataSchema(), + ioConfigs, + getIngestionSchema().getTuningConfig(), + getContext(), + indexingServiceClient + ); + } + @Override public boolean isReady(TaskActionClient taskActionClient) throws Exception { @@ -471,11 +528,9 @@ private TaskStatus runSinglePhaseParallel(TaskToolbox toolbox) throws Exception */ private TaskStatus runMultiPhaseParallel(TaskToolbox toolbox) throws Exception { - if (useHashPartitions()) { - return runHashPartitionMultiPhaseParallel(toolbox); - } else { - throw new UnsupportedOperationException("hash partition required"); - } + return useHashPartitions() + ? runHashPartitionMultiPhaseParallel(toolbox) + : runRangePartitionMultiPhaseParallel(toolbox); } private boolean useHashPartitions() @@ -519,6 +574,101 @@ private TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throw return TaskStatus.fromCode(getId(), state); } + private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception + { + assertDataSketchesAvailable(); + + ParallelIndexTaskRunner distributionRunner = + createRunner( + toolbox, + this::createPartialDimensionDistributionRunner + ); + + TaskState distributionState = runNextPhase(distributionRunner); + if (distributionState.isFailure()) { + return TaskStatus.failure(getId()); + } + + Map intervalToPartitions = + determineAllRangePartitions(distributionRunner.getReports().values()); + + if (intervalToPartitions.isEmpty()) { + String msg = "No valid rows for single dimension partitioning." 
+ + " All rows may have invalid timestamps or multiple dimension values."; + LOG.warn(msg); + return TaskStatus.success(getId(), msg); + } + + ParallelIndexTaskRunner> indexingRunner = + createRunner(toolbox, tb -> createPartialRangeSegmentGenerateRunner(tb, intervalToPartitions)); + + TaskState indexingState = runNextPhase(indexingRunner); + if (indexingState.isFailure()) { + return TaskStatus.failure(getId()); + } + + // partition (interval, partitionId) -> partition locations + Map, List> partitionToLocations = + groupGenericPartitionLocationsPerPartition(indexingRunner.getReports()); + final List ioConfigs = createGenericMergeIOConfigs( + ingestionSchema.getTuningConfig().getTotalNumMergeTasks(), + partitionToLocations + ); + + ParallelIndexTaskRunner mergeRunner = createRunner( + toolbox, + tb -> createPartialGenericSegmentMergeRunner(tb, ioConfigs) + ); + TaskState mergeState = runNextPhase(mergeRunner); + if (mergeState.isSuccess()) { + publishSegments(toolbox, mergeRunner.getReports()); + } + + return TaskStatus.fromCode(getId(), mergeState); + } + + private static void assertDataSketchesAvailable() + { + try { + //noinspection ResultOfObjectAllocationIgnored + new StringSketch(); + } + catch (Exception e) { + throw new ISE(e, "DataSketches is unavailable.
Try loading the druid-datasketches extension."); + } + } + + private Map determineAllRangePartitions(Collection reports) + { + Multimap intervalToDistributions = ArrayListMultimap.create(); + reports.forEach(report -> { + Map intervalToDistribution = report.getIntervalToDistribution(); + intervalToDistribution.forEach(intervalToDistributions::put); + }); + + return CollectionUtils.mapValues(intervalToDistributions.asMap(), this::determineRangePartition); + } + + private String[] determineRangePartition(Collection distributions) + { + StringDistributionMerger distributionMerger = new StringSketchMerger(); + distributions.forEach(distributionMerger::merge); + StringDistribution mergedDistribution = distributionMerger.getResult(); + + SingleDimensionPartitionsSpec partitionsSpec = + (SingleDimensionPartitionsSpec) ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec(); + + final String[] partitions; + Integer targetRowsPerSegment = partitionsSpec.getTargetRowsPerSegment(); + if (targetRowsPerSegment == null) { + partitions = mergedDistribution.getEvenPartitionsByMaxSize(partitionsSpec.getMaxRowsPerSegment()); + } else { + partitions = mergedDistribution.getEvenPartitionsByTargetSize(targetRowsPerSegment); + } + + return partitions; + } + private static Map, List> groupHashPartitionLocationsPerPartition( Map subTaskIdToReport ) @@ -537,6 +687,24 @@ private static Map, List> groupHa return groupPartitionLocationsPerPartition(subTaskIdToReport, createPartitionLocationFunction); } + private static Map, List> groupGenericPartitionLocationsPerPartition( + Map> subTaskIdToReport + ) + { + BiFunction createPartitionLocationFunction = + (subtaskId, partitionStat) -> + new GenericPartitionLocation( + partitionStat.getTaskExecutorHost(), + partitionStat.getTaskExecutorPort(), + partitionStat.isUseHttps(), + subtaskId, + partitionStat.getInterval(), + partitionStat.getSecondaryPartition() + ); + + return groupPartitionLocationsPerPartition(subTaskIdToReport, 
createPartitionLocationFunction); + } + private static Map, List> groupPartitionLocationsPerPartition( Map> subTaskIdToReport, @@ -572,6 +740,18 @@ private static List createHashMergeIOConfigs( ); } + private static List createGenericMergeIOConfigs( + int totalNumMergeTasks, + Map, List> partitionToLocations + ) + { + return createMergeIOConfigs( + totalNumMergeTasks, + partitionToLocations, + PartialGenericSegmentMergeIOConfig::new + ); + } + private static List createMergeIOConfigs( int totalNumMergeTasks, Map, List> partitionToLocations, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionParallelIndexTaskRunner.java new file mode 100644 index 000000000000..239976b77caa --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionParallelIndexTaskRunner.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.druid.client.indexing.IndexingServiceClient; +import org.apache.druid.data.input.InputSplit; +import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.task.IndexTaskClientFactory; + +import java.util.Map; + +/** + * {@link ParallelIndexTaskRunner} for the phase to determine distribution of dimension values in + * multi-phase parallel indexing. + */ +class PartialDimensionDistributionParallelIndexTaskRunner + extends InputSourceSplitParallelIndexTaskRunner +{ + // For tests + private final IndexTaskClientFactory taskClientFactory; + + PartialDimensionDistributionParallelIndexTaskRunner( + TaskToolbox toolbox, + String taskId, + String groupId, + ParallelIndexIngestionSpec ingestionSchema, + Map context, + IndexingServiceClient indexingServiceClient + ) + { + this( + toolbox, + taskId, + groupId, + ingestionSchema, + context, + indexingServiceClient, + null + ); + } + + @VisibleForTesting + PartialDimensionDistributionParallelIndexTaskRunner( + TaskToolbox toolbox, + String taskId, + String groupId, + ParallelIndexIngestionSpec ingestionSchema, + Map context, + IndexingServiceClient indexingServiceClient, + IndexTaskClientFactory taskClientFactory + ) + { + super( + toolbox, + taskId, + groupId, + ingestionSchema, + context, + indexingServiceClient + ); + this.taskClientFactory = taskClientFactory; + } + + @Override + public String getName() + { + return PartialDimensionDistributionTask.TYPE; + } + + @Override + SubTaskSpec createSubTaskSpec( + String id, + String groupId, + String supervisorTaskId, + Map context, + InputSplit split, + ParallelIndexIngestionSpec subTaskIngestionSpec, + IndexingServiceClient indexingServiceClient + ) + { + return new SubTaskSpec( + id, + groupId, + supervisorTaskId, + context, + split + ) + { + @Override + public PartialDimensionDistributionTask 
newSubTask(int numAttempts) + { + return new PartialDimensionDistributionTask( + null, + getGroupId(), + null, + getSupervisorTaskId(), + numAttempts, + subTaskIngestionSpec, + getContext(), + getIndexingServiceClient(), + taskClientFactory + ); + } + }; + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java new file mode 100644 index 000000000000..508b5c8615ed --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java @@ -0,0 +1,448 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.annotation.JacksonInject; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.hash.BloomFilter; +import org.apache.druid.client.indexing.IndexingServiceClient; +import org.apache.druid.data.input.HandlingInputRowIterator; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.InputSourceReader; +import org.apache.druid.indexer.TaskStatus; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider; +import org.apache.druid.indexing.common.task.IndexTaskClientFactory; +import org.apache.druid.indexing.common.task.TaskResource; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.TimeDimTuple; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.TimeDimTupleFactory; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.TimeDimTupleFunnel; +import org.apache.druid.indexing.common.task.batch.parallel.iterator.IndexTaskInputRowIteratorBuilder; +import org.apache.druid.indexing.common.task.batch.parallel.iterator.RangePartitionIndexTaskInputRowIteratorBuilder; +import org.apache.druid.java.util.common.granularity.Granularity; +import 
org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.java.util.common.parsers.ParseException; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.joda.time.DateTime; +import org.joda.time.Interval; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * The worker task of {@link PartialDimensionDistributionParallelIndexTaskRunner}. This task + * determines the distribution of dimension values of input data. + */ + +public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask +{ + public static final String TYPE = "partial_dimension_distribution"; + private static final Logger LOG = new Logger(PartialDimensionDistributionTask.class); + + private final int numAttempts; + private final ParallelIndexIngestionSpec ingestionSchema; + private final String supervisorTaskId; + private final IndexingServiceClient indexingServiceClient; + private final IndexTaskClientFactory taskClientFactory; + + // For testing + private final Supplier ungroupedRowDimValueFilterSupplier; + + @JsonCreator + PartialDimensionDistributionTask( + // id shouldn't be null except when this task is created by ParallelIndexSupervisorTask + @JsonProperty("id") @Nullable String id, + @JsonProperty("groupId") final String groupId, + @JsonProperty("resource") final TaskResource taskResource, + @JsonProperty("supervisorTaskId") final String supervisorTaskId, + @JsonProperty("numAttempts") final int numAttempts, // zero-based counting + @JsonProperty("spec") final ParallelIndexIngestionSpec ingestionSchema, + @JsonProperty("context") final Map context, + @JacksonInject IndexingServiceClient 
indexingServiceClient, + @JacksonInject IndexTaskClientFactory taskClientFactory + ) + { + this( + id, + groupId, + taskResource, + supervisorTaskId, + numAttempts, + ingestionSchema, + context, + indexingServiceClient, + taskClientFactory, + () -> new UngroupedRowDimensionValueFilter( + ingestionSchema.getDataSchema().getGranularitySpec().getQueryGranularity() + ) + ); + } + + @VisibleForTesting // Only for testing + PartialDimensionDistributionTask( + @Nullable String id, + final String groupId, + final TaskResource taskResource, + final String supervisorTaskId, + final int numAttempts, + final ParallelIndexIngestionSpec ingestionSchema, + final Map context, + IndexingServiceClient indexingServiceClient, + IndexTaskClientFactory taskClientFactory, + Supplier ungroupedRowDimValueFilterSupplier + ) + { + super( + getOrMakeId(id, TYPE, ingestionSchema.getDataSchema().getDataSource()), + groupId, + taskResource, + ingestionSchema.getDataSchema(), + ingestionSchema.getTuningConfig(), + context + ); + + Preconditions.checkArgument( + ingestionSchema.getTuningConfig().getPartitionsSpec() instanceof SingleDimensionPartitionsSpec, + "%s partitionsSpec required", + SingleDimensionPartitionsSpec.NAME + ); + + this.numAttempts = numAttempts; + this.ingestionSchema = ingestionSchema; + this.supervisorTaskId = supervisorTaskId; + this.indexingServiceClient = indexingServiceClient; + this.taskClientFactory = taskClientFactory; + this.ungroupedRowDimValueFilterSupplier = ungroupedRowDimValueFilterSupplier; + } + + @JsonProperty + private int getNumAttempts() + { + return numAttempts; + } + + @JsonProperty("spec") + private ParallelIndexIngestionSpec getIngestionSchema() + { + return ingestionSchema; + } + + @JsonProperty + private String getSupervisorTaskId() + { + return supervisorTaskId; + } + + @Override + public String getType() + { + return TYPE; + } + + @Override + public boolean isReady(TaskActionClient taskActionClient) throws Exception + { + return tryTimeChunkLock( + 
taskActionClient, + getIngestionSchema().getDataSchema().getGranularitySpec().inputIntervals() + ); + } + + @Override + public TaskStatus runTask(TaskToolbox toolbox) throws Exception + { + DataSchema dataSchema = ingestionSchema.getDataSchema(); + GranularitySpec granularitySpec = dataSchema.getGranularitySpec(); + ParallelIndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig(); + + SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) tuningConfig.getPartitionsSpec(); + Preconditions.checkNotNull(partitionsSpec); + String partitionDimension = partitionsSpec.getPartitionDimension(); + Preconditions.checkNotNull(partitionDimension, "partitionDimension required"); + boolean isAssumeGrouped = partitionsSpec.isAssumeGrouped(); + + InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource( + ingestionSchema.getDataSchema().getParser() + ); + List metricsNames = Arrays.stream(dataSchema.getAggregators()) + .map(AggregatorFactory::getName) + .collect(Collectors.toList()); + InputFormat inputFormat = ParallelIndexSupervisorTask.getInputFormat(ingestionSchema); + InputSourceReader inputSourceReader = dataSchema.getTransformSpec().decorate( + inputSource.reader( + new InputRowSchema( + dataSchema.getTimestampSpec(), + dataSchema.getDimensionsSpec(), + metricsNames + ), + inputFormat, + null + ) + ); + + try ( + CloseableIterator inputRowIterator = inputSourceReader.read(); + HandlingInputRowIterator iterator = new RangePartitionIndexTaskInputRowIteratorBuilder(partitionDimension) + .delegate(inputRowIterator) + .granularitySpec(granularitySpec) + .nullRowRunnable(IndexTaskInputRowIteratorBuilder.NOOP_RUNNABLE) + .absentBucketIntervalConsumer(IndexTaskInputRowIteratorBuilder.NOOP_CONSUMER) + .build() + ) { + Map distribution = determineDistribution( + iterator, + granularitySpec, + partitionDimension, + isAssumeGrouped, + tuningConfig.isLogParseExceptions(), + tuningConfig.getMaxParseExceptions() + ); + sendReport(new 
DimensionDistributionReport(getId(), distribution)); + } + + return TaskStatus.success(getId()); + } + + private Map determineDistribution( + HandlingInputRowIterator inputRowIterator, + GranularitySpec granularitySpec, + String partitionDimension, + boolean isAssumeGrouped, + boolean isLogParseExceptions, + long maxParseExceptions + ) + { + Map intervalToDistribution = new HashMap<>(); + DimensionValueFilter dimValueFilter = + isAssumeGrouped + ? new GroupedRowDimensionValueFilter() + : ungroupedRowDimValueFilterSupplier.get(); + + long numParseExceptions = 0; + + while (inputRowIterator.hasNext()) { + try { + InputRow inputRow = inputRowIterator.next(); + if (inputRow == null) { + continue; + } + + DateTime timestamp = inputRow.getTimestamp(); + + //noinspection OptionalGetWithoutIsPresent (InputRowIterator returns rows with present intervals) + Interval interval = granularitySpec.bucketInterval(timestamp).get(); + StringDistribution stringDistribution = + intervalToDistribution.computeIfAbsent(interval, k -> new StringSketch()); + + String dimensionValue = dimValueFilter.accept( + interval, + timestamp, + inputRow.getDimension(partitionDimension).get(0) + ); + + if (dimensionValue != null) { + stringDistribution.put(dimensionValue); + } + } + catch (ParseException e) { + if (isLogParseExceptions) { + LOG.error(e, "Encountered parse exception:"); + } + + numParseExceptions++; + if (numParseExceptions > maxParseExceptions) { + throw new RuntimeException("Max parse exceptions exceeded, terminating task..."); + } + } + } + + // UngroupedDimValueFilter may not accept the min/max dimensionValue. If needed, add the min/max + // values to the distributions so they have an accurate min/max. 
+ dimValueFilter.getIntervalToMinDimensionValue() + .forEach((interval, min) -> intervalToDistribution.get(interval).putIfNewMin(min)); + dimValueFilter.getIntervalToMaxDimensionValue() + .forEach((interval, max) -> intervalToDistribution.get(interval).putIfNewMax(max)); + + return intervalToDistribution; + } + + private void sendReport(DimensionDistributionReport report) + { + final ParallelIndexSupervisorTaskClient taskClient = taskClientFactory.build( + new ClientBasedTaskInfoProvider(indexingServiceClient), + getId(), + 1, // always use a single http thread + ingestionSchema.getTuningConfig().getChatHandlerTimeout(), + ingestionSchema.getTuningConfig().getChatHandlerNumRetries() + ); + taskClient.report(supervisorTaskId, report); + } + + private interface DimensionValueFilter + { + /** + * @return Dimension value if it should be accepted, else null + */ + @Nullable + String accept(Interval interval, DateTime timestamp, String dimesionValue); + + /** + * @return Minimum dimension value for each interval processed so far. + */ + Map getIntervalToMinDimensionValue(); + + /** + * @return Maximum dimension value for each interval processed so far. + */ + Map getIntervalToMaxDimensionValue(); + } + + @VisibleForTesting + static class UngroupedRowDimensionValueFilter implements DimensionValueFilter + { + // A bloom filter is used to approximately group rows by query granularity. These values assume + // time chunks have fewer than BLOOM_FILTER_EXPECTED_INSERTIONS rows. With the below values, the + // Bloom filter will use about 170MB of memory. 
+ // + // For more details on the Bloom filter memory consumption: + // https://github.com/google/guava/issues/2520#issuecomment-231233736 + private static final int BLOOM_FILTER_EXPECTED_INSERTIONS = 100_000_000; + private static final double BLOOM_FILTER_EXPECTED_FALSE_POSITIVE_PROBABILTY = 0.001; + + private final GroupedRowDimensionValueFilter delegate; + private final TimeDimTupleFactory timeDimTupleFactory; + private final BloomFilter timeDimTupleBloomFilter; + + UngroupedRowDimensionValueFilter(Granularity queryGranularity) + { + this(queryGranularity, BLOOM_FILTER_EXPECTED_INSERTIONS, BLOOM_FILTER_EXPECTED_FALSE_POSITIVE_PROBABILTY); + } + + @VisibleForTesting // to allow controlling false positive rate of bloom filter + UngroupedRowDimensionValueFilter( + Granularity queryGranularity, + int bloomFilterExpectedInsertions, + double bloomFilterFalsePositiveProbability + ) + { + delegate = new GroupedRowDimensionValueFilter(); + timeDimTupleFactory = new TimeDimTupleFactory(queryGranularity); + timeDimTupleBloomFilter = BloomFilter.create( + TimeDimTupleFunnel.INSTANCE, + bloomFilterExpectedInsertions, + bloomFilterFalsePositiveProbability + ); + } + + @Nullable + @Override + public String accept(Interval interval, DateTime timestamp, String dimensionValue) + { + delegate.accept(interval, timestamp, dimensionValue); + + TimeDimTuple timeDimTuple = timeDimTupleFactory.createWithBucketedTimestamp(timestamp, dimensionValue); + if (timeDimTupleBloomFilter.mightContain(timeDimTuple)) { + return null; + } else { + timeDimTupleBloomFilter.put(timeDimTuple); + return dimensionValue; + } + } + + @Override + public Map getIntervalToMinDimensionValue() + { + return delegate.getIntervalToMinDimensionValue(); + } + + @Override + public Map getIntervalToMaxDimensionValue() + { + return delegate.getIntervalToMaxDimensionValue(); + } + } + + private static class GroupedRowDimensionValueFilter implements DimensionValueFilter + { + private final Map intervalToMinDimensionValue; 
+ private final Map intervalToMaxDimensionValue; + + GroupedRowDimensionValueFilter() + { + this.intervalToMinDimensionValue = new HashMap<>(); + this.intervalToMaxDimensionValue = new HashMap<>(); + } + + @Override + @Nullable + public String accept(Interval interval, DateTime timestamp, String dimensionValue) + { + updateMinDimensionValue(interval, dimensionValue); + updateMaxDimensionValue(interval, dimensionValue); + return dimensionValue; + } + + private void updateMinDimensionValue(Interval interval, String dimensionValue) + { + String minDimensionValue = intervalToMinDimensionValue.get(interval); + if (minDimensionValue == null || dimensionValue.compareTo(minDimensionValue) < 0) { + intervalToMinDimensionValue.put(interval, dimensionValue); + } + } + + private void updateMaxDimensionValue(Interval interval, String dimensionValue) + { + String maxDimensionValue = intervalToMaxDimensionValue.get(interval); + if (maxDimensionValue == null || dimensionValue.compareTo(maxDimensionValue) > 0) { + intervalToMaxDimensionValue.put(interval, dimensionValue); + } + } + + @Override + public Map getIntervalToMinDimensionValue() + { + return intervalToMinDimensionValue; + } + + @Override + public Map getIntervalToMaxDimensionValue() + { + return intervalToMaxDimensionValue; + } + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfig.java new file mode 100644 index 000000000000..bbec73f9a446 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfig.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexing.common.task.batch.parallel;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import org.apache.druid.segment.indexing.IOConfig;

import java.util.List;

/**
 * IO config for {@link PartialGenericSegmentMergeTask}: the list of partial segment locations
 * (produced by the generate phase) that the merge subtask should fetch and merge.
 */
@JsonTypeName(PartialGenericSegmentMergeTask.TYPE)
class PartialGenericSegmentMergeIOConfig extends PartialSegmentMergeIOConfig<GenericPartitionLocation>
    implements IOConfig
{
  @JsonCreator
  PartialGenericSegmentMergeIOConfig(
      @JsonProperty("partitionLocations") List<GenericPartitionLocation> partitionLocations
  )
  {
    super(partitionLocations);
  }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.segment.indexing.DataSchema; + +class PartialGenericSegmentMergeIngestionSpec + extends PartialSegmentMergeIngestionSpec +{ + @JsonCreator + PartialGenericSegmentMergeIngestionSpec( + @JsonProperty("dataSchema") DataSchema dataSchema, + @JsonProperty("ioConfig") PartialGenericSegmentMergeIOConfig ioConfig, + @JsonProperty("tuningConfig") ParallelIndexTuningConfig tuningConfig + ) + { + super(dataSchema, ioConfig, tuningConfig); + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java new file mode 100644 index 000000000000..e53b1d22451a --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexing.common.task.batch.parallel;

import com.google.common.annotations.VisibleForTesting;
import org.apache.druid.client.indexing.IndexingServiceClient;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.segment.indexing.DataSchema;

import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * {@link ParallelIndexTaskRunner} for the phase to merge partitioned segments in multi-phase parallel indexing.
 * Spawns one {@link PartialGenericSegmentMergeTask} per {@link PartialGenericSegmentMergeIOConfig}.
 */
class PartialGenericSegmentMergeParallelIndexTaskRunner
    extends ParallelIndexPhaseRunner<PartialGenericSegmentMergeTask, PushedSegmentsReport>
{
  private final DataSchema dataSchema;
  private final List<PartialGenericSegmentMergeIOConfig> mergeIOConfigs;

  PartialGenericSegmentMergeParallelIndexTaskRunner(
      TaskToolbox toolbox,
      String taskId,
      String groupId,
      DataSchema dataSchema,
      List<PartialGenericSegmentMergeIOConfig> mergeIOConfigs,
      ParallelIndexTuningConfig tuningConfig,
      Map<String, Object> context,
      IndexingServiceClient indexingServiceClient
  )
  {
    super(toolbox, taskId, groupId, tuningConfig, context, indexingServiceClient);

    this.dataSchema = dataSchema;
    this.mergeIOConfigs = mergeIOConfigs;
  }

  @Override
  public String getName()
  {
    return PartialGenericSegmentMergeTask.TYPE;
  }

  @Override
  Iterator<SubTaskSpec<PartialGenericSegmentMergeTask>> subTaskSpecIterator()
  {
    return mergeIOConfigs.stream().map(this::newTaskSpec).iterator();
  }

  @Override
  int getTotalNumSubTasks()
  {
    // one merge subtask per IO config (i.e., per group of partitions assigned to a merger)
    return mergeIOConfigs.size();
  }

  @VisibleForTesting
  SubTaskSpec<PartialGenericSegmentMergeTask> newTaskSpec(PartialGenericSegmentMergeIOConfig ioConfig)
  {
    final PartialGenericSegmentMergeIngestionSpec ingestionSpec =
        new PartialGenericSegmentMergeIngestionSpec(
            dataSchema,
            ioConfig,
            getTuningConfig()
        );
    return new SubTaskSpec<PartialGenericSegmentMergeTask>(
        getTaskId() + "_" + getAndIncrementNextSpecId(),
        getGroupId(),
        getTaskId(),
        getContext(),
        new InputSplit<>(ioConfig.getPartitionLocations())
    )
    {
      @Override
      public PartialGenericSegmentMergeTask newSubTask(int numAttempts)
      {
        return new PartialGenericSegmentMergeTask(
            null,      // id: generated by the subtask itself
            getGroupId(),
            null,      // taskResource: use default
            getSupervisorTaskId(),
            numAttempts,
            ingestionSpec,
            getContext(),
            null,      // injected on the worker
            null,      // injected on the worker
            null       // injected on the worker
        );
      }
    };
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexing.common.task.batch.parallel;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import org.apache.druid.client.indexing.IndexingServiceClient;
import org.apache.druid.guice.annotations.EscalatedClient;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.indexing.common.task.IndexTaskClientFactory;
import org.apache.druid.indexing.common.task.TaskResource;
import org.apache.druid.java.util.http.client.HttpClient;
import org.apache.druid.timeline.partition.ShardSpec;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;

/**
 * Worker task of {@link PartialGenericSegmentMergeParallelIndexTaskRunner}: fetches the partial
 * range-partitioned segments listed in its IO config, merges them per (interval, partitionId),
 * and publishes the merged segments.
 */
public class PartialGenericSegmentMergeTask extends PartialSegmentMergeTask<ShardSpec, GenericPartitionLocation>
{
  public static final String TYPE = "partial_index_generic_merge";

  private final PartialGenericSegmentMergeIngestionSpec ingestionSchema;

  // Lookup from (interval, partitionId) to the shard spec the merged segment should use,
  // built from the partition locations in the IO config.
  private final Table<Interval, Integer, ShardSpec> intervalAndIntegerToShardSpec;

  @JsonCreator
  public PartialGenericSegmentMergeTask(
      // id shouldn't be null except when this task is created by ParallelIndexSupervisorTask
      @JsonProperty("id") @Nullable String id,
      @JsonProperty("groupId") final String groupId,
      @JsonProperty("resource") final TaskResource taskResource,
      @JsonProperty("supervisorTaskId") final String supervisorTaskId,
      @JsonProperty("numAttempts") final int numAttempts, // zero-based counting
      @JsonProperty("spec") final PartialGenericSegmentMergeIngestionSpec ingestionSchema,
      @JsonProperty("context") final Map<String, Object> context,
      @JacksonInject IndexingServiceClient indexingServiceClient,
      @JacksonInject IndexTaskClientFactory<ParallelIndexSupervisorTaskClient> taskClientFactory,
      @JacksonInject @EscalatedClient HttpClient shuffleClient
  )
  {
    super(
        getOrMakeId(id, TYPE, ingestionSchema.getDataSchema().getDataSource()),
        groupId,
        taskResource,
        supervisorTaskId,
        ingestionSchema.getDataSchema(),
        ingestionSchema.getIOConfig(),
        ingestionSchema.getTuningConfig(),
        numAttempts,
        context,
        indexingServiceClient,
        taskClientFactory,
        shuffleClient
    );

    this.ingestionSchema = ingestionSchema;
    this.intervalAndIntegerToShardSpec = createIntervalAndIntegerToShardSpec(
        ingestionSchema.getIOConfig().getPartitionLocations()
    );
  }

  /**
   * Index each partition location's shard spec by (interval, partitionId) for O(1) lookup in
   * {@link #createShardSpec}.
   */
  private static Table<Interval, Integer, ShardSpec> createIntervalAndIntegerToShardSpec(
      List<GenericPartitionLocation> partitionLocations
  )
  {
    Table<Interval, Integer, ShardSpec> intervalAndIntegerToShardSpec = HashBasedTable.create();

    partitionLocations.forEach(
        p -> intervalAndIntegerToShardSpec.put(p.getInterval(), p.getPartitionId(), p.getShardSpec())
    );

    return intervalAndIntegerToShardSpec;
  }

  @JsonProperty("spec")
  private PartialGenericSegmentMergeIngestionSpec getIngestionSchema()
  {
    return ingestionSchema;
  }

  @Override
  public String getType()
  {
    return TYPE;
  }

  @Override
  ShardSpec createShardSpec(TaskToolbox toolbox, Interval interval, int partitionNum)
  {
    // The shard spec was fixed by the generate phase; just look it up.
    return intervalAndIntegerToShardSpec.get(interval, partitionNum);
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexing.common.task.batch.parallel;

import com.google.common.annotations.VisibleForTesting;
import org.apache.druid.client.indexing.IndexingServiceClient;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.indexing.common.task.IndexTaskClientFactory;
import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager;
import org.joda.time.Interval;

import java.util.Map;

/**
 * {@link ParallelIndexTaskRunner} for the phase to create range partitioned segments in multi-phase parallel indexing.
 *
 * @see PartialHashSegmentGenerateParallelIndexTaskRunner
 */
class PartialRangeSegmentGenerateParallelIndexTaskRunner
    extends InputSourceSplitParallelIndexTaskRunner<PartialRangeSegmentGenerateTask, GeneratedGenericPartitionsReport>
{
  private final IndexTaskClientFactory<ParallelIndexSupervisorTaskClient> taskClientFactory;
  private final AppenderatorsManager appenderatorsManager;

  // Range partition boundaries per interval, computed by the dimension-distribution phase.
  // NOTE(review): element type reconstructed as String[] to match
  // StringDistribution#getEvenPartitionsByTargetSize — TODO confirm.
  private final Map<Interval, String[]> intervalToPartitions;

  PartialRangeSegmentGenerateParallelIndexTaskRunner(
      TaskToolbox toolbox,
      String taskId,
      String groupId,
      ParallelIndexIngestionSpec ingestionSchema,
      Map<String, Object> context,
      IndexingServiceClient indexingServiceClient,
      Map<Interval, String[]> intervalToPartitions
  )
  {
    this(
        toolbox,
        taskId,
        groupId,
        ingestionSchema,
        context,
        indexingServiceClient,
        intervalToPartitions,
        null,
        null
    );
  }

  @VisibleForTesting
  PartialRangeSegmentGenerateParallelIndexTaskRunner(
      TaskToolbox toolbox,
      String taskId,
      String groupId,
      ParallelIndexIngestionSpec ingestionSchema,
      Map<String, Object> context,
      IndexingServiceClient indexingServiceClient,
      Map<Interval, String[]> intervalToPartitions,
      IndexTaskClientFactory<ParallelIndexSupervisorTaskClient> taskClientFactory,
      AppenderatorsManager appenderatorsManager
  )
  {
    super(toolbox, taskId, groupId, ingestionSchema, context, indexingServiceClient);
    this.taskClientFactory = taskClientFactory;
    this.appenderatorsManager = appenderatorsManager;
    this.intervalToPartitions = intervalToPartitions;
  }

  @Override
  public String getName()
  {
    return PartialRangeSegmentGenerateTask.TYPE;
  }

  @Override
  SubTaskSpec<PartialRangeSegmentGenerateTask> createSubTaskSpec(
      String id,
      String groupId,
      String supervisorTaskId,
      Map<String, Object> context,
      InputSplit split,
      ParallelIndexIngestionSpec subTaskIngestionSpec,
      IndexingServiceClient indexingServiceClient
  )
  {
    return new SubTaskSpec<PartialRangeSegmentGenerateTask>(
        id,
        groupId,
        supervisorTaskId,
        context,
        split
    )
    {
      @Override
      public PartialRangeSegmentGenerateTask newSubTask(int numAttempts)
      {
        return new PartialRangeSegmentGenerateTask(
            null,  // id: generated by the subtask itself
            groupId,
            null,  // taskResource: use default
            supervisorTaskId,
            numAttempts,
            subTaskIngestionSpec,
            context,
            intervalToPartitions,
            indexingServiceClient,
            taskClientFactory,
            appenderatorsManager
        );
      }
    };
  }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.indexing.common.task.batch.parallel;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.client.indexing.IndexingServiceClient;
import org.apache.druid.indexer.partitions.PartitionsSpec;
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;
import org.apache.druid.indexing.common.TaskToolbox;
import org.apache.druid.indexing.common.actions.TaskActionClient;
import org.apache.druid.indexing.common.task.IndexTaskClientFactory;
import org.apache.druid.indexing.common.task.IndexTaskSegmentAllocator;
import org.apache.druid.indexing.common.task.RangePartitionCachingLocalSegmentAllocator;
import org.apache.druid.indexing.common.task.TaskResource;
import org.apache.druid.indexing.common.task.batch.parallel.iterator.RangePartitionIndexTaskInputRowIteratorBuilder;
import org.apache.druid.indexing.worker.ShuffleDataSegmentPusher;
import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * The worker task of {@link PartialRangeSegmentGenerateParallelIndexTaskRunner}. This task
 * partitions input data by ranges of the partition dimension specified in
 * {@link SingleDimensionPartitionsSpec}. Partitioned segments are stored in local storage using
 * {@link ShuffleDataSegmentPusher}.
 */
public class PartialRangeSegmentGenerateTask extends PartialSegmentGenerateTask<GeneratedGenericPartitionsReport>
{
  public static final String TYPE = "partial_range_index_generate";
  private static final String PROP_SPEC = "spec";

  private final String supervisorTaskId;
  private final int numAttempts;
  private final ParallelIndexIngestionSpec ingestionSchema;

  // Range partition boundaries per interval, supplied by the supervisor after the
  // dimension-distribution phase. NOTE(review): element type reconstructed as String[] — TODO confirm.
  private final Map<Interval, String[]> intervalToPartitions;

  @JsonCreator
  public PartialRangeSegmentGenerateTask(
      // id shouldn't be null except when this task is created by ParallelIndexSupervisorTask
      @JsonProperty("id") @Nullable String id,
      @JsonProperty("groupId") String groupId,
      @JsonProperty("resource") TaskResource taskResource,
      @JsonProperty("supervisorTaskId") String supervisorTaskId,
      @JsonProperty("numAttempts") int numAttempts, // zero-based counting
      @JsonProperty(PROP_SPEC) ParallelIndexIngestionSpec ingestionSchema,
      @JsonProperty("context") Map<String, Object> context,
      @JsonProperty("intervalToPartitions") Map<Interval, String[]> intervalToPartitions,
      @JacksonInject IndexingServiceClient indexingServiceClient,
      @JacksonInject IndexTaskClientFactory<ParallelIndexSupervisorTaskClient> taskClientFactory,
      @JacksonInject AppenderatorsManager appenderatorsManager
  )
  {
    super(
        getOrMakeId(id, TYPE, ingestionSchema.getDataSchema().getDataSource()),
        groupId,
        taskResource,
        supervisorTaskId,
        ingestionSchema,
        context,
        indexingServiceClient,
        taskClientFactory,
        appenderatorsManager,
        new RangePartitionIndexTaskInputRowIteratorBuilder(getPartitionDimension(ingestionSchema))
    );

    this.numAttempts = numAttempts;
    this.ingestionSchema = ingestionSchema;
    this.supervisorTaskId = supervisorTaskId;
    this.intervalToPartitions = intervalToPartitions;
  }

  /**
   * Extract and validate the partition dimension from the ingestion spec's partitions spec.
   *
   * @throws IllegalArgumentException if the partitions spec is not a {@link SingleDimensionPartitionsSpec}
   * @throws NullPointerException     if the partition dimension is not set
   */
  private static String getPartitionDimension(ParallelIndexIngestionSpec ingestionSpec)
  {
    PartitionsSpec partitionsSpec = ingestionSpec.getTuningConfig().getPartitionsSpec();
    Preconditions.checkArgument(
        partitionsSpec instanceof SingleDimensionPartitionsSpec,
        "%s partitionsSpec required",
        SingleDimensionPartitionsSpec.NAME
    );

    SingleDimensionPartitionsSpec singleDimPartitionsSpec = (SingleDimensionPartitionsSpec) partitionsSpec;
    String partitionDimension = singleDimPartitionsSpec.getPartitionDimension();
    Preconditions.checkNotNull(partitionDimension, "partitionDimension required");

    return partitionDimension;
  }

  @JsonProperty
  public int getNumAttempts()
  {
    return numAttempts;
  }

  @JsonProperty(PROP_SPEC)
  public ParallelIndexIngestionSpec getIngestionSchema()
  {
    return ingestionSchema;
  }

  @JsonProperty
  public String getSupervisorTaskId()
  {
    return supervisorTaskId;
  }

  @JsonProperty
  public Map<Interval, String[]> getIntervalToPartitions()
  {
    return intervalToPartitions;
  }

  @Override
  public String getType()
  {
    return TYPE;
  }

  @Override
  public boolean isReady(TaskActionClient taskActionClient)
  {
    // NOTE(review): always ready — locking appears to be handled by the supervisor; confirm.
    return true;
  }

  @Override
  IndexTaskSegmentAllocator createSegmentAllocator(TaskToolbox toolbox) throws IOException
  {
    // Allocates SingleDimensionShardSpecs using the precomputed range boundaries.
    return new RangePartitionCachingLocalSegmentAllocator(
        toolbox,
        getId(),
        getDataSource(),
        getPartitionDimension(ingestionSchema),
        intervalToPartitions
    );
  }

  @Override
  GeneratedGenericPartitionsReport createGeneratedPartitionsReport(TaskToolbox toolbox, List<DataSegment> segments)
  {
    List<GenericPartitionStat> partitionStats = segments.stream()
                                                        .map(segment -> createPartitionStat(toolbox, segment))
                                                        .collect(Collectors.toList());
    return new GeneratedGenericPartitionsReport(getId(), partitionStats);
  }

  /**
   * Describe a pushed partial segment so the supervisor can route mergers to this worker's
   * shuffle endpoint.
   */
  private GenericPartitionStat createPartitionStat(TaskToolbox toolbox, DataSegment segment)
  {
    return new GenericPartitionStat(
        toolbox.getTaskExecutorNode().getHost(),
        toolbox.getTaskExecutorNode().getPortToUse(),
        toolbox.getTaskExecutorNode().isEnableTlsPort(),
        segment.getInterval(),
        segment.getShardSpec(),
        null, // numRows is not supported yet
        null  // sizeBytes is not supported yet
    );
  }
}
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java index e60423533dfa..8cc6db91e94e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java @@ -30,7 +30,9 @@ @JsonTypeInfo(use = Id.NAME, property = "type", defaultImpl = PushedSegmentsReport.class) @JsonSubTypes(value = { @Type(name = PushedSegmentsReport.TYPE, value = PushedSegmentsReport.class), - @Type(name = GeneratedHashPartitionsReport.TYPE, value = GeneratedHashPartitionsReport.class) + @Type(name = GeneratedHashPartitionsReport.TYPE, value = GeneratedHashPartitionsReport.class), + @Type(name = DimensionDistributionReport.TYPE, value = DimensionDistributionReport.class), + @Type(name = GeneratedGenericPartitionsReport.TYPE, value = GeneratedGenericPartitionsReport.class) }) public interface SubTaskReport { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java new file mode 100644 index 000000000000..643a1a8276e8 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +/** + * Counts frequencies of {@link String}s. + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes({ + @JsonSubTypes.Type(name = StringSketch.NAME, value = StringSketch.class) +}) +public interface StringDistribution +{ + /** + * Record occurence of {@link String} + */ + void put(String element); + + /** + * Record occurence of {@link String} if it will become the new minimum element. + */ + void putIfNewMin(String element); + + /** + * Record occurence of {@link String} if it will become the new maximum element; + */ + void putIfNewMax(String element); + + /** + * Split the distribution in the fewest number of evenly-sized partitions while honoring a max + * partition size. + * + * @return Array of elements that correspond to the endpoints of evenly-sized partitions of the + * sorted elements. + */ + String[] getEvenPartitionsByMaxSize(int maxSize); + + /** + * Split the distribution in the fewest number of evenly-sized partitions while honoring a target + * partition size (actual partition sizes may be slightly lower or higher). + * + * @return Array of elements that correspond to the endpoints of evenly-sized partitions of the + * sorted elements. 
+ */ + String[] getEvenPartitionsByTargetSize(int targetSize); +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistributionMerger.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistributionMerger.java new file mode 100644 index 000000000000..f35fd33a792e --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistributionMerger.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +/** + * Merges {@link StringDistribution}s. + */ +public interface StringDistributionMerger +{ + /** + * Merge distribution. + */ + void merge(StringDistribution distribution); + + /** + * @return Merged distributions. 
+ */ + StringDistribution getResult(); +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java new file mode 100644 index 000000000000..74a97b2d7537 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import com.fasterxml.jackson.databind.jsontype.TypeSerializer; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import com.google.common.base.Preconditions; +import com.yahoo.memory.Memory; +import com.yahoo.sketches.ArrayOfStringsSerDe; +import com.yahoo.sketches.quantiles.ItemsSketch; + +import java.io.IOException; +import java.util.Comparator; + +/** + * Counts approximate frequencies of strings. + */ +@JsonSerialize(using = StringSketch.Jackson.Serializer.class) +@JsonDeserialize(using = StringSketch.Jackson.Deserializer.class) +public class StringSketch implements StringDistribution +{ + static final String NAME = "sketch"; + static final int SKETCH_K = 1 << 12; // smallest value with normalized rank error < 0.1%; retain up to ~86k elements + static final Comparator SKETCH_COMPARATOR = Comparator.naturalOrder(); + private static final ArrayOfStringsSerDe ARRAY_OF_STRINGS_SERDE = new ArrayOfStringsSerDe(); + + private final ItemsSketch delegate; + + public StringSketch() + { + this(ItemsSketch.getInstance(SKETCH_K, SKETCH_COMPARATOR)); + } + + StringSketch(ItemsSketch sketch) + { + this.delegate = sketch; + } + + @Override + public void put(String string) + { + delegate.update(string); + } + + @Override + public void putIfNewMin(String string) + { + String min = delegate.getMinValue(); + if (min == null || string.compareTo(min) < 0) { + delegate.update(string); + } + } + + @Override + public void 
putIfNewMax(String string) + { + String max = delegate.getMaxValue(); + if (max == null || string.compareTo(max) > 0) { + delegate.update(string); + } + } + + @Override + public String[] getEvenPartitionsByMaxSize(int maxSize) + { + Preconditions.checkArgument(maxSize > 0, "maxSize must be positive but is %s", maxSize); + long n = delegate.getN(); + double delta = delegate.getNormalizedRankError(true) * n; // account for approx distribution + int targetSize = Math.max(1, (int) Math.floor(maxSize - delta)); // floor() to increase chance below max size + int evenPartitionCount = (int) Math.ceil((double) n / targetSize); // ceil() to increase chance below max size + return getEventPartitionsByCount(Math.max(1, evenPartitionCount)); + } + + @Override + public String[] getEvenPartitionsByTargetSize(int targetSize) + { + Preconditions.checkArgument(targetSize > 0, "targetSize must be positive but is %s", targetSize); + long n = delegate.getN(); + int evenPartitionCount = Math.max(1, (int) Math.round((double) n / targetSize)); + return getEventPartitionsByCount(evenPartitionCount); + } + + private String[] getEventPartitionsByCount(int evenPartitionCount) + { + Preconditions.checkArgument( + evenPartitionCount > 0, + "evenPartitionCount must be positive but is %s", + evenPartitionCount + ); + String[] partitions = delegate.getQuantiles(evenPartitionCount + 1); // add 1 since this returns endpoints + return (partitions == null) ? 
new String[0] : partitions; + } + + @Override + public String toString() + { + return "StringSketch{" + + "delegate=" + delegate + + '}'; + } + + ItemsSketch getDelegate() + { + return delegate; + } + + private byte[] toByteArray() + { + return delegate.toByteArray(ARRAY_OF_STRINGS_SERDE); + } + + static class Jackson + { + private static final String FIELD_SKETCH = "sketch"; + + static class Serializer extends StdSerializer + { + Serializer() + { + super(StringSketch.class); + } + + @Override + public void serialize( + StringSketch stringSketch, + JsonGenerator jsonGenerator, + SerializerProvider serializerProvider + ) throws IOException + { + jsonGenerator.writeBinaryField(FIELD_SKETCH, stringSketch.toByteArray()); + } + + @Override + public void serializeWithType( + StringSketch value, + JsonGenerator gen, + SerializerProvider serializers, + TypeSerializer typeSer + ) throws IOException + { + typeSer.writeTypePrefixForObject(value, gen); + serialize(value, gen, serializers); + typeSer.writeTypeSuffixForObject(value, gen); + } + } + + static class Deserializer extends StdDeserializer + { + Deserializer() + { + super(StringSketch.class); + } + + @Override + public StringSketch deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) + throws IOException + { + JsonNode jsonNode = jsonParser.getCodec().readTree(jsonParser); + byte[] sketchBytes = jsonNode.get(FIELD_SKETCH).binaryValue(); + ItemsSketch sketch = ItemsSketch.getInstance( + Memory.wrap(sketchBytes), + SKETCH_COMPARATOR, + ARRAY_OF_STRINGS_SERDE + ); + return new StringSketch(sketch); + } + } + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMerger.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMerger.java new file mode 100644 index 000000000000..f628f35c6694 --- /dev/null +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMerger.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import com.yahoo.sketches.quantiles.ItemsUnion; + +/** + * Merges {@link StringSketch}es. 
+ */ +public class StringSketchMerger implements StringDistributionMerger +{ + private final ItemsUnion delegate; + + public StringSketchMerger() + { + delegate = ItemsUnion.getInstance(StringSketch.SKETCH_K, StringSketch.SKETCH_COMPARATOR); + } + + @Override + public void merge(StringDistribution stringDistribution) + { + if (!(stringDistribution instanceof StringSketch)) { + throw new IllegalArgumentException("Only merging StringSketch instances is currently supported"); + } + + StringSketch stringSketch = (StringSketch) stringDistribution; + delegate.update(stringSketch.getDelegate()); + } + + @Override + public StringDistribution getResult() + { + return new StringSketch(delegate.getResult()); + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTuple.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTuple.java new file mode 100644 index 000000000000..1c7f5c3be12f --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTuple.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Immutable (timestamp, dimension value) pair, ordered by timestamp first and then by
 * dimension value.
 */
public class TimeDimTuple implements Comparable<TimeDimTuple>
{
  private final long timestamp;
  private final String dimensionValue;

  TimeDimTuple(long timestamp, String dimensionValue)
  {
    this.timestamp = timestamp;
    this.dimensionValue = dimensionValue;
  }

  public long getTimestamp()
  {
    return timestamp;
  }

  public String getDimensionValue()
  {
    return dimensionValue;
  }

  @Override
  public int compareTo(TimeDimTuple o)
  {
    int byTimestamp = Long.compare(timestamp, o.timestamp);
    return byTimestamp != 0 ? byTimestamp : dimensionValue.compareTo(o.dimensionValue);
  }

  @Override
  public boolean equals(Object o)
  {
    // Consistent with compareTo(): tuples are equal iff they compare as equal.
    return (o instanceof TimeDimTuple) && compareTo((TimeDimTuple) o) == 0;
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(timestamp, dimensionValue);
  }

  @Override
  public String toString()
  {
    return "TimeDimTuple{" +
           "timestamp=" + timestamp +
           ", dimensionValue='" + dimensionValue + '\'' +
           '}';
  }
}
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import org.apache.druid.java.util.common.granularity.Granularity; +import org.joda.time.DateTime; + +/** + * Creates {@link TimeDimTuple}s with time stamp adjust according to a {@link Granularity}. + */ +public class TimeDimTupleFactory +{ + private final Granularity granularity; + + public TimeDimTupleFactory(Granularity granularity) + { + this.granularity = granularity; + } + + public TimeDimTuple createWithBucketedTimestamp(DateTime timestamp, String dimensionValue) + { + return new TimeDimTuple(getBucketTimestamp(timestamp), dimensionValue); + } + + private long getBucketTimestamp(DateTime dateTime) + { + return granularity.bucketStart(dateTime).getMillis(); + } +} + diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnel.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnel.java new file mode 100644 index 000000000000..050c903402a3 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnel.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import com.google.common.hash.Funnel; +import com.google.common.hash.PrimitiveSink; + +/** + * Utility class for adding {@link TimeDimTuple}s to a {@link com.google.common.hash.BloomFilter}. + */ +public enum TimeDimTupleFunnel implements Funnel +{ + INSTANCE; + + @Override + public void funnel(TimeDimTuple timeDimTuple, PrimitiveSink into) + { + into.putLong(timeDimTuple.getTimestamp()) + .putUnencodedChars(timeDimTuple.getDimensionValue()); + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/DefaultIndexTaskInputRowIteratorBuilder.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/DefaultIndexTaskInputRowIteratorBuilder.java index 3a8ad8ab566c..b2a9463bf40f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/DefaultIndexTaskInputRowIteratorBuilder.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/DefaultIndexTaskInputRowIteratorBuilder.java @@ -55,6 +55,8 @@ * If any of the handlers invoke their respective callback, the {@link HandlingInputRowIterator} will yield * a null 
{@link InputRow} next; otherwise, the next {@link InputRow} is yielded. * + * + * @see RangePartitionIndexTaskInputRowIteratorBuilder */ public class DefaultIndexTaskInputRowIteratorBuilder implements IndexTaskInputRowIteratorBuilder { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java new file mode 100644 index 000000000000..b2884b99d439 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.iterator; + +import org.apache.druid.data.input.HandlingInputRowIterator; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.indexing.common.task.IndexTask; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.java.util.common.parsers.ParseException; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; + +import java.util.List; +import java.util.function.Consumer; + +/** + *
+ * Build an {@link HandlingInputRowIterator} for {@link IndexTask}s used for range partitioning. Each {@link
+ * InputRow} is processed by the following handlers, in order:
+ *
+ *   1. Null row: If {@link InputRow} is null, invoke the null row {@link Runnable} callback.
+ *
+ *   2. Invalid timestamp: If {@link InputRow} has an invalid timestamp, throw a {@link ParseException}.
+ *
+ *   3. Absent bucket interval: If {@link InputRow} has a timestamp that does not match the
+ *      {@link GranularitySpec} bucket intervals, invoke the absent bucket interval {@link Consumer}
+ *      callback.
+ *
+ *   4. Filter for rows with only a single dimension value count for the specified partition dimension.
+ *
+ * If any of the handlers invoke their respective callback, the {@link HandlingInputRowIterator} will yield
+ * a null {@link InputRow} next; otherwise, the next {@link InputRow} is yielded.
+ * 
+ * + * @see DefaultIndexTaskInputRowIteratorBuilder + */ +public class RangePartitionIndexTaskInputRowIteratorBuilder implements IndexTaskInputRowIteratorBuilder +{ + private final DefaultIndexTaskInputRowIteratorBuilder delegate; + + public RangePartitionIndexTaskInputRowIteratorBuilder(String partitionDimension) + { + delegate = new DefaultIndexTaskInputRowIteratorBuilder(); + delegate.appendInputRowHandler(createOnlySingleDimensionValueRowsHandler(partitionDimension)); + } + + @Override + public IndexTaskInputRowIteratorBuilder delegate(CloseableIterator inputRowIterator) + { + return delegate.delegate(inputRowIterator); + } + + @Override + public IndexTaskInputRowIteratorBuilder granularitySpec(GranularitySpec granularitySpec) + { + return delegate.granularitySpec(granularitySpec); + } + + @Override + public IndexTaskInputRowIteratorBuilder nullRowRunnable(Runnable nullRowRunnable) + { + return delegate.nullRowRunnable(nullRowRunnable); + } + + @Override + public IndexTaskInputRowIteratorBuilder absentBucketIntervalConsumer(Consumer absentBucketIntervalConsumer) + { + return delegate.absentBucketIntervalConsumer(absentBucketIntervalConsumer); + } + + @Override + public HandlingInputRowIterator build() + { + return delegate.build(); + } + + private static HandlingInputRowIterator.InputRowHandler createOnlySingleDimensionValueRowsHandler( + String partitionDimension + ) + { + return inputRow -> { + List dimensionValues = inputRow.getDimension(partitionDimension); + return dimensionValues.size() != 1; + }; + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index dcaffb395ded..fe6c534e837a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -23,6 +23,7 @@ 
import com.google.common.base.Optional; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexing.common.SegmentLoaderFactory; import org.apache.druid.indexing.common.SingleFileTaskReportFileWriter; @@ -80,6 +81,10 @@ public abstract class IngestionTestBase { + static { + NullHandling.initializeForTests(); + } + @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java new file mode 100644 index 000000000000..86cb36403c25 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.data.input.InputRow; +import org.apache.druid.indexing.common.TaskLock; +import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.actions.LockListAction; +import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; +import org.apache.druid.timeline.SegmentId; +import org.apache.druid.timeline.partition.SingleDimensionShardSpec; +import org.easymock.EasyMock; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class RangePartitionCachingLocalSegmentAllocatorTest +{ + private static final String DATASOURCE = "datasource"; + private static final String TASKID = "taskid"; + private static final String PARTITION_DIMENSION = "dimension"; + private static final Interval INTERVAL_EMPTY = Intervals.utc(0, 1000); + private static final Interval INTERVAL_SINGLETON = Intervals.utc(1000, 2000); + private static final Interval INTERVAL_NORMAL = Intervals.utc(2000, 3000); + private static final Interval INTERVAL_FREQUENT_MID = Intervals.utc(3000, 4000); + private static final Interval INTERVAL_FREQUENT_MAX = Intervals.utc(5000, 6000); + private static final Map INTERVAL_TO_VERSION = ImmutableMap.of( + INTERVAL_EMPTY, "version-empty", + INTERVAL_SINGLETON, "version-singleton", + INTERVAL_NORMAL, "version-normal", + INTERVAL_FREQUENT_MID, "version-frequent-mid", + 
INTERVAL_FREQUENT_MAX, "version-frequent-max" + ); + private static final String PARTITION0 = "0"; + private static final String PARTITION5 = "5"; + private static final String PARTITION9 = "9"; + private static final String[] EMPTY_PARTITIONS = new String[]{}; + private static final String[] SINGLETON_PARTITIONS = new String[]{PARTITION0, PARTITION0}; + private static final String[] NORMAL_PARTITIONS = new String[]{PARTITION0, PARTITION5, PARTITION9}; + private static final String[] FREQUENT_MID_PARTITIONS = new String[]{PARTITION0, PARTITION5, PARTITION5, PARTITION9}; + private static final String[] FREQUENT_MAX_PARTITIONS = new String[]{PARTITION0, PARTITION5, PARTITION9, PARTITION9}; + + private static final Map INTERVAL_TO_PARTITONS = ImmutableMap.of( + INTERVAL_EMPTY, EMPTY_PARTITIONS, + INTERVAL_SINGLETON, SINGLETON_PARTITIONS, + INTERVAL_NORMAL, NORMAL_PARTITIONS, + INTERVAL_FREQUENT_MID, FREQUENT_MID_PARTITIONS, + INTERVAL_FREQUENT_MAX, FREQUENT_MAX_PARTITIONS + ); + + private RangePartitionCachingLocalSegmentAllocator target; + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Before + public void setup() throws IOException + { + TaskToolbox toolbox = createToolbox( + INTERVAL_TO_VERSION.keySet() + .stream() + .map(RangePartitionCachingLocalSegmentAllocatorTest::createTaskLock) + .collect(Collectors.toList()) + ); + target = new RangePartitionCachingLocalSegmentAllocator( + toolbox, + TASKID, + DATASOURCE, + PARTITION_DIMENSION, + INTERVAL_TO_PARTITONS + ); + } + + @Test + public void failsIfAllocateFromEmptyInterval() + { + int dummy = 0; + Interval interval = INTERVAL_EMPTY; + InputRow row = createInputRow(interval, PARTITION9); + + exception.expect(IllegalStateException.class); + exception.expectMessage("Failed to get shardSpec"); + + testAllocate(row, interval, dummy, null); + } + + @Test + public void allocatesCorrectShardSpecsForSingletonPartitions() + { + Interval interval = INTERVAL_SINGLETON; + InputRow row = 
createInputRow(interval, PARTITION9); + testAllocate(row, interval, 0, null); + } + + + @Test + public void allocatesCorrectShardSpecsForFirstPartition() + { + Interval interval = INTERVAL_NORMAL; + InputRow row = createInputRow(interval, PARTITION0); + testAllocate(row, interval, 0); + } + + @Test + public void allocatesCorrectShardSpecsForLastPartitionWithoutFrequentValue() + { + Interval interval = INTERVAL_NORMAL; + InputRow row = createInputRow(interval, PARTITION9); + testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).length - 2, null); + } + + @Test + public void allocatesCorrectShardSpecsForLPartitionWithFrequentMid() + { + Interval interval = INTERVAL_FREQUENT_MID; + InputRow row = createInputRow(interval, PARTITION9); + testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).length - 3, null); + } + + @Test + public void allocatesCorrectShardSpecsForLastPartitionWithFrequentMax() + { + Interval interval = INTERVAL_FREQUENT_MAX; + InputRow row = createInputRow(interval, PARTITION9); + testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).length - 2, null); + } + + private void testAllocate(InputRow row, Interval interval, int partitionNum) + { + testAllocate(row, interval, partitionNum, INTERVAL_TO_PARTITONS.get(interval)[partitionNum + 1]); + } + + private void testAllocate(InputRow row, Interval interval, int partitionNum, @Nullable String partitionEnd) + { + String sequenceName = target.getSequenceName(interval, row); + SegmentIdWithShardSpec segmentIdWithShardSpec = allocate(row, sequenceName); + + Assert.assertEquals( + SegmentId.of(DATASOURCE, interval, INTERVAL_TO_VERSION.get(interval), partitionNum), + segmentIdWithShardSpec.asSegmentId() + ); + SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segmentIdWithShardSpec.getShardSpec(); + Assert.assertEquals(PARTITION_DIMENSION, shardSpec.getDimension()); + Assert.assertEquals(partitionNum, shardSpec.getPartitionNum()); + String partitionStart = 
INTERVAL_TO_PARTITONS.get(interval)[partitionNum]; + Assert.assertEquals(partitionStart, shardSpec.getStart()); + Assert.assertEquals(partitionEnd, shardSpec.getEnd()); + } + + private SegmentIdWithShardSpec allocate(InputRow row, String sequenceName) + { + try { + return target.allocate(row, sequenceName, null, false); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static TaskToolbox createToolbox(List taskLocks) + { + TaskToolbox toolbox = EasyMock.mock(TaskToolbox.class); + EasyMock.expect(toolbox.getTaskActionClient()).andStubReturn(createTaskActionClient(taskLocks)); + EasyMock.replay(toolbox); + return toolbox; + } + + private static TaskActionClient createTaskActionClient(List taskLocks) + { + try { + TaskActionClient taskActionClient = EasyMock.mock(TaskActionClient.class); + EasyMock.expect(taskActionClient.submit(EasyMock.anyObject(LockListAction.class))).andStubReturn(taskLocks); + EasyMock.replay(taskActionClient); + return taskActionClient; + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static TaskLock createTaskLock(Interval interval) + { + TaskLock taskLock = EasyMock.mock(TaskLock.class); + EasyMock.expect(taskLock.getInterval()).andStubReturn(interval); + EasyMock.expect(taskLock.getVersion()).andStubReturn(INTERVAL_TO_VERSION.get(interval)); + EasyMock.replay(taskLock); + return taskLock; + } + + private static InputRow createInputRow(Interval interval, String dimensionValue) + { + long timestamp = interval.getStartMillis(); + InputRow inputRow = EasyMock.mock(InputRow.class); + EasyMock.expect(inputRow.getTimestamp()).andStubReturn(DateTimes.utc(timestamp)); + EasyMock.expect(inputRow.getTimestampFromEpoch()).andStubReturn(timestamp); + EasyMock.expect(inputRow.getDimension(PARTITION_DIMENSION)) + .andStubReturn(Collections.singletonList(dimensionValue)); + EasyMock.replay(inputRow); + return inputRow; + } +} diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReportTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReportTest.java new file mode 100644 index 000000000000..c23362f3e9c3 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/DimensionDistributionReportTest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.segment.TestHelper; +import org.joda.time.Interval; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collections; +import java.util.Map; + +public class DimensionDistributionReportTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + + private DimensionDistributionReport target; + + @Before + public void setup() + { + Interval interval = Intervals.ETERNITY; + StringSketch sketch = new StringSketch(); + Map intervalToDistribution = Collections.singletonMap(interval, sketch); + String taskId = "abc"; + target = new DimensionDistributionReport(taskId, intervalToDistribution); + } + + @Test + public void serializesDeserializes() + { + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, target); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocationTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocationTest.java new file mode 100644 index 000000000000..956dbc8fd150 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionLocationTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.segment.TestHelper; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class GenericPartitionLocationTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + + private GenericPartitionLocation target; + + @Before + public void setup() + { + target = new GenericPartitionLocation( + ParallelIndexTestingFactory.HOST, + ParallelIndexTestingFactory.PORT, + ParallelIndexTestingFactory.USE_HTTPS, + ParallelIndexTestingFactory.SUBTASK_ID, + ParallelIndexTestingFactory.INTERVAL, + ParallelIndexTestingFactory.HASH_BASED_NUMBERED_SHARD_SPEC + ); + } + + @Test + public void serializesDeserializes() + { + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, target); + } + + @Test + public void hasPartitionIdThatMatchesShardSpec() + { + Assert.assertEquals(ParallelIndexTestingFactory.PARTITION_ID, target.getPartitionId()); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java new file mode 100644 index 000000000000..2bcac8edfd47 --- /dev/null +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.segment.TestHelper; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class GenericPartitionStatTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + + private GenericPartitionStat target; + + @Before + public void setup() + { + target = new GenericPartitionStat( + ParallelIndexTestingFactory.TASK_EXECUTOR_HOST, + ParallelIndexTestingFactory.TASK_EXECUTOR_PORT, + ParallelIndexTestingFactory.USE_HTTPS, + ParallelIndexTestingFactory.INTERVAL, + ParallelIndexTestingFactory.HASH_BASED_NUMBERED_SHARD_SPEC, + ParallelIndexTestingFactory.NUM_ROWS, + ParallelIndexTestingFactory.SIZE_BYTES + ); + } + + @Test + public void serializesDeserializes() + { + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, target); + } + + @Test + public void hasPartitionIdThatMatchesSecondaryPartition() + { + 
Assert.assertEquals(target.getSecondaryPartition().getPartitionNum(), target.getPartitionId()); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java index 97c6954e9247..313a5ccad4ef 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTaskSerdeTest.java @@ -153,14 +153,28 @@ public void forceGuaranteedRollupWithHashPartitionsValid() } @Test - public void forceGuaranteedRollupWithSingleDimPartitionsInvalid() + public void forceGuaranteedRollupWithSingleDimPartitionsMissingDimension() { expectedException.expect(IllegalStateException.class); expectedException.expectMessage( - "forceGuaranteedRollup is incompatible with partitionsSpec: single_dim partitions unsupported" + "forceGuaranteedRollup is incompatible with partitionsSpec: partitionDimension must be specified" ); new ParallelIndexSupervisorTaskBuilder() + .ingestionSpec( + new ParallelIndexIngestionSpecBuilder() + .forceGuaranteedRollup(true) + .partitionsSpec(new SingleDimensionPartitionsSpec(1, null, null, true)) + .inputIntervals(INTERVALS) + .build() + ) + .build(); + } + + @Test + public void forceGuaranteedRollupWithSingleDimPartitionsValid() + { + ParallelIndexSupervisorTask task = new ParallelIndexSupervisorTaskBuilder() .ingestionSpec( new ParallelIndexIngestionSpecBuilder() .forceGuaranteedRollup(true) @@ -169,6 +183,9 @@ public void forceGuaranteedRollupWithSingleDimPartitionsInvalid() .build() ) .build(); + + PartitionsSpec partitionsSpec = task.getIngestionSchema().getTuningConfig().getPartitionsSpec(); + Assert.assertThat(partitionsSpec, 
CoreMatchers.instanceOf(SingleDimensionPartitionsSpec.class)); } private static class ParallelIndexSupervisorTaskBuilder diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java index 3d6e86aa01f2..a580ab6b8deb 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexTestingFactory.java @@ -27,6 +27,7 @@ import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.JsonInputFormat; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; @@ -44,6 +45,7 @@ import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; +import org.easymock.EasyMock; import org.joda.time.Duration; import org.joda.time.Interval; @@ -229,7 +231,14 @@ SingleDimensionPartitionsSpec build() static IndexTaskClientFactory createTaskClientFactory() { - return TASK_CLIENT_FACTORY; + return (taskInfoProvider, callerId, numThreads, httpTimeout, numRetries) -> createTaskClient(); + } + + private static ParallelIndexSupervisorTaskClient createTaskClient() + { + ParallelIndexSupervisorTaskClient taskClient = EasyMock.niceMock(ParallelIndexSupervisorTaskClient.class); + EasyMock.replay(taskClient); + return taskClient; } static String createRow(long timestamp, Object dimensionValue) @@ -244,4 +253,9 @@ static String createRow(long timestamp, Object 
dimensionValue) throw new RuntimeException(e); } } + + static InputFormat getInputFormat() + { + return new JsonInputFormat(null, null); + } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java new file mode 100644 index 000000000000..86bfc2e0e8c0 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java @@ -0,0 +1,470 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Joiner; +import com.google.common.collect.Iterables; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.impl.InlineInputSource; +import org.apache.druid.indexer.TaskState; +import org.apache.druid.indexer.TaskStatus; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.task.IndexTaskClientFactory; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.indexing.DataSchema; +import org.apache.druid.testing.junit.LoggerCaptureRule; +import org.apache.logging.log4j.core.LogEvent; +import org.easymock.Capture; +import org.easymock.EasyMock; +import org.hamcrest.Matchers; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +@RunWith(Enclosed.class) +public class PartialDimensionDistributionTaskTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + private static final SingleDimensionPartitionsSpec SINGLE_DIM_PARTITIONS_SPEC = + new 
ParallelIndexTestingFactory.SingleDimensionPartitionsSpecBuilder().build(); + + public static class ConstructorTest + { + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Test + public void requiresForceGuaranteedRollup() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("forceGuaranteedRollup must be set"); + + ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .forceGuaranteedRollup(false) + .partitionsSpec(new DynamicPartitionsSpec(null, null)) + .build(); + + new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .build(); + } + + @Test + public void requiresSingleDimensionPartitions() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("single_dim partitionsSpec required"); + + PartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 1, null); + ParallelIndexTuningConfig tuningConfig = + new ParallelIndexTestingFactory.TuningConfigBuilder().partitionsSpec(partitionsSpec).build(); + + new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .build(); + } + + @Test + public void requiresGranularitySpecInputIntervals() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Missing intervals in granularitySpec"); + + DataSchema dataSchema = ParallelIndexTestingFactory.createDataSchema(Collections.emptyList()); + + new PartialDimensionDistributionTaskBuilder() + .dataSchema(dataSchema) + .build(); + } + + @Test + public void serializesDeserializes() + { + PartialDimensionDistributionTask task = new PartialDimensionDistributionTaskBuilder() + .build(); + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, task); + } + + @Test + public void hasCorrectPrefixForAutomaticId() + { + PartialDimensionDistributionTask task = new PartialDimensionDistributionTaskBuilder() + .id(ParallelIndexTestingFactory.AUTOMATIC_ID) + .build(); + Assert.assertThat(task.getId(), 
Matchers.startsWith(PartialDimensionDistributionTask.TYPE)); + } + } + + public static class RunTaskTest + { + private static final TaskToolbox TASK_TOOLBOX = null; + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Rule + public LoggerCaptureRule logger = new LoggerCaptureRule(PartialDimensionDistributionTask.class); + + @Test + public void requiresPartitionDimension() throws Exception + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("partitionDimension must be specified"); + + ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec( + new ParallelIndexTestingFactory.SingleDimensionPartitionsSpecBuilder().partitionDimension(null).build() + ) + .build(); + PartialDimensionDistributionTask task = new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .build(); + + task.runTask(TASK_TOOLBOX); + } + + @Test + public void logsParseExceptionsIfEnabled() throws Exception + { + long invalidTimestamp = Long.MAX_VALUE; + InputSource inlineInputSource = new InlineInputSource( + ParallelIndexTestingFactory.createRow(invalidTimestamp, "a") + ); + ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec(SINGLE_DIM_PARTITIONS_SPEC) + .logParseExceptions(true) + .build(); + PartialDimensionDistributionTask task = new PartialDimensionDistributionTaskBuilder() + .inputSource(inlineInputSource) + .tuningConfig(tuningConfig) + .taskClientFactory(ParallelIndexTestingFactory.createTaskClientFactory()) + .build(); + + task.runTask(TASK_TOOLBOX); + + List logEvents = logger.getLogEvents(); + Assert.assertEquals(1, logEvents.size()); + String logMessage = logEvents.get(0).getMessage().getFormattedMessage(); + Assert.assertThat(logMessage, Matchers.containsString("Encountered parse exception")); + } + + @Test + public void doesNotLogParseExceptionsIfDisabled() throws Exception + { + 
ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec(SINGLE_DIM_PARTITIONS_SPEC) + .logParseExceptions(false) + .build(); + PartialDimensionDistributionTask task = new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .taskClientFactory(ParallelIndexTestingFactory.createTaskClientFactory()) + .build(); + + task.runTask(TASK_TOOLBOX); + + Assert.assertEquals(Collections.emptyList(), logger.getLogEvents()); + } + + @Test + public void failsWhenTooManyParseExceptions() throws Exception + { + ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec(SINGLE_DIM_PARTITIONS_SPEC) + .maxParseExceptions(0) + .build(); + PartialDimensionDistributionTask task = new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .taskClientFactory(ParallelIndexTestingFactory.createTaskClientFactory()) + .build(); + + exception.expect(RuntimeException.class); + exception.expectMessage("Max parse exceptions exceeded"); + + task.runTask(TASK_TOOLBOX); + } + + @Test + public void skipsRowsWithMultipleDimensionValues() + { + InputSource inlineInputSource = new InlineInputSource( + ParallelIndexTestingFactory.createRow(0, Arrays.asList("a", "b")) + ); + PartialDimensionDistributionTaskBuilder taskBuilder = new PartialDimensionDistributionTaskBuilder() + .inputSource(inlineInputSource); + + DimensionDistributionReport report = runTask(taskBuilder); + + Map intervalToDistribution = report.getIntervalToDistribution(); + Assert.assertEquals(0, intervalToDistribution.size()); + } + + @Test + public void sendsCorrectReportWhenAssumeGroupedTrue() + { + long timestamp = 0; + String dimensionValue = "a"; + InputSource inlineInputSource = new InlineInputSource( + ParallelIndexTestingFactory.createRow(timestamp, dimensionValue) + + "\n" + ParallelIndexTestingFactory.createRow(timestamp + 1, dimensionValue) + ); + ParallelIndexTuningConfig 
tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec( + new ParallelIndexTestingFactory.SingleDimensionPartitionsSpecBuilder().assumeGrouped(true).build() + ) + .build(); + PartialDimensionDistributionTaskBuilder taskBuilder = new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .inputSource(inlineInputSource); + + DimensionDistributionReport report = runTask(taskBuilder); + + Assert.assertEquals(ParallelIndexTestingFactory.ID, report.getTaskId()); + Map intervalToDistribution = report.getIntervalToDistribution(); + StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); + Assert.assertNotNull(distribution); + String[] partitions = distribution.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(3, partitions.length); + Assert.assertEquals(dimensionValue, partitions[0]); + Assert.assertEquals(dimensionValue, partitions[1]); + Assert.assertEquals(dimensionValue, partitions[2]); + } + + @Test + public void groupsRowsWhenAssumeGroupedFalse() + { + long timestamp = 0; + String dimensionValue = "a"; + InputSource inlineInputSource = new InlineInputSource( + ParallelIndexTestingFactory.createRow(timestamp, dimensionValue) + + "\n" + ParallelIndexTestingFactory.createRow(timestamp + 1, dimensionValue) + ); + ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec( + new ParallelIndexTestingFactory.SingleDimensionPartitionsSpecBuilder().assumeGrouped(false).build() + ) + .build(); + PartialDimensionDistributionTaskBuilder taskBuilder = new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .inputSource(inlineInputSource); + + DimensionDistributionReport report = runTask(taskBuilder); + + Assert.assertEquals(ParallelIndexTestingFactory.ID, report.getTaskId()); + Map intervalToDistribution = report.getIntervalToDistribution(); + StringDistribution distribution = 
Iterables.getOnlyElement(intervalToDistribution.values()); + Assert.assertNotNull(distribution); + String[] partitions = distribution.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(2, partitions.length); + Assert.assertEquals(dimensionValue, partitions[0]); + Assert.assertEquals(dimensionValue, partitions[1]); + } + + @Test + public void preservesMinAndMaxWhenAssumeGroupedFalse() + { + // Create a small bloom filter so that it saturates quickly + int smallBloomFilter = 1; + double manyFalsePositiveBloomFilter = 0.5; + int minBloomFilterBits = Long.SIZE; + + long timestamp = 0; + List dimensionValues = IntStream.range(0, minBloomFilterBits * 10) + .mapToObj(i -> String.format("%010d", i)) + .collect(Collectors.toCollection(ArrayList::new)); + String minDimensionValue = dimensionValues.get(0); + String maxDimensionValue = dimensionValues.get(dimensionValues.size() - 1); + List rows = dimensionValues.stream() + .map(d -> ParallelIndexTestingFactory.createRow(timestamp, d)) + .collect(Collectors.toList()); + Joiner joiner = Joiner.on("\n"); + InputSource inlineInputSource = new InlineInputSource( + joiner.join( + joiner.join(rows.subList(1, rows.size())), // saturate bloom filter first + rows.get(0), + rows.get(rows.size() - 1) + ) + ); + ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec( + new ParallelIndexTestingFactory.SingleDimensionPartitionsSpecBuilder().assumeGrouped(false).build() + ) + .build(); + DataSchema dataSchema = ParallelIndexTestingFactory.createDataSchema(ParallelIndexTestingFactory.INPUT_INTERVALS); + PartialDimensionDistributionTaskBuilder taskBuilder = new PartialDimensionDistributionTaskBuilder() + .tuningConfig(tuningConfig) + .dataSchema(dataSchema) + .inputSource(inlineInputSource) + .ungroupedRowDimValueFilterSupplier( + () -> new PartialDimensionDistributionTask.UngroupedRowDimensionValueFilter( + dataSchema.getGranularitySpec().getQueryGranularity(), + smallBloomFilter, + 
manyFalsePositiveBloomFilter + ) + ); + + DimensionDistributionReport report = runTask(taskBuilder); + + Assert.assertEquals(ParallelIndexTestingFactory.ID, report.getTaskId()); + Map intervalToDistribution = report.getIntervalToDistribution(); + StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); + Assert.assertNotNull(distribution); + String[] partitions = distribution.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(minBloomFilterBits + 3, partitions.length); // 3 = min + max + exclusive endpoint + Assert.assertEquals(minDimensionValue, partitions[0]); + Assert.assertEquals(maxDimensionValue, partitions[partitions.length - 1]); + } + + @Test + public void returnsSuccessIfNoExceptions() throws Exception + { + PartialDimensionDistributionTask task = new PartialDimensionDistributionTaskBuilder() + .taskClientFactory(ParallelIndexTestingFactory.createTaskClientFactory()) + .build(); + + TaskStatus taskStatus = task.runTask(TASK_TOOLBOX); + + Assert.assertEquals(ParallelIndexTestingFactory.ID, taskStatus.getId()); + Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); + } + + private static DimensionDistributionReport runTask(PartialDimensionDistributionTaskBuilder taskBuilder) + { + Capture reportCapture = Capture.newInstance(); + ParallelIndexSupervisorTaskClient taskClient = EasyMock.mock(ParallelIndexSupervisorTaskClient.class); + taskClient.report(EasyMock.eq(ParallelIndexTestingFactory.SUPERVISOR_TASK_ID), EasyMock.capture(reportCapture)); + EasyMock.replay(taskClient); + + try { + taskBuilder.taskClientFactory((taskInfoProvider, callerId, numThreads, httpTimeout, numRetries) -> taskClient) + .build() + .runTask(TASK_TOOLBOX); + } + catch (Exception e) { + throw new RuntimeException(e); + } + + return (DimensionDistributionReport) reportCapture.getValue(); + } + } + + private static class PartialDimensionDistributionTaskBuilder + { + private static final InputFormat INPUT_FORMAT = 
ParallelIndexTestingFactory.getInputFormat(); + + private String id = ParallelIndexTestingFactory.ID; + private InputSource inputSource = new InlineInputSource("row-with-invalid-timestamp"); + private ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec(new ParallelIndexTestingFactory.SingleDimensionPartitionsSpecBuilder().build()) + .build(); + private DataSchema dataSchema = + ParallelIndexTestingFactory.createDataSchema(ParallelIndexTestingFactory.INPUT_INTERVALS); + private IndexTaskClientFactory taskClientFactory = + ParallelIndexTestingFactory.TASK_CLIENT_FACTORY; + private Supplier + ungroupedRowDimValueFilterSupplier = null; + + @SuppressWarnings("SameParameterValue") + PartialDimensionDistributionTaskBuilder id(String id) + { + this.id = id; + return this; + } + + PartialDimensionDistributionTaskBuilder inputSource(InputSource inputSource) + { + this.inputSource = inputSource; + return this; + } + + PartialDimensionDistributionTaskBuilder tuningConfig(ParallelIndexTuningConfig tuningConfig) + { + this.tuningConfig = tuningConfig; + return this; + } + + PartialDimensionDistributionTaskBuilder dataSchema(DataSchema dataSchema) + { + this.dataSchema = dataSchema; + return this; + } + + PartialDimensionDistributionTaskBuilder taskClientFactory( + IndexTaskClientFactory taskClientFactory + ) + { + this.taskClientFactory = taskClientFactory; + return this; + } + + PartialDimensionDistributionTaskBuilder ungroupedRowDimValueFilterSupplier( + Supplier ungroupedRowDimValueFilterSupplier + ) + { + this.ungroupedRowDimValueFilterSupplier = ungroupedRowDimValueFilterSupplier; + return this; + } + + PartialDimensionDistributionTask build() + { + ParallelIndexIngestionSpec ingestionSpec = + ParallelIndexTestingFactory.createIngestionSpec(inputSource, INPUT_FORMAT, tuningConfig, dataSchema); + + Supplier supplier = + ungroupedRowDimValueFilterSupplier == null + ? 
() -> new PartialDimensionDistributionTask.UngroupedRowDimensionValueFilter( + dataSchema.getGranularitySpec().getQueryGranularity() + ) + : ungroupedRowDimValueFilterSupplier; + + return new PartialDimensionDistributionTask( + id, + ParallelIndexTestingFactory.GROUP_ID, + ParallelIndexTestingFactory.TASK_RESOURCE, + ParallelIndexTestingFactory.SUPERVISOR_TASK_ID, + ParallelIndexTestingFactory.NUM_ATTEMPTS, + ingestionSpec, + ParallelIndexTestingFactory.CONTEXT, + ParallelIndexTestingFactory.INDEXING_SERVICE_CLIENT, + taskClientFactory, + supplier + ); + } + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfigTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfigTest.java new file mode 100644 index 000000000000..c96adb89a755 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIOConfigTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.segment.TestHelper; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collections; + +public class PartialGenericSegmentMergeIOConfigTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + private static final GenericPartitionLocation GENERIC_PARTITION_LOCATION = new GenericPartitionLocation( + ParallelIndexTestingFactory.HOST, + ParallelIndexTestingFactory.PORT, + ParallelIndexTestingFactory.USE_HTTPS, + ParallelIndexTestingFactory.SUBTASK_ID, + ParallelIndexTestingFactory.INTERVAL, + ParallelIndexTestingFactory.HASH_BASED_NUMBERED_SHARD_SPEC + ); + + private PartialGenericSegmentMergeIOConfig target; + + @Before + public void setup() + { + target = new PartialGenericSegmentMergeIOConfig(Collections.singletonList(GENERIC_PARTITION_LOCATION)); + } + + @Test + public void serializesDeserializes() + { + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, target); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIngestionSpecTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIngestionSpecTest.java new file mode 100644 index 000000000000..c30cc9ee3b29 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeIngestionSpecTest.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.segment.TestHelper; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collections; + +public class PartialGenericSegmentMergeIngestionSpecTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + private static final GenericPartitionLocation GENERIC_PARTITION_LOCATION = new GenericPartitionLocation( + ParallelIndexTestingFactory.HOST, + ParallelIndexTestingFactory.PORT, + ParallelIndexTestingFactory.USE_HTTPS, + ParallelIndexTestingFactory.SUBTASK_ID, + ParallelIndexTestingFactory.INTERVAL, + ParallelIndexTestingFactory.HASH_BASED_NUMBERED_SHARD_SPEC + ); + private static final PartialGenericSegmentMergeIOConfig IO_CONFIG = + new PartialGenericSegmentMergeIOConfig(Collections.singletonList(GENERIC_PARTITION_LOCATION)); + private static final HashedPartitionsSpec PARTITIONS_SPEC = new HashedPartitionsSpec( + null, + 1, + Collections.emptyList() + ); + + private PartialGenericSegmentMergeIngestionSpec target; + + @Before + public void setup() + { + target = new PartialGenericSegmentMergeIngestionSpec( + ParallelIndexTestingFactory.createDataSchema(ParallelIndexTestingFactory.INPUT_INTERVALS), + 
IO_CONFIG, + new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec(PARTITIONS_SPEC) + .build() + ); + } + + @Test + public void serializesDeserializes() + { + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, target); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java new file mode 100644 index 000000000000..69403bc14414 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTaskTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.segment.TestHelper; +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Collections; + +public class PartialGenericSegmentMergeTaskTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + private static final GenericPartitionLocation GENERIC_PARTITION_LOCATION = new GenericPartitionLocation( + ParallelIndexTestingFactory.HOST, + ParallelIndexTestingFactory.PORT, + ParallelIndexTestingFactory.USE_HTTPS, + ParallelIndexTestingFactory.SUBTASK_ID, + ParallelIndexTestingFactory.INTERVAL, + ParallelIndexTestingFactory.HASH_BASED_NUMBERED_SHARD_SPEC + ); + private static final PartialGenericSegmentMergeIOConfig IO_CONFIG = + new PartialGenericSegmentMergeIOConfig(Collections.singletonList(GENERIC_PARTITION_LOCATION)); + private static final HashedPartitionsSpec PARTITIONS_SPEC = new HashedPartitionsSpec( + null, + 1, + Collections.emptyList() + ); + private static final PartialGenericSegmentMergeIngestionSpec INGESTION_SPEC = + new PartialGenericSegmentMergeIngestionSpec( + ParallelIndexTestingFactory.createDataSchema(ParallelIndexTestingFactory.INPUT_INTERVALS), + IO_CONFIG, + new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec(PARTITIONS_SPEC) + .build() + ); + + private PartialGenericSegmentMergeTask target; + + @Before + public void setup() + { + target = new PartialGenericSegmentMergeTask( + ParallelIndexTestingFactory.AUTOMATIC_ID, + ParallelIndexTestingFactory.GROUP_ID, + ParallelIndexTestingFactory.TASK_RESOURCE, + ParallelIndexTestingFactory.SUPERVISOR_TASK_ID, + ParallelIndexTestingFactory.NUM_ATTEMPTS, + INGESTION_SPEC, + ParallelIndexTestingFactory.CONTEXT, + 
ParallelIndexTestingFactory.INDEXING_SERVICE_CLIENT, + ParallelIndexTestingFactory.TASK_CLIENT_FACTORY, + ParallelIndexTestingFactory.SHUFFLE_CLIENT + ); + } + + @Test + public void serializesDeserializes() + { + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, target); + } + + @Test + public void hasCorrectPrefixForAutomaticId() + { + String id = target.getId(); + Assert.assertThat(id, Matchers.startsWith(PartialGenericSegmentMergeTask.TYPE)); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java new file mode 100644 index 000000000000..67a4919cd9df --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.impl.InlineInputSource; +import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexing.common.task.IndexTaskClientFactory; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.indexing.DataSchema; +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import java.util.Collections; + +public class PartialRangeSegmentGenerateTaskTest +{ + private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Test + public void requiresForceGuaranteedRollup() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("single_dim partitionsSpec required"); + + ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .forceGuaranteedRollup(false) + .partitionsSpec(new DynamicPartitionsSpec(null, null)) + .build(); + + new PartialRangeSegmentGenerateTaskBuilder() + .tuningConfig(tuningConfig) + .build(); + } + + @Test + public void requiresSingleDimensionPartitions() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("single_dim partitionsSpec required"); + + PartitionsSpec partitionsSpec = new HashedPartitionsSpec(null, 1, null); + ParallelIndexTuningConfig tuningConfig = + new ParallelIndexTestingFactory.TuningConfigBuilder().partitionsSpec(partitionsSpec).build(); + + new PartialRangeSegmentGenerateTaskBuilder() + 
.tuningConfig(tuningConfig) + .build(); + } + + @Test + public void requiresGranularitySpecInputIntervals() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Missing intervals in granularitySpec"); + + DataSchema dataSchema = ParallelIndexTestingFactory.createDataSchema(Collections.emptyList()); + + new PartialRangeSegmentGenerateTaskBuilder() + .dataSchema(dataSchema) + .build(); + } + + @Test + public void serializesDeserializes() + { + PartialRangeSegmentGenerateTask task = new PartialRangeSegmentGenerateTaskBuilder().build(); + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, task); + } + + @Test + public void hasCorrectPrefixForAutomaticId() + { + PartialRangeSegmentGenerateTask task = new PartialRangeSegmentGenerateTaskBuilder().build(); + Assert.assertThat(task.getId(), Matchers.startsWith(PartialRangeSegmentGenerateTask.TYPE)); + } + + private static class PartialRangeSegmentGenerateTaskBuilder + { + private static final InputSource INPUT_SOURCE = new InlineInputSource("data"); + private static final InputFormat INPUT_FORMAT = ParallelIndexTestingFactory.getInputFormat(); + + private final IndexTaskClientFactory taskClientFactory = + ParallelIndexTestingFactory.TASK_CLIENT_FACTORY; + + private ParallelIndexTuningConfig tuningConfig = new ParallelIndexTestingFactory.TuningConfigBuilder() + .partitionsSpec(new ParallelIndexTestingFactory.SingleDimensionPartitionsSpecBuilder().build()) + .build(); + private DataSchema dataSchema = + ParallelIndexTestingFactory.createDataSchema(ParallelIndexTestingFactory.INPUT_INTERVALS); + + PartialRangeSegmentGenerateTaskBuilder tuningConfig(ParallelIndexTuningConfig tuningConfig) + { + this.tuningConfig = tuningConfig; + return this; + } + + PartialRangeSegmentGenerateTaskBuilder dataSchema(DataSchema dataSchema) + { + this.dataSchema = dataSchema; + return this; + } + + PartialRangeSegmentGenerateTask build() + { + ParallelIndexIngestionSpec ingestionSpec = + 
ParallelIndexTestingFactory.createIngestionSpec(INPUT_SOURCE, INPUT_FORMAT, tuningConfig, dataSchema); + + return new PartialRangeSegmentGenerateTask( + ParallelIndexTestingFactory.AUTOMATIC_ID, + ParallelIndexTestingFactory.GROUP_ID, + ParallelIndexTestingFactory.TASK_RESOURCE, + ParallelIndexTestingFactory.SUPERVISOR_TASK_ID, + ParallelIndexTestingFactory.NUM_ATTEMPTS, + ingestionSpec, + ParallelIndexTestingFactory.CONTEXT, + Collections.emptyMap(), + ParallelIndexTestingFactory.INDEXING_SERVICE_CLIENT, + taskClientFactory, + ParallelIndexTestingFactory.APPENDERATORS_MANAGER + ); + } + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java new file mode 100644 index 000000000000..26814d2c3040 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -0,0 +1,472 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Multimap; +import com.google.common.collect.SetMultimap; +import org.apache.druid.client.indexing.IndexingServiceClient; +import org.apache.druid.data.input.InputSplit; +import org.apache.druid.data.input.impl.CSVParseSpec; +import org.apache.druid.data.input.impl.DimensionsSpec; +import org.apache.druid.data.input.impl.ParseSpec; +import org.apache.druid.data.input.impl.TimestampSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.indexing.common.LockGranularity; +import org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.task.IndexTaskClientFactory; +import org.apache.druid.indexing.common.task.TaskResource; +import org.apache.druid.indexing.common.task.TestAppenderatorsManager; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.guava.Comparators; +import org.apache.druid.query.scan.ScanResultValue; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.SingleDimensionShardSpec; +import org.hamcrest.Matchers; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import javax.annotation.Nullable; +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; 
+import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.stream.Collectors; + +@RunWith(Parameterized.class) +public class RangePartitionMultiPhaseParallelIndexingTest extends AbstractMultiPhaseParallelIndexingTest +{ + private static final int NUM_FILE = 10; + private static final int NUM_ROW = 20; + private static final int NUM_DAY = 2; + private static final int NUM_PARTITION = 2; + private static final int YEAR = 2017; + private static final String DIM1 = "dim1"; + private static final String DIM2 = "dim2"; + private static final List DIMS = ImmutableList.of(DIM1, DIM2); + private static final String TEST_FILE_NAME_PREFIX = "test_"; + private static final ParseSpec PARSE_SPEC = new CSVParseSpec( + new TimestampSpec( + "ts", + "auto", + null + ), + new DimensionsSpec( + DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", DIM1, DIM2)), + new ArrayList<>(), + new ArrayList<>() + ), + null, + Arrays.asList("ts", DIM1, DIM2, "val"), + false, + 0 + ); + + @Parameterized.Parameters(name = "{0}, useInputFormatApi={1}") + public static Iterable constructorFeeder() + { + return ImmutableList.of( + new Object[]{LockGranularity.TIME_CHUNK, false}, + new Object[]{LockGranularity.TIME_CHUNK, true}, + new Object[]{LockGranularity.SEGMENT, true} + ); + } + + private File inputDir; + private SetMultimap intervalToDim1; + + public RangePartitionMultiPhaseParallelIndexingTest(LockGranularity lockGranularity, boolean useInputFormatApi) + { + super(lockGranularity, useInputFormatApi); + } + + @Override + @Before + public void setup() throws IOException + { + super.setup(); + inputDir = temporaryFolder.newFolder("data"); + intervalToDim1 = createInputFiles(inputDir); + } + + private static SetMultimap createInputFiles(File inputDir) throws IOException + { + SetMultimap intervalToDim1 = HashMultimap.create(); + + for (int fileIndex = 0; fileIndex < NUM_FILE; fileIndex++) { + Path path = new 
File(inputDir, TEST_FILE_NAME_PREFIX + fileIndex).toPath(); + try (final Writer writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { + for (int i = 0; i < (NUM_ROW / NUM_DAY); i++) { + for (int d = 0; d < NUM_DAY; d++) { + writeRow(writer, i + d, fileIndex + d, intervalToDim1); + } + } + } + } + + return intervalToDim1; + } + + private static void writeRow(Writer writer, int day, int fileIndex, Multimap intervalToDim1) + throws IOException + { + Interval interval = Intervals.of("%s-12-%d/%s-12-%d", YEAR, day + 1, YEAR, day + 2); + String startDate = interval.getStart().toString("y-M-d"); + String dim1Value = String.valueOf(fileIndex + 10); + writer.write(StringUtils.format("%s,%s,%d th test file\n", startDate, dim1Value, fileIndex)); + intervalToDim1.put(interval, dim1Value); + } + + @Test + public void createsCorrectRangePartitions() throws Exception + { + int targetRowsPerSegment = NUM_ROW / NUM_DAY / NUM_PARTITION; + final Set publishedSegments = runTestTask( + PARSE_SPEC, + Intervals.of("%s/%s", YEAR, YEAR + 1), + inputDir, + TEST_FILE_NAME_PREFIX + "*", + new SingleDimensionPartitionsSpec( + targetRowsPerSegment, + null, + DIM1, + false + ) + ); + assertRangePartitions(publishedSegments); + } + + private void assertRangePartitions(Set publishedSegments) throws IOException + { + Multimap intervalToSegments = ArrayListMultimap.create(); + publishedSegments.forEach(s -> intervalToSegments.put(s.getInterval(), s)); + + SortedSet publishedIntervals = new TreeSet<>(Comparators.intervalsByStartThenEnd()); + publishedIntervals.addAll(intervalToSegments.keySet()); + assertHasExpectedIntervals(publishedIntervals); + + Interval firstInterval = publishedIntervals.first(); + Interval lastInterval = publishedIntervals.last(); + File tempSegmentDir = temporaryFolder.newFolder(); + + intervalToSegments.asMap().forEach((interval, segments) -> { + assertNumPartition(interval, segments, firstInterval, lastInterval); + + List allValues = new ArrayList<>(NUM_ROW); + 
for (DataSegment segment : segments) { + List values = getColumnValues(segment, tempSegmentDir); + assertValuesInRange(values, segment); + allValues.addAll(values); + } + + assertIntervalHasAllExpectedValues(interval, allValues); + }); + } + + private void assertHasExpectedIntervals(Set publishedSegmentIntervals) + { + Assert.assertEquals(intervalToDim1.keySet(), publishedSegmentIntervals); + } + + private static void assertNumPartition( + Interval interval, + Collection segments, + Interval firstInterval, + Interval lastInterval + ) + { + int expectedNumPartition = NUM_PARTITION; + if (interval.equals(firstInterval) || interval.equals(lastInterval)) { + expectedNumPartition -= 1; + } + expectedNumPartition *= NUM_DAY; + Assert.assertEquals(expectedNumPartition, segments.size()); + } + + private List getColumnValues(DataSegment segment, File tempDir) + { + List results = querySegment(segment, DIMS, tempDir); + Assert.assertEquals(1, results.size()); + List> rows = (List>) results.get(0).getEvents(); + return rows.stream() + .map(row -> row.get(DIM1)) + .collect(Collectors.toList()); + } + + private static void assertValuesInRange(List values, DataSegment segment) + { + SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec(); + String start = shardSpec.getStart(); + Assert.assertNotNull(start); + String end = shardSpec.getEnd(); + + for (String value : values) { + Assert.assertThat(value.compareTo(start), Matchers.greaterThanOrEqualTo(0)); + + if (end != null) { + Assert.assertThat(value.compareTo(end), Matchers.lessThan(0)); + } + } + } + + private void assertIntervalHasAllExpectedValues(Interval interval, List actualValues) + { + List expectedValues = new ArrayList<>(intervalToDim1.get(interval)); + Assert.assertEquals(expectedValues.size(), actualValues.size()); + Collections.sort(expectedValues); + Collections.sort(actualValues); + Assert.assertEquals(expectedValues, actualValues); + } + + @Override + ParallelIndexSupervisorTask 
createParallelIndexSupervisorTask( + String id, + TaskResource taskResource, + ParallelIndexIngestionSpec ingestionSchema, + Map context, + IndexingServiceClient indexingServiceClient + ) + { + return new TestSupervisorTask(id, taskResource, ingestionSchema, context, indexingServiceClient); + } + + private static class TestSupervisorTask extends TestParallelIndexSupervisorTask + { + TestSupervisorTask( + String id, + TaskResource taskResource, + ParallelIndexIngestionSpec ingestionSchema, + Map context, + IndexingServiceClient indexingServiceClient + ) + { + super(id, taskResource, ingestionSchema, context, indexingServiceClient); + } + + @Override + PartialDimensionDistributionParallelIndexTaskRunner createPartialDimensionDistributionRunner(TaskToolbox toolbox) + { + return new TestPartialDimensionDistributionRunner(toolbox, this, getIndexingServiceClient()); + } + + @Override + PartialRangeSegmentGenerateParallelIndexTaskRunner createPartialRangeSegmentGenerateRunner( + TaskToolbox toolbox, + Map intervalToPartitions + ) + { + return new TestPartialRangeSegmentGenerateRunner( + toolbox, + this, + getIndexingServiceClient(), + intervalToPartitions + ); + } + + @Override + public PartialGenericSegmentMergeParallelIndexTaskRunner createPartialGenericSegmentMergeRunner( + TaskToolbox toolbox, + List ioConfigs + ) + { + return new TestPartialGenericSegmentMergeParallelIndexTaskRunner( + toolbox, + this, + ioConfigs, + getIndexingServiceClient() + ); + } + } + + private static class TestPartialDimensionDistributionRunner + extends PartialDimensionDistributionParallelIndexTaskRunner + { + private TestPartialDimensionDistributionRunner( + TaskToolbox toolbox, + ParallelIndexSupervisorTask supervisorTask, + IndexingServiceClient indexingServiceClient + ) + { + super( + toolbox, + supervisorTask.getId(), + supervisorTask.getGroupId(), + supervisorTask.getIngestionSchema(), + supervisorTask.getContext(), + indexingServiceClient, + new 
LocalParallelIndexTaskClientFactory(supervisorTask) + ); + } + } + + private static class TestPartialRangeSegmentGenerateRunner extends PartialRangeSegmentGenerateParallelIndexTaskRunner + { + private TestPartialRangeSegmentGenerateRunner( + TaskToolbox toolbox, + ParallelIndexSupervisorTask supervisorTask, + IndexingServiceClient indexingServiceClient, + Map intervalToPartitions + ) + { + super( + toolbox, + supervisorTask.getId(), + supervisorTask.getGroupId(), + supervisorTask.getIngestionSchema(), + supervisorTask.getContext(), + indexingServiceClient, + intervalToPartitions, + new LocalParallelIndexTaskClientFactory(supervisorTask), + new TestAppenderatorsManager() + ); + } + } + + + private static class TestPartialGenericSegmentMergeParallelIndexTaskRunner + extends PartialGenericSegmentMergeParallelIndexTaskRunner + { + private final ParallelIndexSupervisorTask supervisorTask; + + private TestPartialGenericSegmentMergeParallelIndexTaskRunner( + TaskToolbox toolbox, + ParallelIndexSupervisorTask supervisorTask, + List mergeIOConfigs, + IndexingServiceClient indexingServiceClient + ) + { + super( + toolbox, + supervisorTask.getId(), + supervisorTask.getGroupId(), + supervisorTask.getIngestionSchema().getDataSchema(), + mergeIOConfigs, + supervisorTask.getIngestionSchema().getTuningConfig(), + supervisorTask.getContext(), + indexingServiceClient + ); + this.supervisorTask = supervisorTask; + } + + @Override + SubTaskSpec newTaskSpec(PartialGenericSegmentMergeIOConfig ioConfig) + { + final PartialGenericSegmentMergeIngestionSpec ingestionSpec = + new PartialGenericSegmentMergeIngestionSpec( + supervisorTask.getIngestionSchema().getDataSchema(), + ioConfig, + getTuningConfig() + ); + return new SubTaskSpec( + getTaskId() + "_" + getAndIncrementNextSpecId(), + getGroupId(), + getTaskId(), + getContext(), + new InputSplit<>(ioConfig.getPartitionLocations()) + ) + { + @Override + public PartialGenericSegmentMergeTask newSubTask(int numAttempts) + { + return new 
TestPartialGenericSegmentMergeTask( + null, + getGroupId(), + null, + getSupervisorTaskId(), + numAttempts, + ingestionSpec, + getContext(), + getIndexingServiceClient(), + new LocalParallelIndexTaskClientFactory(supervisorTask), + getToolbox() + ); + } + }; + } + } + + private static class TestPartialGenericSegmentMergeTask extends PartialGenericSegmentMergeTask + { + private final TaskToolbox toolbox; + + private TestPartialGenericSegmentMergeTask( + @Nullable String id, + String groupId, + TaskResource taskResource, + String supervisorTaskId, + int numAttempts, + PartialGenericSegmentMergeIngestionSpec ingestionSchema, + Map context, + IndexingServiceClient indexingServiceClient, + IndexTaskClientFactory taskClientFactory, + TaskToolbox toolbox + ) + { + super( + id, + groupId, + taskResource, + supervisorTaskId, + numAttempts, + ingestionSchema, + context, + indexingServiceClient, + taskClientFactory, + null + ); + this.toolbox = toolbox; + } + + @Override + File fetchSegmentFile(File partitionDir, GenericPartitionLocation location) + { + final File zippedFile = toolbox.getIntermediaryDataManager().findPartitionFile( + getSupervisorTaskId(), + location.getSubTaskId(), + location.getInterval(), + location.getPartitionId() + ); + if (zippedFile == null) { + throw new ISE("Can't find segment file for location[%s]", location); + } + return zippedFile; + } + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java new file mode 100644 index 000000000000..5a39b585a849 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import org.easymock.EasyMock; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class StringSketchMergerTest +{ + private StringSketchMerger target; + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Before + public void setup() + { + target = new StringSketchMerger(); + } + + @Test + public void requiresStringSketch() + { + StringDistribution distribution = EasyMock.mock(StringDistribution.class); + + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Only merging StringSketch instances is currently supported"); + + target.merge(distribution); + } + + @Test + public void mergesCorrectly() + { + String string1 = "a"; + StringSketch sketch1 = new StringSketch(); + sketch1.put(string1); + + String string2 = "mn"; + StringSketch sketch2 = new StringSketch(); + sketch2.put(string2); + + String string3 = "z"; + StringSketch sketch3 = new StringSketch(); + sketch3.put(string3); + + target.merge(sketch2); + target.merge(sketch1); + target.merge(sketch3); + StringDistribution merged = target.getResult(); + + String[] partitions = 
merged.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(4, partitions.length); + Assert.assertEquals(string1, partitions[0]); // min + Assert.assertEquals(string2, partitions[1]); // median + Assert.assertEquals(string3, partitions[2]); // max + Assert.assertEquals(string3, partitions[3]); // max + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java new file mode 100644 index 000000000000..0a0559e6049d --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java @@ -0,0 +1,379 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.yahoo.sketches.quantiles.ItemsSketch; +import org.apache.druid.jackson.JacksonModule; +import org.apache.druid.segment.TestHelper; +import org.hamcrest.Matchers; +import org.hamcrest.number.IsCloseTo; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.StringJoiner; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +@RunWith(Enclosed.class) +public class StringSketchTest +{ + private static final int FACTOR = 2; + private static final int NUM_STRING = StringSketch.SKETCH_K * FACTOR; + private static final double DELTA = ItemsSketch.getNormalizedRankError(StringSketch.SKETCH_K, true) * NUM_STRING; + private static final List STRINGS = IntStream.range(0, NUM_STRING) + .mapToObj(i -> String.format("%010d", i)) + .collect(Collectors.toCollection(ArrayList::new)); + private static final String MIN_STRING = STRINGS.get(0); + private static final String MAX_STRING = STRINGS.get(NUM_STRING - 1); + + static { + ItemsSketch.rand.setSeed(0); // make sketches deterministic for testing + } + + public static class SerializationDeserializationTest + { + private static final ObjectMapper OBJECT_MAPPER = new JacksonModule().smileMapper(); + + @Test + public void serializesDeserializes() + { + StringSketch target = new StringSketch(); + target.put(MIN_STRING); + target.put(MAX_STRING); + TestHelper.testSerializesDeserializes(OBJECT_MAPPER, target); + } + } + + public static class PutTest + { + private StringSketch target; + + @Before + public void setup() + { + target = new StringSketch(); + } + + @Test + public void putIfNewMin() + { + String 
value = MAX_STRING; + Assert.assertEquals(0, getCount()); + + target.putIfNewMin(value); + Assert.assertEquals(1, getCount()); + + target.putIfNewMin(value); + Assert.assertEquals(1, getCount()); + Assert.assertEquals(value, target.getDelegate().getMinValue()); + Assert.assertEquals(value, target.getDelegate().getMaxValue()); + + target.putIfNewMin(MIN_STRING); + Assert.assertEquals(2, getCount()); + Assert.assertEquals(MIN_STRING, target.getDelegate().getMinValue()); + Assert.assertEquals(MAX_STRING, target.getDelegate().getMaxValue()); + } + + @Test + public void putIfNewMax() + { + String value = MIN_STRING; + Assert.assertEquals(0, getCount()); + + target.putIfNewMax(value); + Assert.assertEquals(1, getCount()); + + target.putIfNewMax(value); + Assert.assertEquals(1, getCount()); + Assert.assertEquals(value, target.getDelegate().getMinValue()); + Assert.assertEquals(value, target.getDelegate().getMaxValue()); + + target.putIfNewMax(MAX_STRING); + Assert.assertEquals(2, getCount()); + Assert.assertEquals(MIN_STRING, target.getDelegate().getMinValue()); + Assert.assertEquals(MAX_STRING, target.getDelegate().getMaxValue()); + } + + private long getCount() + { + return target.getDelegate().getN(); + } + } + + @RunWith(Enclosed.class) + public static class PartitionTest + { + private static final StringSketch SKETCH; + + static { + SKETCH = new StringSketch(); + STRINGS.forEach(SKETCH::put); + } + + public static class TargetSizeTest + { + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Test + public void requiresPositiveSize() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("targetSize must be positive but is 0"); + + SKETCH.getEvenPartitionsByTargetSize(0); + } + + @Test + public void handlesEmptySketch() + { + StringSketch sketch = new StringSketch(); + String[] partitions = sketch.getEvenPartitionsByTargetSize(1); + Assert.assertEquals(0, partitions.length); + } + + @Test + public void 
handlesSingletonSketch() + { + String value = MIN_STRING; + StringSketch sketch = new StringSketch(); + sketch.put(value); + String[] partitions = sketch.getEvenPartitionsByTargetSize(1); + Assert.assertEquals(2, partitions.length); + Assert.assertEquals(value, partitions[0]); + Assert.assertEquals(value, partitions[1]); + } + + @Test + public void handlesMinimumSize() + { + String[] partitions = SKETCH.getEvenPartitionsByTargetSize(1); + assertMaxNumberOfPartitions(partitions); + } + + @Test + public void handlesUnevenPartitions() + { + List targetSizes = Arrays.asList(127, 257, 509, 1021, 2039, 4093); + targetSizes.forEach(TargetSizeTest::testHandlesUnevenPartitions); + } + + private static void testHandlesUnevenPartitions(int targetSize) + { + String[] partitions = SKETCH.getEvenPartitionsByTargetSize(targetSize); + + assertFirstAndLastPartitionsCorrect(partitions); + + String partitionsString = PartitionTest.toString(partitions); + int expectedHighPartitionCount = (int) Math.ceil((double) NUM_STRING / targetSize); + int expectedLowPartitionCount = expectedHighPartitionCount - 1; + Assert.assertThat( + "targetSize=" + targetSize + " " + partitionsString, + partitions.length, + Matchers.lessThanOrEqualTo(expectedHighPartitionCount + 1) + ); + Assert.assertThat( + "targetSize=" + targetSize + " " + partitionsString, + partitions.length, + Matchers.greaterThanOrEqualTo(expectedLowPartitionCount + 1) + ); + + int previous = 0; + for (int i = 1; i < partitions.length; i++) { + int current = Integer.parseInt(partitions[i]); + int size = current - previous; + Assert.assertThat( + getErrMsgPrefix(targetSize, i) + partitionsString, + (double) size, + IsCloseTo.closeTo(targetSize, Math.ceil(DELTA) * 2) + ); + previous = current; + } + } + + @Test + public void handlesSinglePartition() + { + String[] partitions = SKETCH.getEvenPartitionsByTargetSize(NUM_STRING); + assertSinglePartition(partitions); + } + + @Test + public void handlesOversizedPartition() + { + String[] 
partitions = SKETCH.getEvenPartitionsByTargetSize(Integer.MAX_VALUE); + assertSinglePartition(partitions); + } + } + + public static class MaxSizeTest + { + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Test + public void requiresPositiveSize() + { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("maxSize must be positive but is 0"); + + SKETCH.getEvenPartitionsByMaxSize(0); + } + + @Test + public void handlesEmptySketch() + { + StringSketch sketch = new StringSketch(); + String[] partitions = sketch.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(0, partitions.length); + } + + @Test + public void handlesSingletonSketch() + { + String value = MIN_STRING; + StringSketch sketch = new StringSketch(); + sketch.put(value); + String[] partitions = sketch.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(2, partitions.length); + Assert.assertEquals(value, partitions[0]); + Assert.assertEquals(value, partitions[1]); + } + + @Test + public void handlesMinimimumSize() + { + String[] partitions = SKETCH.getEvenPartitionsByMaxSize(1); + assertMaxNumberOfPartitions(partitions); + } + + @Test + public void handlesUnevenPartitions() + { + List maxSizes = Arrays.asList(509, 1021, 2039, 4093); + maxSizes.forEach(MaxSizeTest::testHandlesUnevenPartitions); + } + + private static void testHandlesUnevenPartitions(int maxSize) + { + String[] partitions = SKETCH.getEvenPartitionsByMaxSize(maxSize); + + assertFirstAndLastPartitionsCorrect(partitions); + + String partitionsString = PartitionTest.toString(partitions); + long expectedPartitionCount = (long) Math.ceil((double) NUM_STRING / maxSize); + Assert.assertEquals( + "maxSize=" + maxSize + " " + partitionsString, + expectedPartitionCount + 1, + partitions.length + ); + + double minSize = (double) NUM_STRING / expectedPartitionCount - DELTA; + + int previous = 0; + for (int i = 1; i < partitions.length; i++) { + int current = Integer.parseInt(partitions[i]); + int size = 
current - previous; + Assert.assertThat( + getErrMsgPrefix(maxSize, i) + partitionsString, + size, + Matchers.lessThanOrEqualTo(maxSize) + ); + Assert.assertThat( + getErrMsgPrefix(maxSize, i) + partitionsString, + (double) size, + Matchers.greaterThanOrEqualTo(minSize) + ); + previous = current; + } + } + + @Test + public void handlesSinglePartition() + { + String[] partitions = SKETCH.getEvenPartitionsByMaxSize((int) Math.ceil(NUM_STRING + DELTA)); + assertSinglePartition(partitions); + } + + @Test + public void handlesOversizedPartition() + { + String[] partitions = SKETCH.getEvenPartitionsByMaxSize(Integer.MAX_VALUE); + assertSinglePartition(partitions); + } + } + + private static void assertMaxNumberOfPartitions(String[] partitions) + { + String partitionsString = toString(partitions); + + Assert.assertEquals(partitionsString, NUM_STRING + 1, partitions.length); + assertFirstAndLastPartitionsCorrect(partitions); + + int previous = 0; + for (int i = 1; i < partitions.length; i++) { + int current = Integer.parseInt(partitions[i]); + Assert.assertEquals( + getErrMsgPrefix(1, i) + partitionsString, + 1, + current - previous, + FACTOR + ); + previous = current; + } + } + + private static void assertSinglePartition(String[] partitions) + { + Assert.assertEquals(2, partitions.length); + assertFirstAndLastPartitionsCorrect(partitions); + } + + private static void assertFirstAndLastPartitionsCorrect(String[] partitions) + { + Assert.assertEquals(MIN_STRING, partitions[0]); + Assert.assertEquals(MAX_STRING, partitions[partitions.length - 1]); + } + + private static String getErrMsgPrefix(int size, int i) + { + return "size=" + size + " i=" + i + " of "; + } + + private static String toString(String[] partitions) + { + String prefix = "partitions[" + partitions.length + "]="; + StringJoiner sj = new StringJoiner(" ", prefix, "]"); + for (int i = 0; i < partitions.length; i++) { + sj.add("[" + i + "]=" + partitions[i]); + } + return sj.toString(); + } + } +} diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFactoryTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFactoryTest.java new file mode 100644 index 000000000000..4d0b0795b822 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFactoryTest.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.joda.time.DateTime; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TimeDimTupleFactoryTest +{ + private static final Granularity GRANULARITY = Granularities.SECOND; + private static final DateTime TIMESTAMP = DateTimes.utc(0); + private static final String DIMENSION_VALUE = "abc"; + + private TimeDimTupleFactory target; + + @Before + public void setup() + { + target = new TimeDimTupleFactory(GRANULARITY); + } + + @Test + public void adjustsTimestamps() + { + TimeDimTuple timeDimTuple = target.createWithBucketedTimestamp(TIMESTAMP, DIMENSION_VALUE); + Assert.assertEquals(TIMESTAMP.getMillis(), timeDimTuple.getTimestamp()); + + TimeDimTuple timeDimTuple_plus_1msec = target.createWithBucketedTimestamp(TIMESTAMP.plus(1), DIMENSION_VALUE); + Assert.assertEquals(TIMESTAMP.getMillis(), timeDimTuple_plus_1msec.getTimestamp()); + + TimeDimTuple timeDimTuple_plus_999msec = target.createWithBucketedTimestamp(TIMESTAMP.plus(999), DIMENSION_VALUE); + Assert.assertEquals(TIMESTAMP.getMillis(), timeDimTuple_plus_999msec.getTimestamp()); + + TimeDimTuple timeDimTuple_plus_1sec = target.createWithBucketedTimestamp(TIMESTAMP.plus(1000), DIMENSION_VALUE); + Assert.assertEquals(TIMESTAMP.getMillis() + 1000, timeDimTuple_plus_1sec.getTimestamp()); + } + + @Test + public void setsDimensionValue() + { + TimeDimTuple timeDimTuple = target.createWithBucketedTimestamp(TIMESTAMP, DIMENSION_VALUE); + Assert.assertEquals(DIMENSION_VALUE, timeDimTuple.getDimensionValue()); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnelTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnelTest.java new file mode 100644 index 000000000000..87e9f46d2a6e --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleFunnelTest.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import com.google.common.hash.BloomFilter; +import org.junit.Assert; +import org.junit.Test; + +public class TimeDimTupleFunnelTest +{ + @Test + public void worksWithBloomFilter() + { + TimeDimTuple tuple = new TimeDimTuple(1000, "a"); + BloomFilter bloomFilter = BloomFilter.create(TimeDimTupleFunnel.INSTANCE, 10); + Assert.assertFalse(bloomFilter.mightContain(tuple)); + bloomFilter.put(tuple); + Assert.assertTrue(bloomFilter.mightContain(tuple)); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleTest.java new file mode 100644 index 000000000000..0570a030e330 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/TimeDimTupleTest.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import org.hamcrest.Matchers; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TimeDimTupleTest +{ + private static final long TIMESTAMP = 1000; + private static final String DIMENSION1 = "a"; + private static final String DIMENSION2 = "m"; + private static final String DIMENSION3 = "z"; + + private TimeDimTuple target; + + @Before + public void setup() + { + target = new TimeDimTuple(TIMESTAMP, DIMENSION2); + } + + @Test + public void comparesCorrectlyToSmallerTimestamp() + { + Assert.assertThat(target.compareTo(new TimeDimTuple(TIMESTAMP - 1, DIMENSION2)), Matchers.greaterThan(0)); + } + + @Test + public void comparesCorrectlyToSmallerDimension() + { + Assert.assertThat(target.compareTo(new TimeDimTuple(TIMESTAMP, DIMENSION1)), Matchers.greaterThan(0)); + } + + @Test + public void comparesCorrectlyToEqual() + { + Assert.assertEquals(0, target.compareTo(new TimeDimTuple(TIMESTAMP, DIMENSION2))); + } + + @Test + public void comparesCorrectlyToBiggerTimestamp() + { + Assert.assertThat(target.compareTo(new TimeDimTuple(TIMESTAMP + 1, DIMENSION2)), Matchers.lessThan(0)); + } + + @Test + public void comparesCorrectlyToBiggerDimension() + { + Assert.assertThat(target.compareTo(new TimeDimTuple(TIMESTAMP, DIMENSION3)), Matchers.lessThan(0)); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java index 754742fe3780..628a5b008e3c 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java @@ -75,6 +75,7 @@ public boolean hasNext() return true; } + @SuppressWarnings("IteratorNextCanNotThrowNoSuchElementException") @Override public InputRow next() { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java new file mode 100644 index 000000000000..6093d0d0eb46 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.iterator; + +import org.apache.druid.data.input.InputRow; +import org.apache.druid.java.util.common.parsers.CloseableIterator; +import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.joda.time.DateTime; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class RangePartitionTaskInputRowIteratorBuilderTest +{ + private static final IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester HANDLER_TESTER = + IndexTaskInputRowIteratorBuilderTestingFactory.createHandlerTester(() -> new RangePartitionIndexTaskInputRowIteratorBuilder(IndexTaskInputRowIteratorBuilderTestingFactory.DIMENSION)); + private static final InputRow NO_NEXT_INPUT_ROW = null; + + @Test + public void invokesDimensionValueCountFilterLast() + { + DateTime timestamp = IndexTaskInputRowIteratorBuilderTestingFactory.TIMESTAMP; + List multipleDimensionValues = Arrays.asList("multiple", "dimension", "values"); + InputRow inputRow = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow(timestamp, multipleDimensionValues); + CloseableIterator inputRowIterator = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator(inputRow); + GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec(timestamp, IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT); + + List handlerInvocationHistory = HANDLER_TESTER.invokeHandlers( + inputRowIterator, + granularitySpec, + NO_NEXT_INPUT_ROW + ); + + Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); + } + + @Test + public void doesNotInvokeHandlersIfRowValid() + { + DateTime timestamp = IndexTaskInputRowIteratorBuilderTestingFactory.TIMESTAMP; + List singleDimensionValue = Collections.singletonList("single-dimension-value"); + InputRow inputRow = 
IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow(timestamp, singleDimensionValue); + CloseableIterator inputRowIterator = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator(inputRow); + GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec(timestamp, IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT); + + List handlerInvocationHistory = HANDLER_TESTER.invokeHandlers( + inputRowIterator, + granularitySpec, + inputRow + ); + + Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); + } +} diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 2a7e0f5956f2..af1b2a40084e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -21,8 +21,11 @@ import com.google.inject.Inject; import org.apache.commons.io.IOUtils; +import org.apache.druid.indexing.common.task.batch.parallel.PartialDimensionDistributionTask; +import org.apache.druid.indexing.common.task.batch.parallel.PartialGenericSegmentMergeTask; import org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentGenerateTask; import org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentMergeTask; +import org.apache.druid.indexing.common.task.batch.parallel.PartialRangeSegmentGenerateTask; import org.apache.druid.indexing.common.task.batch.parallel.SinglePhaseSubTask; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; @@ -260,7 +263,10 @@ private long countCompleteSubTasks(final String dataSource, final boolean perfec return t.getType().equals(SinglePhaseSubTask.TYPE); } else { return 
t.getType().equalsIgnoreCase(PartialHashSegmentGenerateTask.TYPE) - || t.getType().equalsIgnoreCase(PartialHashSegmentMergeTask.TYPE); + || t.getType().equalsIgnoreCase(PartialHashSegmentMergeTask.TYPE) + || t.getType().equalsIgnoreCase(PartialDimensionDistributionTask.TYPE) + || t.getType().equalsIgnoreCase(PartialRangeSegmentGenerateTask.TYPE) + || t.getType().equalsIgnoreCase(PartialGenericSegmentMergeTask.TYPE); } }) .count(); diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java index be99de14933f..58c0270d98c9 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java @@ -23,6 +23,7 @@ import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; @@ -50,7 +51,8 @@ public static Object[][] resources() { return new Object[][]{ {new DynamicPartitionsSpec(null, null)}, - {new HashedPartitionsSpec(null, 2, null)} + {new HashedPartitionsSpec(null, 2, null)}, + {new SingleDimensionPartitionsSpec(2, null, "namespace", false)}, }; } From 1b72540d72a3b53ba3f8044504d2522265354117 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Fri, 22 Nov 2019 08:36:54 -0800 Subject: [PATCH 02/17] Fix dependencies & forbidden apis --- extensions-core/datasketches/pom.xml | 5 ----- indexing-service/pom.xml | 13 +++++++++++++ .../PartialDimensionDistributionTaskTest.java | 3 ++- .../parallel/distribution/StringSketchTest.java | 3 ++- pom.xml | 5 +++++ 5 files 
changed, 22 insertions(+), 7 deletions(-) diff --git a/extensions-core/datasketches/pom.xml b/extensions-core/datasketches/pom.xml index cf2a3e1c8d38..97e2da9c5603 100644 --- a/extensions-core/datasketches/pom.xml +++ b/extensions-core/datasketches/pom.xml @@ -34,10 +34,6 @@ ../../pom.xml - - 0.12.2 - - com.yahoo.datasketches @@ -52,7 +48,6 @@ com.yahoo.datasketches memory - ${datasketches.memory.version} org.apache.calcite diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index c55e2de46827..63c0e99e727c 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -199,11 +199,24 @@ it.unimi.dsi fastutil + + org.apache.logging.log4j + log4j-core + + + org.apache.logging.log4j + log4j-api + com.yahoo.datasketches sketches-core provided + + com.yahoo.datasketches + memory + provided + diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java index 86bfc2e0e8c0..6e44472a4dc0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java @@ -34,6 +34,7 @@ import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.task.IndexTaskClientFactory; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.testing.junit.LoggerCaptureRule; @@ -306,7 +307,7 @@ public void preservesMinAndMaxWhenAssumeGroupedFalse() long timestamp = 0; List dimensionValues = IntStream.range(0, minBloomFilterBits * 10) - 
.mapToObj(i -> String.format("%010d", i)) + .mapToObj(i -> StringUtils.format("%010d", i)) .collect(Collectors.toCollection(ArrayList::new)); String minDimensionValue = dimensionValues.get(0); String maxDimensionValue = dimensionValues.get(dimensionValues.size() - 1); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java index 0a0559e6049d..c5d84d231636 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.yahoo.sketches.quantiles.ItemsSketch; import org.apache.druid.jackson.JacksonModule; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.TestHelper; import org.hamcrest.Matchers; import org.hamcrest.number.IsCloseTo; @@ -47,7 +48,7 @@ public class StringSketchTest private static final int NUM_STRING = StringSketch.SKETCH_K * FACTOR; private static final double DELTA = ItemsSketch.getNormalizedRankError(StringSketch.SKETCH_K, true) * NUM_STRING; private static final List STRINGS = IntStream.range(0, NUM_STRING) - .mapToObj(i -> String.format("%010d", i)) + .mapToObj(i -> StringUtils.format("%010d", i)) .collect(Collectors.toCollection(ArrayList::new)); private static final String MIN_STRING = STRINGS.get(0); private static final String MAX_STRING = STRINGS.get(NUM_STRING - 1); diff --git a/pom.xml b/pom.xml index 5ee169b44a8e..d1d38f5a74d6 100644 --- a/pom.xml +++ b/pom.xml @@ -987,6 +987,11 @@ sketches-core 0.13.4 + + com.yahoo.datasketches + memory + 0.12.2 + org.apache.calcite From a2f4877933836cf67710a7717efab60a30f3d323 Mon Sep 17 00:00:00 2001 From: 
Chi Cao Minh Date: Fri, 22 Nov 2019 17:32:52 -0800 Subject: [PATCH 03/17] Fixes for integration test --- .../task/CachingLocalSegmentAllocator.java | 18 +++++++++++------- ...hPartitionCachingLocalSegmentAllocator.java | 2 ++ .../druid/indexing/common/task/IndexTask.java | 2 +- ...ePartitionCachingLocalSegmentAllocator.java | 2 ++ .../parallel/ParallelIndexSupervisorTask.java | 4 ++-- .../PartialHashSegmentGenerateTask.java | 1 + .../PartialRangeSegmentGenerateTask.java | 1 + ...titionCachingLocalSegmentAllocatorTest.java | 2 ++ ...titionCachingLocalSegmentAllocatorTest.java | 2 ++ 9 files changed, 24 insertions(+), 10 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java index fbb9081aafa3..279786472e41 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java @@ -23,6 +23,7 @@ import org.apache.druid.indexing.common.TaskLock; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.actions.LockListAction; +import org.apache.druid.indexing.common.actions.SurrogateAction; import org.apache.druid.indexing.common.task.IndexTask.ShardSpecs; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; @@ -56,6 +57,7 @@ interface IntervalToSegmentIdsCreator { /** * @param versionFinder Returns the version for the specified interval + * * @return Information for segment preallocation */ Map> create(Function versionFinder); @@ -64,19 +66,21 @@ interface IntervalToSegmentIdsCreator CachingLocalSegmentAllocator( TaskToolbox toolbox, String taskId, + String supervisorTaskId, IntervalToSegmentIdsCreator intervalToSegmentIdsCreator ) throws IOException { 
this.taskId = taskId; this.sequenceNameToSegmentId = new HashMap<>(); - final Map intervalToVersion = toolbox.getTaskActionClient() - .submit(new LockListAction()) - .stream() - .collect(Collectors.toMap( - TaskLock::getInterval, - TaskLock::getVersion - )); + final Map intervalToVersion = + toolbox.getTaskActionClient() + .submit(new SurrogateAction<>(supervisorTaskId, new LockListAction())) + .stream() + .collect(Collectors.toMap( + TaskLock::getInterval, + TaskLock::getVersion + )); Function versionFinder = interval -> findVersion(intervalToVersion, interval); final Map> intervalToIds = intervalToSegmentIdsCreator.create(versionFinder); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java index 9640ed461358..fa54a76295ca 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java @@ -51,6 +51,7 @@ public class HashPartitionCachingLocalSegmentAllocator implements IndexTaskSegme public HashPartitionCachingLocalSegmentAllocator( TaskToolbox toolbox, String taskId, + String supervisorTaskId, String dataSource, Map> allocateSpec ) throws IOException @@ -62,6 +63,7 @@ public HashPartitionCachingLocalSegmentAllocator( this.delegate = new CachingLocalSegmentAllocator( toolbox, taskId, + supervisorTaskId, this::getIntervalToSegmentIds ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java index 094a713cde16..4fa9fa095139 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java +++ 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/IndexTask.java @@ -828,7 +828,7 @@ private IndexTaskSegmentAllocator createSegmentAllocator( // We use the timeChunk lock and don't have to ask the overlord to create segmentIds. // Instead, a local allocator is used. if (isGuaranteedRollup(ingestionSchema.ioConfig, ingestionSchema.tuningConfig)) { - return new HashPartitionCachingLocalSegmentAllocator(toolbox, getId(), getDataSource(), allocateSpec); + return new HashPartitionCachingLocalSegmentAllocator(toolbox, getId(), getId(), getDataSource(), allocateSpec); } else { return new LocalSegmentAllocator(toolbox, getId(), getDataSource(), dataSchema.getGranularitySpec()); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java index 15c9b56c60d5..d8b8ff25493e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java @@ -52,6 +52,7 @@ public class RangePartitionCachingLocalSegmentAllocator implements IndexTaskSegm public RangePartitionCachingLocalSegmentAllocator( TaskToolbox toolbox, String taskId, + String supervisorTaskId, String dataSource, String partitionDimension, Map intervalsToPartitions @@ -64,6 +65,7 @@ public RangePartitionCachingLocalSegmentAllocator( this.delegate = new CachingLocalSegmentAllocator( toolbox, taskId, + supervisorTaskId, this::getIntervalToSegmentIds ); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index d2e94a675e9b..e63d2b9cd67a 
100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -633,8 +633,8 @@ private static void assertDataSketchesAvailable() //noinspection ResultOfObjectAllocationIgnored new StringSketch(); } - catch (Exception e) { - throw new ISE(e, "DataSketches is unvailable. Try loading the druid-datasketches extension."); + catch (Throwable t) { + throw new ISE(t, "DataSketches is unvailable. Try adding the druid-datasketches extension to the classpath."); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentGenerateTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentGenerateTask.java index d7f886207719..7b6f70b0efd6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentGenerateTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentGenerateTask.java @@ -130,6 +130,7 @@ IndexTaskSegmentAllocator createSegmentAllocator(TaskToolbox toolbox) throws IOE return new HashPartitionCachingLocalSegmentAllocator( toolbox, getId(), + supervisorTaskId, getDataSource(), createShardSpecs() ); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java index 8956fbee2195..5b8e67d7266d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java @@ -153,6 
+153,7 @@ IndexTaskSegmentAllocator createSegmentAllocator(TaskToolbox toolbox) throws IOE return new RangePartitionCachingLocalSegmentAllocator( toolbox, getId(), + supervisorTaskId, getDataSource(), getPartitionDimension(ingestionSchema), intervalToPartitions diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java index 86cb36403c25..a3f4e771abf0 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java @@ -50,6 +50,7 @@ public class RangePartitionCachingLocalSegmentAllocatorTest { private static final String DATASOURCE = "datasource"; private static final String TASKID = "taskid"; + private static final String SUPERVISOR_TASKID = "supervisor-taskid"; private static final String PARTITION_DIMENSION = "dimension"; private static final Interval INTERVAL_EMPTY = Intervals.utc(0, 1000); private static final Interval INTERVAL_SINGLETON = Intervals.utc(1000, 2000); @@ -97,6 +98,7 @@ public void setup() throws IOException target = new RangePartitionCachingLocalSegmentAllocator( toolbox, TASKID, + SUPERVISOR_TASKID, DATASOURCE, PARTITION_DIMENSION, INTERVAL_TO_PARTITONS diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionCachingLocalSegmentAllocatorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionCachingLocalSegmentAllocatorTest.java index 5b60bdf7a610..e82101d7386a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionCachingLocalSegmentAllocatorTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/HashPartitionCachingLocalSegmentAllocatorTest.java @@ -53,6 +53,7 @@ public class HashPartitionCachingLocalSegmentAllocatorTest private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String DATASOURCE = "datasource"; private static final String TASKID = "taskid"; + private static final String SUPERVISOR_TASKID = "supervisor-taskid"; private static final Interval INTERVAL = Intervals.utc(0, 1000); private static final String VERSION = "version"; private static final String DIMENSION = "dim"; @@ -76,6 +77,7 @@ public void setup() throws IOException target = new HashPartitionCachingLocalSegmentAllocator( toolbox, TASKID, + SUPERVISOR_TASKID, DATASOURCE, ALLOCATE_SPEC ); From 6211c4150f1cc58cc8450c867f087a2fb258f203 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Sun, 1 Dec 2019 22:08:03 -0800 Subject: [PATCH 04/17] Address review comments --- .../partition/SingleDimensionShardSpec.java | 23 ++++++ docs/ingestion/native-batch.md | 6 +- indexing-service/pom.xml | 1 + ...> CachingLocalSegmentAllocatorHelper.java} | 4 +- ...PartitionCachingLocalSegmentAllocator.java | 4 +- ...PartitionCachingLocalSegmentAllocator.java | 46 ++++------- ...=> GeneratedPartitionsMetadataReport.java} | 15 ++-- .../parallel/GeneratedPartitionsReport.java | 2 +- .../parallel/ParallelIndexSupervisorTask.java | 58 +++++++------- .../PartialDimensionDistributionTask.java | 49 ++++++++---- ...icSegmentMergeParallelIndexTaskRunner.java | 11 ++- .../PartialGenericSegmentMergeTask.java | 28 +++++-- .../parallel/PartialHashSegmentMergeTask.java | 4 +- ...egmentGenerateParallelIndexTaskRunner.java | 9 ++- .../PartialRangeSegmentGenerateTask.java | 30 +++---- .../parallel/PartialSegmentMergeTask.java | 2 +- ...titionStat.java => PartitionMetadata.java} | 13 +-- .../task/batch/parallel/SubTaskReport.java | 2 +- .../parallel/distribution/Partitions.java | 44 ++++++++++ 
.../distribution/StringDistribution.java | 10 +-- .../parallel/distribution/StringSketch.java | 12 +-- .../common/task/IngestionTestBase.java | 8 +- ...itionCachingLocalSegmentAllocatorTest.java | 51 ++++++++---- .../PartialDimensionDistributionTaskTest.java | 58 +++++++++----- ...atTest.java => PartitionMetadataTest.java} | 6 +- ...rtitionMultiPhaseParallelIndexingTest.java | 6 +- .../parallel/distribution/PartitionsTest.java | 60 ++++++++++++++ .../distribution/StringSketchMergerTest.java | 12 +-- .../distribution/StringSketchTest.java | 80 +++++++++---------- 29 files changed, 417 insertions(+), 237 deletions(-) rename indexing-service/src/main/java/org/apache/druid/indexing/common/task/{CachingLocalSegmentAllocator.java => CachingLocalSegmentAllocatorHelper.java} (97%) rename indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/{GeneratedGenericPartitionsReport.java => GeneratedPartitionsMetadataReport.java} (65%) rename indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/{GenericPartitionStat.java => PartitionMetadata.java} (82%) create mode 100644 indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java rename indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/{GenericPartitionStatTest.java => PartitionMetadataTest.java} (94%) create mode 100644 indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java diff --git a/core/src/main/java/org/apache/druid/timeline/partition/SingleDimensionShardSpec.java b/core/src/main/java/org/apache/druid/timeline/partition/SingleDimensionShardSpec.java index 968a1d74cc98..9db390c462fe 100644 --- a/core/src/main/java/org/apache/druid/timeline/partition/SingleDimensionShardSpec.java +++ b/core/src/main/java/org/apache/druid/timeline/partition/SingleDimensionShardSpec.java @@ -31,6 +31,7 @@ import javax.annotation.Nullable; 
import java.util.List; import java.util.Map; +import java.util.Objects; /** * {@link ShardSpec} for range partitioning based on a single dimension @@ -184,4 +185,26 @@ public String toString() ", partitionNum=" + partitionNum + '}'; } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SingleDimensionShardSpec that = (SingleDimensionShardSpec) o; + return partitionNum == that.partitionNum && + Objects.equals(dimension, that.dimension) && + Objects.equals(start, that.start) && + Objects.equals(end, that.end); + } + + @Override + public int hashCode() + { + return Objects.hash(dimension, start, end, partitionNum); + } } diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index e11986ed7a49..e9d1d4082c07 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -246,8 +246,8 @@ You should use different partitionsSpec depending on the [rollup mode](../ingest For perfect rollup, you should use either `hashed` (partitioning based on the hash of dimensions in each row) or `single_dim` (based on ranges of a single dimension. For best-effort rollup, you should use `dynamic`. -Hashed partitioning is recommended in most cases, as it will improve indexing performance and create more uniformly -sized data segments relative to single-dimension or dynamic partitioning. +For perfect rollup, `ashed partitioning is recommended in most cases, as it will improve indexing +performance and create more uniformly sized data segments relative to single-dimension partitioning. #### Hash-based partitioning @@ -266,9 +266,9 @@ sized data segments relative to single-dimension or dynamic partitioning. |property|description|default|required?| |--------|-----------|-------|---------| |type|This should always be `single_dim`|none|yes| +|partitionDimension|The dimension to partition on. 
Only rows with a single dimension value will be included.|none|yes| |targetRowsPerSegment|Target number of rows to include in a partition, should be a number that targets segments of 500MB\~1GB.|none|either this or `maxRowsPerSegment`| |maxRowsPerSegment|Maximum number of rows to include in a partition. Defaults to 50% larger than the `targetRowsPerSegment`.|none|either this or `targetRowsPerSegment`| -|partitionDimension|The dimension to partition on.|none|yes| |assumeGrouped|Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.|false|no| #### Dynamic partitioning diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 63c0e99e727c..9b3b0089cba0 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -208,6 +208,7 @@ log4j-api + com.yahoo.datasketches sketches-core provided diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocatorHelper.java similarity index 97% rename from indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocatorHelper.java index 279786472e41..1963fb4c2fdc 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/CachingLocalSegmentAllocatorHelper.java @@ -46,7 +46,7 @@ * @see HashPartitionCachingLocalSegmentAllocator * @see RangePartitionCachingLocalSegmentAllocator */ -class CachingLocalSegmentAllocator implements IndexTaskSegmentAllocator +class CachingLocalSegmentAllocatorHelper implements IndexTaskSegmentAllocator { private final String taskId; private final Map 
sequenceNameToSegmentId; @@ -63,7 +63,7 @@ interface IntervalToSegmentIdsCreator Map> create(Function versionFinder); } - CachingLocalSegmentAllocator( + CachingLocalSegmentAllocatorHelper( TaskToolbox toolbox, String taskId, String supervisorTaskId, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java index fa54a76295ca..1c1736930603 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HashPartitionCachingLocalSegmentAllocator.java @@ -39,7 +39,7 @@ /** * Allocates all necessary hash-partitioned segments locally at the beginning and reuses them. * - * @see CachingLocalSegmentAllocator + * @see CachingLocalSegmentAllocatorHelper */ public class HashPartitionCachingLocalSegmentAllocator implements IndexTaskSegmentAllocator { @@ -60,7 +60,7 @@ public HashPartitionCachingLocalSegmentAllocator( this.dataSource = dataSource; this.allocateSpec = allocateSpec; - this.delegate = new CachingLocalSegmentAllocator( + this.delegate = new CachingLocalSegmentAllocatorHelper( toolbox, taskId, supervisorTaskId, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java index d8b8ff25493e..3ef5bd28328c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java @@ -22,6 +22,7 @@ import com.google.common.collect.Maps; import org.apache.druid.data.input.InputRow; import 
org.apache.druid.indexing.common.TaskToolbox; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.partition.SingleDimensionShardSpec; import org.joda.time.Interval; @@ -29,7 +30,6 @@ import javax.annotation.Nullable; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -40,13 +40,13 @@ /** * Allocates all necessary range-partitioned segments locally at the beginning and reuses them. * - * @see CachingLocalSegmentAllocator + * @see CachingLocalSegmentAllocatorHelper */ public class RangePartitionCachingLocalSegmentAllocator implements IndexTaskSegmentAllocator { private final String dataSource; private final String partitionDimension; - private final Map intervalsToPartitions; + private final Map intervalsToPartitions; private final IndexTaskSegmentAllocator delegate; public RangePartitionCachingLocalSegmentAllocator( @@ -55,14 +55,14 @@ public RangePartitionCachingLocalSegmentAllocator( String supervisorTaskId, String dataSource, String partitionDimension, - Map intervalsToPartitions + Map intervalsToPartitions ) throws IOException { this.dataSource = dataSource; this.partitionDimension = partitionDimension; this.intervalsToPartitions = intervalsToPartitions; - this.delegate = new CachingLocalSegmentAllocator( + this.delegate = new CachingLocalSegmentAllocatorHelper( toolbox, taskId, supervisorTaskId, @@ -86,36 +86,23 @@ private Map> getIntervalToSegmentIds(Func return intervalToSegmentIds; } + /** + * Translate {@link org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution} partititions + * into the corresponding {@link org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec} with segment id. 
+ */ private List translatePartitions( Interval interval, - String[] partitions, + Partitions partitions, Function versionFinder ) { - if (partitions.length == 0) { + if (partitions.isEmpty()) { return Collections.emptyList(); } - String[] uniquePartitions = Arrays.stream(partitions).distinct().toArray(String[]::new); + String[] uniquePartitions = partitions.stream().distinct().toArray(String[]::new); int numUniquePartition = uniquePartitions.length; - if (numUniquePartition == 1) { - return Collections.singletonList( - createLastSegmentIdWithShardSpec( - interval, - versionFinder.apply(interval), - uniquePartitions[0], - 0 - ) - ); - } - - if (isLastPartitionOnlyMaxValue(partitions)) { - // The last partition only contains the max value. A shard that just contains the max value is likely to be - // small, so combine it with the second to last one. - numUniquePartition -= 1; - } - List segmentIds = IntStream.range(0, numUniquePartition - 1) .mapToObj(i -> createSegmentIdWithShardSpec( @@ -138,13 +125,6 @@ private List translatePartitions( return segmentIds; } - private boolean isLastPartitionOnlyMaxValue(String[] partitions) - { - String lastPartition = partitions[partitions.length - 1]; - String secondToLastPartition = partitions[partitions.length - 2]; - return !lastPartition.equals(secondToLastPartition); - } - private SegmentIdWithShardSpec createLastSegmentIdWithShardSpec( Interval interval, String version, @@ -163,6 +143,8 @@ private SegmentIdWithShardSpec createSegmentIdWithShardSpec( int partitionNum ) { + // The shardSpec created here will be reused in PartialGenericSegmentMergeTask. This is ok because + // all PartialSegmentGenerateTasks create the same set of segmentIds (and thus shardSpecs). 
return new SegmentIdWithShardSpec( dataSource, interval, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedGenericPartitionsReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsMetadataReport.java similarity index 65% rename from indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedGenericPartitionsReport.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsMetadataReport.java index 0f6570505003..9b50f9f7f37d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedGenericPartitionsReport.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsMetadataReport.java @@ -25,20 +25,19 @@ import java.util.List; /** - * Report containing the {@link GenericPartitionStat}s created by a {@link PartialSegmentGenerateTask}. - * This report is collected by {@link ParallelIndexSupervisorTask} and - * used to generate {@link PartialGenericSegmentMergeIOConfig}. + * Report containing the {@link PartitionMetadata}s created by a {@link PartialSegmentGenerateTask}. This report is + * collected by {@link ParallelIndexSupervisorTask} and used to generate {@link PartialGenericSegmentMergeIOConfig}. 
*/ -class GeneratedGenericPartitionsReport extends GeneratedPartitionsReport implements SubTaskReport +class GeneratedPartitionsMetadataReport extends GeneratedPartitionsReport implements SubTaskReport { - public static final String TYPE = "generated_generic_partitions"; + public static final String TYPE = "generated_partitions_metadata"; @JsonCreator - GeneratedGenericPartitionsReport( + GeneratedPartitionsMetadataReport( @JsonProperty("taskId") String taskId, - @JsonProperty("partitionStats") List partitionStats + @JsonProperty("partitionStats") List partitionMetadata ) { - super(taskId, partitionStats); + super(taskId, partitionMetadata); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsReport.java index 23449dcefeee..bfe8cef79c3f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsReport.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsReport.java @@ -27,7 +27,7 @@ /** * Report containing the {@link PartitionStat}s created by a {@link PartialSegmentGenerateTask}. * This report is collected by {@link ParallelIndexSupervisorTask} and - * used to generate {@link PartialHashSegmentMergeIOConfig}. + * used to generate {@link PartialSegmentMergeIOConfig}. 
*/ abstract class GeneratedPartitionsReport implements SubTaskReport { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index e63d2b9cd67a..84133a857157 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -36,7 +36,6 @@ import org.apache.druid.data.input.impl.InputRowParser; import org.apache.druid.indexer.TaskState; import org.apache.druid.indexer.TaskStatus; -import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; @@ -59,6 +58,7 @@ import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.common.task.Tasks; import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch; @@ -323,7 +323,7 @@ PartialDimensionDistributionParallelIndexTaskRunner createPartialDimensionDistri @VisibleForTesting PartialRangeSegmentGenerateParallelIndexTaskRunner createPartialRangeSegmentGenerateRunner( TaskToolbox toolbox, - Map intervalToPartitions + Map intervalToPartitions ) { return new PartialRangeSegmentGenerateParallelIndexTaskRunner( @@ 
-379,6 +379,22 @@ public boolean isReady(TaskActionClient taskActionClient) throws Exception return determineLockGranularityAndTryLock(taskActionClient, ingestionSchema.getDataSchema().getGranularitySpec()); } + private boolean useRangePartitions() + { + return (ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec() instanceof SingleDimensionPartitionsSpec); + } + + private static void assertDataSketchesAvailable() + { + try { + //noinspection ResultOfObjectAllocationIgnored + new StringSketch(); + } + catch (NoClassDefFoundError e) { + throw new ISE(e, "DataSketches is unvailable. Try adding the druid-datasketches extension to the classpath."); + } + } + @Override public List findSegmentsToLock(TaskActionClient taskActionClient, List intervals) throws IOException @@ -528,14 +544,9 @@ private TaskStatus runSinglePhaseParallel(TaskToolbox toolbox) throws Exception */ private TaskStatus runMultiPhaseParallel(TaskToolbox toolbox) throws Exception { - return useHashPartitions() - ? runHashPartitionMultiPhaseParallel(toolbox) - : runRangePartitionMultiPhaseParallel(toolbox); - } - - private boolean useHashPartitions() - { - return (ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec() instanceof HashedPartitionsSpec); + return useRangePartitions() + ? 
runRangePartitionMultiPhaseParallel(toolbox) + : runHashPartitionMultiPhaseParallel(toolbox); } private TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception @@ -576,8 +587,6 @@ private TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throw private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception { - assertDataSketchesAvailable(); - ParallelIndexTaskRunner distributionRunner = createRunner( toolbox, @@ -589,7 +598,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro return TaskStatus.failure(getId()); } - Map intervalToPartitions = + Map intervalToPartitions = determineAllRangePartitions(distributionRunner.getReports().values()); if (intervalToPartitions.isEmpty()) { @@ -599,7 +608,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro return TaskStatus.success(getId(), msg); } - ParallelIndexTaskRunner> indexingRunner = + ParallelIndexTaskRunner> indexingRunner = createRunner(toolbox, tb -> createPartialRangeSegmentGenerateRunner(tb, intervalToPartitions)); TaskState indexingState = runNextPhase(indexingRunner); @@ -627,18 +636,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro return TaskStatus.fromCode(getId(), mergeState); } - private static void assertDataSketchesAvailable() - { - try { - //noinspection ResultOfObjectAllocationIgnored - new StringSketch(); - } - catch (Throwable t) { - throw new ISE(t, "DataSketches is unvailable. 
Try adding the druid-datasketches extension to the classpath."); - } - } - - private Map determineAllRangePartitions(Collection reports) + private Map determineAllRangePartitions(Collection reports) { Multimap intervalToDistributions = ArrayListMultimap.create(); reports.forEach(report -> { @@ -649,7 +647,7 @@ private Map determineAllRangePartitions(Collection distributions) + private Partitions determineRangePartition(Collection distributions) { StringDistributionMerger distributionMerger = new StringSketchMerger(); distributions.forEach(distributionMerger::merge); @@ -658,7 +656,7 @@ private String[] determineRangePartition(Collection distribu SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec(); - final String[] partitions; + final Partitions partitions; Integer targetRowsPerSegment = partitionsSpec.getTargetRowsPerSegment(); if (targetRowsPerSegment == null) { partitions = mergedDistribution.getEvenPartitionsByMaxSize(partitionsSpec.getMaxRowsPerSegment()); @@ -688,10 +686,10 @@ private static Map, List> groupHa } private static Map, List> groupGenericPartitionLocationsPerPartition( - Map> subTaskIdToReport + Map> subTaskIdToReport ) { - BiFunction createPartitionLocationFunction = + BiFunction createPartitionLocationFunction = (subtaskId, partitionStat) -> new GenericPartitionLocation( partitionStat.getTaskExecutorHost(), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java index 508b5c8615ed..a50239362181 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java 
@@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; import com.google.common.hash.BloomFilter; import org.apache.druid.client.indexing.IndexingServiceClient; import org.apache.druid.data.input.HandlingInputRowIterator; @@ -202,7 +203,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception List metricsNames = Arrays.stream(dataSchema.getAggregators()) .map(AggregatorFactory::getName) .collect(Collectors.toList()); - InputFormat inputFormat = ParallelIndexSupervisorTask.getInputFormat(ingestionSchema); + InputFormat inputFormat = inputSource.needsFormat() + ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) + : null; InputSourceReader inputSourceReader = dataSchema.getTransformSpec().decorate( inputSource.reader( new InputRowSchema( @@ -211,7 +214,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception metricsNames ), inputFormat, - null + toolbox.getIndexingTmpDir() ) ); @@ -244,16 +247,16 @@ private Map determineDistribution( String partitionDimension, boolean isAssumeGrouped, boolean isLogParseExceptions, - long maxParseExceptions + int maxParseExceptions ) { Map intervalToDistribution = new HashMap<>(); DimensionValueFilter dimValueFilter = - isAssumeGrouped + isAssumeGrouped && granularitySpec.isRollup() ? 
new GroupedRowDimensionValueFilter() : ungroupedRowDimValueFilterSupplier.get(); - long numParseExceptions = 0; + int numParseExceptions = 0; while (inputRowIterator.hasNext()) { try { @@ -272,7 +275,7 @@ private Map determineDistribution( String dimensionValue = dimValueFilter.accept( interval, timestamp, - inputRow.getDimension(partitionDimension).get(0) + Iterables.getOnlyElement(inputRow.getDimension(partitionDimension)) ); if (dimensionValue != null) { @@ -319,7 +322,7 @@ private interface DimensionValueFilter * @return Dimension value if it should be accepted, else null */ @Nullable - String accept(Interval interval, DateTime timestamp, String dimesionValue); + String accept(Interval interval, DateTime timestamp, String dimensionValue); /** * @return Minimum dimension value for each interval processed so far. @@ -332,6 +335,10 @@ private interface DimensionValueFilter Map getIntervalToMaxDimensionValue(); } + /** + * Filters out reoccurrences of rows that have timestamps with the same query granularity and dimension value. + * Approximate matching is used, so there is a small probability that rows that are not reoccurences are discarded. 
+ */ @VisibleForTesting static class UngroupedRowDimensionValueFilter implements DimensionValueFilter { @@ -419,18 +426,30 @@ public String accept(Interval interval, DateTime timestamp, String dimensionValu private void updateMinDimensionValue(Interval interval, String dimensionValue) { - String minDimensionValue = intervalToMinDimensionValue.get(interval); - if (minDimensionValue == null || dimensionValue.compareTo(minDimensionValue) < 0) { - intervalToMinDimensionValue.put(interval, dimensionValue); - } + intervalToMinDimensionValue.compute( + interval, + (intervalKey, currentMinValue) -> { + if (currentMinValue == null || dimensionValue.compareTo(currentMinValue) < 0) { + return dimensionValue; + } else { + return currentMinValue; + } + } + ); } private void updateMaxDimensionValue(Interval interval, String dimensionValue) { - String maxDimensionValue = intervalToMaxDimensionValue.get(interval); - if (maxDimensionValue == null || dimensionValue.compareTo(maxDimensionValue) > 0) { - intervalToMaxDimensionValue.put(interval, dimensionValue); - } + intervalToMaxDimensionValue.compute( + interval, + (intervalKey, currentMaxValue) -> { + if (currentMaxValue == null || dimensionValue.compareTo(currentMaxValue) > 0) { + return dimensionValue; + } else { + return currentMaxValue; + } + } + ); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java index e53b1d22451a..de9810342113 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeParallelIndexTaskRunner.java @@ -76,12 +76,11 @@ int getTotalNumSubTasks() 
@VisibleForTesting SubTaskSpec newTaskSpec(PartialGenericSegmentMergeIOConfig ioConfig) { - final PartialGenericSegmentMergeIngestionSpec ingestionSpec = - new PartialGenericSegmentMergeIngestionSpec( - dataSchema, - ioConfig, - getTuningConfig() - ); + final PartialGenericSegmentMergeIngestionSpec ingestionSpec = new PartialGenericSegmentMergeIngestionSpec( + dataSchema, + ioConfig, + getTuningConfig() + ); return new SubTaskSpec( getTaskId() + "_" + getAndIncrementNextSpecId(), getGroupId(), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java index 559a9b5317ef..0c369bf0f106 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; import com.google.common.collect.HashBasedTable; import com.google.common.collect.Table; import org.apache.druid.client.indexing.IndexingServiceClient; @@ -45,7 +46,7 @@ public class PartialGenericSegmentMergeTask extends PartialSegmentMergeTask createIntervalAndIntegerToShardSpec; + private final Table intervalAndIntegerToShardSpec; @JsonCreator public PartialGenericSegmentMergeTask( @@ -78,7 +79,7 @@ public PartialGenericSegmentMergeTask( ); this.ingestionSchema = ingestionSchema; - this.createIntervalAndIntegerToShardSpec = createIntervalAndIntegerToShardSpec( + this.intervalAndIntegerToShardSpec = createIntervalAndIntegerToShardSpec( ingestionSchema.getIOConfig().getPartitionLocations() ); } @@ -90,7 +91,18 @@ 
private static Table createIntervalAndIntegerToSha Table intervalAndIntegerToShardSpec = HashBasedTable.create(); partitionLocations.forEach( - p -> intervalAndIntegerToShardSpec.put(p.getInterval(), p.getPartitionId(), p.getShardSpec()) + p -> { + ShardSpec currShardSpec = intervalAndIntegerToShardSpec.get(p.getInterval(), p.getPartitionId()); + Preconditions.checkArgument( + currShardSpec == null || p.getShardSpec().equals(currShardSpec), + "interval %s, partitionId %s mismatched shard specs: %s", + p.getInterval(), + p.getPartitionId(), + partitionLocations + ); + + intervalAndIntegerToShardSpec.put(p.getInterval(), p.getPartitionId(), p.getShardSpec()); + } ); return intervalAndIntegerToShardSpec; @@ -109,8 +121,14 @@ public String getType() } @Override - ShardSpec createShardSpec(TaskToolbox toolbox, Interval interval, int partitionNum) + ShardSpec createShardSpec(TaskToolbox toolbox, Interval interval, int partitionId) { - return createIntervalAndIntegerToShardSpec.get(interval, partitionNum); + return Preconditions.checkNotNull( + intervalAndIntegerToShardSpec.get(interval, partitionId), + "no shard spec exists for interval %s, partitionId %s: %s", + interval, + partitionId, + intervalAndIntegerToShardSpec.rowMap() + ); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentMergeTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentMergeTask.java index fa23eed2d1a5..157f5e943e7b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentMergeTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialHashSegmentMergeTask.java @@ -102,10 +102,10 @@ public String getType() } @Override - HashBasedNumberedShardSpec createShardSpec(TaskToolbox toolbox, Interval interval, int partitionNum) + HashBasedNumberedShardSpec
createShardSpec(TaskToolbox toolbox, Interval interval, int partitionId) { return new HashBasedNumberedShardSpec( - partitionNum, + partitionId, Preconditions.checkNotNull(partitionsSpec.getNumShards(), "numShards"), partitionsSpec.getPartitionDimensions(), toolbox.getJsonMapper() diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java index 57002a8311c4..06f6ddb1d2b9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java @@ -24,6 +24,7 @@ import org.apache.druid.data.input.InputSplit; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.task.IndexTaskClientFactory; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.joda.time.Interval; @@ -35,11 +36,11 @@ * @see PartialHashSegmentMergeParallelIndexTaskRunner */ class PartialRangeSegmentGenerateParallelIndexTaskRunner - extends InputSourceSplitParallelIndexTaskRunner> + extends InputSourceSplitParallelIndexTaskRunner> { private final IndexTaskClientFactory taskClientFactory; private final AppenderatorsManager appenderatorsManager; - private final Map intervalToPartitions; + private final Map intervalToPartitions; PartialRangeSegmentGenerateParallelIndexTaskRunner( TaskToolbox toolbox, @@ -48,7 +49,7 @@ class PartialRangeSegmentGenerateParallelIndexTaskRunner ParallelIndexIngestionSpec ingestionSchema, Map context, IndexingServiceClient indexingServiceClient, - Map 
intervalToPartitions + Map intervalToPartitions ) { this( @@ -72,7 +73,7 @@ class PartialRangeSegmentGenerateParallelIndexTaskRunner ParallelIndexIngestionSpec ingestionSchema, Map context, IndexingServiceClient indexingServiceClient, - Map intervalToPartitions, + Map intervalToPartitions, IndexTaskClientFactory taskClientFactory, AppenderatorsManager appenderatorsManager ) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java index 5b8e67d7266d..00ec70f22ef9 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java @@ -32,6 +32,7 @@ import org.apache.druid.indexing.common.task.IndexTaskSegmentAllocator; import org.apache.druid.indexing.common.task.RangePartitionCachingLocalSegmentAllocator; import org.apache.druid.indexing.common.task.TaskResource; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; import org.apache.druid.indexing.common.task.batch.parallel.iterator.RangePartitionIndexTaskInputRowIteratorBuilder; import org.apache.druid.indexing.worker.ShuffleDataSegmentPusher; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; @@ -45,12 +46,11 @@ import java.util.stream.Collectors; /** - * The worker task of {@link PartialRangeSegmentGenerateParallelIndexTaskRunner}. This task - * partitions input data by ranges of the partition dimension specified in - * {@link SingleDimensionPartitionsSpec}. Partitioned segments are stored in local storage using - * {@link ShuffleDataSegmentPusher}. + * The worker task of {@link PartialRangeSegmentGenerateParallelIndexTaskRunner}. 
This task partitions input data by + * ranges of the partition dimension specified in {@link SingleDimensionPartitionsSpec}. Partitioned segments are stored + * in local storage using {@link ShuffleDataSegmentPusher}. */ -public class PartialRangeSegmentGenerateTask extends PartialSegmentGenerateTask +public class PartialRangeSegmentGenerateTask extends PartialSegmentGenerateTask { public static final String TYPE = "partial_range_index_generate"; private static final String PROP_SPEC = "spec"; @@ -58,7 +58,7 @@ public class PartialRangeSegmentGenerateTask extends PartialSegmentGenerateTask< private final String supervisorTaskId; private final int numAttempts; private final ParallelIndexIngestionSpec ingestionSchema; - private final Map intervalToPartitions; + private final Map intervalToPartitions; @JsonCreator public PartialRangeSegmentGenerateTask( @@ -70,7 +70,7 @@ public PartialRangeSegmentGenerateTask( @JsonProperty("numAttempts") int numAttempts, // zero-based counting @JsonProperty(PROP_SPEC) ParallelIndexIngestionSpec ingestionSchema, @JsonProperty("context") Map context, - @JsonProperty("intervalToPartitions") Map intervalToPartitions, + @JsonProperty("intervalToPartitions") Map intervalToPartitions, @JacksonInject IndexingServiceClient indexingServiceClient, @JacksonInject IndexTaskClientFactory taskClientFactory, @JacksonInject AppenderatorsManager appenderatorsManager @@ -130,7 +130,7 @@ public String getSupervisorTaskId() } @JsonProperty - public Map getIntervalToPartitions() + public Map getIntervalToPartitions() { return intervalToPartitions; } @@ -161,17 +161,17 @@ IndexTaskSegmentAllocator createSegmentAllocator(TaskToolbox toolbox) throws IOE } @Override - GeneratedGenericPartitionsReport createGeneratedPartitionsReport(TaskToolbox toolbox, List segments) + GeneratedPartitionsMetadataReport createGeneratedPartitionsReport(TaskToolbox toolbox, List segments) { - List partitionStats = segments.stream() - .map(segment -> createPartitionStat(toolbox, 
segment)) - .collect(Collectors.toList()); - return new GeneratedGenericPartitionsReport(getId(), partitionStats); + List partitionsMetadata = segments.stream() + .map(segment -> createPartitionStat(toolbox, segment)) + .collect(Collectors.toList()); + return new GeneratedPartitionsMetadataReport(getId(), partitionsMetadata); } - private GenericPartitionStat createPartitionStat(TaskToolbox toolbox, DataSegment segment) + private PartitionMetadata createPartitionStat(TaskToolbox toolbox, DataSegment segment) { - return new GenericPartitionStat( + return new PartitionMetadata( toolbox.getTaskExecutorNode().getHost(), toolbox.getTaskExecutorNode().getPortToUse(), toolbox.getTaskExecutorNode().isEnableTlsPort(), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java index ea0ac936925c..495a7008565c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialSegmentMergeTask.java @@ -281,7 +281,7 @@ File fetchSegmentFile(File partitionDir, P location) throws IOException /** * Create a {@link ShardSpec} suitable for the desired secondary partitioning strategy. 
*/ - abstract S createShardSpec(TaskToolbox toolbox, Interval interval, int partitionNum); + abstract S createShardSpec(TaskToolbox toolbox, Interval interval, int partitionId); private Set mergeAndPushSegments( TaskToolbox toolbox, diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadata.java similarity index 82% rename from indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadata.java index 04a98c284476..e8f5c4a5503a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadata.java @@ -28,11 +28,12 @@ import java.util.Objects; /** - * Statistics about a partition created by {@link PartialSegmentGenerateTask}. Each partition is a set of data - * of the same time chunk (primary partition key) and the same {@link ShardSpec} (secondary partition key). This class - * holds the statistics of a single partition created by a task. + * Partition description ({@link ShardSpec}) and statistics created by {@link PartialSegmentGenerateTask}. Each + * partition is a set of data of the same time chunk (primary partition key) and the same {@link ShardSpec} (secondary + * partition key). The {@link ShardSpec} is later used by {@link PartialGenericSegmentMergeTask} to merge the partial + * segments. 
*/ -public class GenericPartitionStat extends PartitionStat +public class PartitionMetadata extends PartitionStat { private static final String PROP_SHARD_SPEC = "shardSpec"; @@ -40,7 +41,7 @@ public class GenericPartitionStat extends PartitionStat private final ShardSpec shardSpec; @JsonCreator - public GenericPartitionStat( + public PartitionMetadata( @JsonProperty("taskExecutorHost") String taskExecutorHost, @JsonProperty("taskExecutorPort") int taskExecutorPort, @JsonProperty("useHttps") boolean useHttps, @@ -79,7 +80,7 @@ public boolean equals(Object o) if (!super.equals(o)) { return false; } - GenericPartitionStat that = (GenericPartitionStat) o; + PartitionMetadata that = (PartitionMetadata) o; return Objects.equals(shardSpec, that.shardSpec); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java index 8cc6db91e94e..564b3af8ab6f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/SubTaskReport.java @@ -32,7 +32,7 @@ @Type(name = PushedSegmentsReport.TYPE, value = PushedSegmentsReport.class), @Type(name = GeneratedHashPartitionsReport.TYPE, value = GeneratedHashPartitionsReport.class), @Type(name = DimensionDistributionReport.TYPE, value = DimensionDistributionReport.class), - @Type(name = GeneratedGenericPartitionsReport.TYPE, value = GeneratedGenericPartitionsReport.class) + @Type(name = GeneratedPartitionsMetadataReport.TYPE, value = GeneratedPartitionsMetadataReport.class) }) public interface SubTaskReport { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java 
b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java new file mode 100644 index 000000000000..d3f967923471 --- /dev/null +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import com.google.common.collect.ForwardingList; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * Convenience wrapper to make code more readable. + */ +public class Partitions extends ForwardingList implements List +{ + private final List delegate; + + public Partitions(String... 
partitions) + { + delegate = ImmutableList.copyOf(partitions); + } + + @Override + protected List delegate() + { + return delegate; + } +} diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java index 643a1a8276e8..116dea63f2c2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java @@ -32,17 +32,17 @@ public interface StringDistribution { /** - * Record occurence of {@link String} + * Record occurrence of {@link String} */ void put(String element); /** - * Record occurence of {@link String} if it will become the new minimum element. + * Record occurrence of {@link String} if it will become the new minimum element. */ void putIfNewMin(String element); /** - * Record occurence of {@link String} if it will become the new maximum element; + * Record occurrence of {@link String} if it will become the new maximum element; */ void putIfNewMax(String element); @@ -53,7 +53,7 @@ public interface StringDistribution * @return Array of elements that correspond to the endpoints of evenly-sized partitions of the * sorted elements. */ - String[] getEvenPartitionsByMaxSize(int maxSize); + Partitions getEvenPartitionsByMaxSize(int maxSize); /** * Split the distribution in the fewest number of evenly-sized partitions while honoring a target @@ -62,5 +62,5 @@ public interface StringDistribution * @return Array of elements that correspond to the endpoints of evenly-sized partitions of the * sorted elements. 
*/ - String[] getEvenPartitionsByTargetSize(int targetSize); + Partitions getEvenPartitionsByTargetSize(int targetSize); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java index 74a97b2d7537..7fb71a10e539 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java @@ -86,26 +86,26 @@ public void putIfNewMax(String string) } @Override - public String[] getEvenPartitionsByMaxSize(int maxSize) + public Partitions getEvenPartitionsByMaxSize(int maxSize) { Preconditions.checkArgument(maxSize > 0, "maxSize must be positive but is %s", maxSize); long n = delegate.getN(); double delta = delegate.getNormalizedRankError(true) * n; // account for approx distribution int targetSize = Math.max(1, (int) Math.floor(maxSize - delta)); // floor() to increase chance below max size int evenPartitionCount = (int) Math.ceil((double) n / targetSize); // ceil() to increase chance below max size - return getEventPartitionsByCount(Math.max(1, evenPartitionCount)); + return getEvenPartitionsByCount(Math.max(1, evenPartitionCount)); } @Override - public String[] getEvenPartitionsByTargetSize(int targetSize) + public Partitions getEvenPartitionsByTargetSize(int targetSize) { Preconditions.checkArgument(targetSize > 0, "targetSize must be positive but is %s", targetSize); long n = delegate.getN(); int evenPartitionCount = Math.max(1, (int) Math.round((double) n / targetSize)); - return getEventPartitionsByCount(evenPartitionCount); + return getEvenPartitionsByCount(evenPartitionCount); } - private String[] getEventPartitionsByCount(int evenPartitionCount) + private Partitions getEvenPartitionsByCount(int 
evenPartitionCount) { Preconditions.checkArgument( evenPartitionCount > 0, @@ -113,7 +113,7 @@ private String[] getEventPartitionsByCount(int evenPartitionCount) evenPartitionCount ); String[] partitions = delegate.getQuantiles(evenPartitionCount + 1); // add 1 since this returns endpoints - return (partitions == null) ? new String[0] : partitions; + return new Partitions((partitions == null) ? new String[0] : partitions); } @Override diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index fe6c534e837a..e87e0f796177 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -23,7 +23,6 @@ import com.google.common.base.Optional; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; -import org.apache.druid.common.config.NullHandling; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexing.common.SegmentLoaderFactory; import org.apache.druid.indexing.common.SingleFileTaskReportFileWriter; @@ -63,6 +62,7 @@ import org.apache.druid.segment.loading.SegmentLoader; import org.apache.druid.server.DruidNode; import org.apache.druid.server.metrics.NoopServiceEmitter; +import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.DataSegment; import org.junit.After; import org.junit.Before; @@ -79,12 +79,8 @@ import java.util.Set; import java.util.concurrent.Executor; -public abstract class IngestionTestBase +public abstract class IngestionTestBase extends InitializedNullHandlingTest { - static { - NullHandling.initializeForTests(); - } - @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java index a3f4e771abf0..b4ab9f77a30d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java @@ -25,6 +25,7 @@ import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.actions.LockListAction; import org.apache.druid.indexing.common.actions.TaskActionClient; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; @@ -67,13 +68,23 @@ public class RangePartitionCachingLocalSegmentAllocatorTest private static final String PARTITION0 = "0"; private static final String PARTITION5 = "5"; private static final String PARTITION9 = "9"; - private static final String[] EMPTY_PARTITIONS = new String[]{}; - private static final String[] SINGLETON_PARTITIONS = new String[]{PARTITION0, PARTITION0}; - private static final String[] NORMAL_PARTITIONS = new String[]{PARTITION0, PARTITION5, PARTITION9}; - private static final String[] FREQUENT_MID_PARTITIONS = new String[]{PARTITION0, PARTITION5, PARTITION5, PARTITION9}; - private static final String[] FREQUENT_MAX_PARTITIONS = new String[]{PARTITION0, PARTITION5, PARTITION9, PARTITION9}; + private static final Partitions EMPTY_PARTITIONS = new Partitions(); + private static final Partitions SINGLETON_PARTITIONS = new Partitions(PARTITION0, PARTITION0); + private static final Partitions NORMAL_PARTITIONS = new Partitions(PARTITION0, PARTITION5, 
PARTITION9); + private static final Partitions FREQUENT_MID_PARTITIONS = new Partitions( + PARTITION0, + PARTITION5, + PARTITION5, + PARTITION9 + ); + private static final Partitions FREQUENT_MAX_PARTITIONS = new Partitions( + PARTITION0, + PARTITION5, + PARTITION9, + PARTITION9 + ); - private static final Map INTERVAL_TO_PARTITONS = ImmutableMap.of( + private static final Map INTERVAL_TO_PARTITONS = ImmutableMap.of( INTERVAL_EMPTY, EMPTY_PARTITIONS, INTERVAL_SINGLETON, SINGLETON_PARTITIONS, INTERVAL_NORMAL, NORMAL_PARTITIONS, @@ -108,14 +119,14 @@ public void setup() throws IOException @Test public void failsIfAllocateFromEmptyInterval() { - int dummy = 0; Interval interval = INTERVAL_EMPTY; InputRow row = createInputRow(interval, PARTITION9); exception.expect(IllegalStateException.class); exception.expectMessage("Failed to get shardSpec"); - testAllocate(row, interval, dummy, null); + String sequenceName = target.getSequenceName(interval, row); + allocate(row, sequenceName); } @Test @@ -140,15 +151,16 @@ public void allocatesCorrectShardSpecsForLastPartitionWithoutFrequentValue() { Interval interval = INTERVAL_NORMAL; InputRow row = createInputRow(interval, PARTITION9); - testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).length - 2, null); + testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).size() - 1, null); } @Test - public void allocatesCorrectShardSpecsForLPartitionWithFrequentMid() + public void allocatesCorrectShardSpecsForLastPartitionWithFrequentMid() { Interval interval = INTERVAL_FREQUENT_MID; InputRow row = createInputRow(interval, PARTITION9); - testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).length - 3, null); + Partitions partitions = INTERVAL_TO_PARTITONS.get(interval); + testAllocate(row, interval, partitions.size() - 2, partitions.get(partitions.size() - 1), null); } @Test @@ -156,15 +168,27 @@ public void allocatesCorrectShardSpecsForLastPartitionWithFrequentMax() { Interval interval = 
INTERVAL_FREQUENT_MAX; InputRow row = createInputRow(interval, PARTITION9); - testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).length - 2, null); + testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).size() - 2, null); } + @SuppressWarnings("SameParameterValue") private void testAllocate(InputRow row, Interval interval, int partitionNum) { - testAllocate(row, interval, partitionNum, INTERVAL_TO_PARTITONS.get(interval)[partitionNum + 1]); + testAllocate(row, interval, partitionNum, INTERVAL_TO_PARTITONS.get(interval).get(partitionNum + 1)); } private void testAllocate(InputRow row, Interval interval, int partitionNum, @Nullable String partitionEnd) + { + testAllocate(row, interval, partitionNum, INTERVAL_TO_PARTITONS.get(interval).get(partitionNum), partitionEnd); + } + + private void testAllocate( + InputRow row, + Interval interval, + int partitionNum, + String partitionStart, + @Nullable String partitionEnd + ) { String sequenceName = target.getSequenceName(interval, row); SegmentIdWithShardSpec segmentIdWithShardSpec = allocate(row, sequenceName); @@ -176,7 +200,6 @@ private void testAllocate(InputRow row, Interval interval, int partitionNum, @Nu SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segmentIdWithShardSpec.getShardSpec(); Assert.assertEquals(PARTITION_DIMENSION, shardSpec.getDimension()); Assert.assertEquals(partitionNum, shardSpec.getPartitionNum()); - String partitionStart = INTERVAL_TO_PARTITONS.get(interval)[partitionNum]; Assert.assertEquals(partitionStart, shardSpec.getStart()); Assert.assertEquals(partitionEnd, shardSpec.getEnd()); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java index 6e44472a4dc0..e627068e0e92 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java @@ -33,6 +33,7 @@ import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.task.IndexTaskClientFactory; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.TestHelper; @@ -44,10 +45,12 @@ import org.hamcrest.Matchers; import org.joda.time.Interval; import org.junit.Assert; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.runners.Enclosed; import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import java.util.ArrayList; @@ -135,14 +138,25 @@ public void hasCorrectPrefixForAutomaticId() public static class RunTaskTest { - private static final TaskToolbox TASK_TOOLBOX = null; - @Rule public ExpectedException exception = ExpectedException.none(); + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + @Rule public LoggerCaptureRule logger = new LoggerCaptureRule(PartialDimensionDistributionTask.class); + private TaskToolbox taskToolbox; + + @Before + public void setup() + { + taskToolbox = EasyMock.mock(TaskToolbox.class); + EasyMock.expect(taskToolbox.getIndexingTmpDir()).andStubReturn(temporaryFolder.getRoot()); + EasyMock.replay(taskToolbox); + } + @Test public void requiresPartitionDimension() throws Exception { @@ -158,7 +172,7 @@ public void requiresPartitionDimension() throws Exception .tuningConfig(tuningConfig) .build(); - task.runTask(TASK_TOOLBOX); + 
task.runTask(taskToolbox); } @Test @@ -178,7 +192,7 @@ public void logsParseExceptionsIfEnabled() throws Exception .taskClientFactory(ParallelIndexTestingFactory.createTaskClientFactory()) .build(); - task.runTask(TASK_TOOLBOX); + task.runTask(taskToolbox); List logEvents = logger.getLogEvents(); Assert.assertEquals(1, logEvents.size()); @@ -198,7 +212,7 @@ public void doesNotLogParseExceptionsIfDisabled() throws Exception .taskClientFactory(ParallelIndexTestingFactory.createTaskClientFactory()) .build(); - task.runTask(TASK_TOOLBOX); + task.runTask(taskToolbox); Assert.assertEquals(Collections.emptyList(), logger.getLogEvents()); } @@ -218,7 +232,7 @@ public void failsWhenTooManyParseExceptions() throws Exception exception.expect(RuntimeException.class); exception.expectMessage("Max parse exceptions exceeded"); - task.runTask(TASK_TOOLBOX); + task.runTask(taskToolbox); } @Test @@ -260,11 +274,11 @@ public void sendsCorrectReportWhenAssumeGroupedTrue() Map intervalToDistribution = report.getIntervalToDistribution(); StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); Assert.assertNotNull(distribution); - String[] partitions = distribution.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(3, partitions.length); - Assert.assertEquals(dimensionValue, partitions[0]); - Assert.assertEquals(dimensionValue, partitions[1]); - Assert.assertEquals(dimensionValue, partitions[2]); + Partitions partitions = distribution.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(3, partitions.size()); + Assert.assertEquals(dimensionValue, partitions.get(0)); + Assert.assertEquals(dimensionValue, partitions.get(1)); + Assert.assertEquals(dimensionValue, partitions.get(2)); } @Test @@ -291,10 +305,10 @@ public void groupsRowsWhenAssumeGroupedFalse() Map intervalToDistribution = report.getIntervalToDistribution(); StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); Assert.assertNotNull(distribution); - 
String[] partitions = distribution.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(2, partitions.length); - Assert.assertEquals(dimensionValue, partitions[0]); - Assert.assertEquals(dimensionValue, partitions[1]); + Partitions partitions = distribution.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(2, partitions.size()); + Assert.assertEquals(dimensionValue, partitions.get(0)); + Assert.assertEquals(dimensionValue, partitions.get(1)); } @Test @@ -346,10 +360,10 @@ public void preservesMinAndMaxWhenAssumeGroupedFalse() Map intervalToDistribution = report.getIntervalToDistribution(); StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); Assert.assertNotNull(distribution); - String[] partitions = distribution.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(minBloomFilterBits + 3, partitions.length); // 3 = min + max + exclusive endpoint - Assert.assertEquals(minDimensionValue, partitions[0]); - Assert.assertEquals(maxDimensionValue, partitions[partitions.length - 1]); + Partitions partitions = distribution.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(minBloomFilterBits + 3, partitions.size()); // 3 = min + max + exclusive endpoint + Assert.assertEquals(minDimensionValue, partitions.get(0)); + Assert.assertEquals(maxDimensionValue, partitions.get(partitions.size() - 1)); } @Test @@ -359,13 +373,13 @@ public void returnsSuccessIfNoExceptions() throws Exception .taskClientFactory(ParallelIndexTestingFactory.createTaskClientFactory()) .build(); - TaskStatus taskStatus = task.runTask(TASK_TOOLBOX); + TaskStatus taskStatus = task.runTask(taskToolbox); Assert.assertEquals(ParallelIndexTestingFactory.ID, taskStatus.getId()); Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); } - private static DimensionDistributionReport runTask(PartialDimensionDistributionTaskBuilder taskBuilder) + private DimensionDistributionReport runTask(PartialDimensionDistributionTaskBuilder taskBuilder) { Capture 
reportCapture = Capture.newInstance(); ParallelIndexSupervisorTaskClient taskClient = EasyMock.mock(ParallelIndexSupervisorTaskClient.class); @@ -375,7 +389,7 @@ private static DimensionDistributionReport runTask(PartialDimensionDistributionT try { taskBuilder.taskClientFactory((taskInfoProvider, callerId, numThreads, httpTimeout, numRetries) -> taskClient) .build() - .runTask(TASK_TOOLBOX); + .runTask(taskToolbox); } catch (Exception e) { throw new RuntimeException(e); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadataTest.java similarity index 94% rename from indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java rename to indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadataTest.java index 2bcac8edfd47..3deb64d391e7 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadataTest.java @@ -25,16 +25,16 @@ import org.junit.Before; import org.junit.Test; -public class GenericPartitionStatTest +public class PartitionMetadataTest { private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); - private GenericPartitionStat target; + private PartitionMetadata target; @Before public void setup() { - target = new GenericPartitionStat( + target = new PartitionMetadata( ParallelIndexTestingFactory.TASK_EXECUTOR_HOST, ParallelIndexTestingFactory.TASK_EXECUTOR_PORT, ParallelIndexTestingFactory.USE_HTTPS, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index 26814d2c3040..0ab776864251 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -36,6 +36,7 @@ import org.apache.druid.indexing.common.task.IndexTaskClientFactory; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.common.task.TestAppenderatorsManager; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; @@ -217,6 +218,7 @@ private static void assertNumPartition( expectedNumPartition -= 1; } expectedNumPartition *= NUM_DAY; + expectedNumPartition += 1; // max dimension value has its own partition Assert.assertEquals(expectedNumPartition, segments.size()); } @@ -289,7 +291,7 @@ PartialDimensionDistributionParallelIndexTaskRunner createPartialDimensionDistri @Override PartialRangeSegmentGenerateParallelIndexTaskRunner createPartialRangeSegmentGenerateRunner( TaskToolbox toolbox, - Map intervalToPartitions + Map intervalToPartitions ) { return new TestPartialRangeSegmentGenerateRunner( @@ -342,7 +344,7 @@ private TestPartialRangeSegmentGenerateRunner( TaskToolbox toolbox, ParallelIndexSupervisorTask supervisorTask, IndexingServiceClient indexingServiceClient, - Map intervalToPartitions + Map intervalToPartitions ) { super( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java new 
file mode 100644 index 000000000000..6564ec8db368 --- /dev/null +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.indexing.common.task.batch.parallel.distribution; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; + +public class PartitionsTest +{ + private Partitions target; + private String[] values; + + @Before + public void setup() + { + values = new String[]{"a", "b"}; + target = new Partitions(values); + } + + @Test + public void hasCorrectValues() + { + Assert.assertEquals(Arrays.asList(values), target); + } + + @Test(expected = UnsupportedOperationException.class) + public void isImmutable() + { + target.add("should fail"); + } + + @Test + public void cannotBeIndirectlyModified() + { + String[] originalValues = Arrays.copyOf(values, values.length); + values[0] = "changed"; + Assert.assertEquals(Arrays.asList(originalValues), target); + Assert.assertNotEquals(Arrays.asList(values), target); + } +} diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java index 5a39b585a849..9ca6c07b8835 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java @@ -70,11 +70,11 @@ public void mergesCorrectly() target.merge(sketch3); StringDistribution merged = target.getResult(); - String[] partitions = merged.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(4, partitions.length); - Assert.assertEquals(string1, partitions[0]); // min - Assert.assertEquals(string2, partitions[1]); // median - Assert.assertEquals(string3, partitions[2]); // max - Assert.assertEquals(string3, partitions[3]); // max + Partitions partitions = merged.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(4, 
partitions.size()); + Assert.assertEquals(string1, partitions.get(0)); // min + Assert.assertEquals(string2, partitions.get(1)); // median + Assert.assertEquals(string3, partitions.get(2)); // max + Assert.assertEquals(string3, partitions.get(3)); // max } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java index c5d84d231636..d49868187e6a 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java @@ -155,8 +155,8 @@ public void requiresPositiveSize() public void handlesEmptySketch() { StringSketch sketch = new StringSketch(); - String[] partitions = sketch.getEvenPartitionsByTargetSize(1); - Assert.assertEquals(0, partitions.length); + Partitions partitions = sketch.getEvenPartitionsByTargetSize(1); + Assert.assertEquals(0, partitions.size()); } @Test @@ -165,16 +165,16 @@ public void handlesSingletonSketch() String value = MIN_STRING; StringSketch sketch = new StringSketch(); sketch.put(value); - String[] partitions = sketch.getEvenPartitionsByTargetSize(1); - Assert.assertEquals(2, partitions.length); - Assert.assertEquals(value, partitions[0]); - Assert.assertEquals(value, partitions[1]); + Partitions partitions = sketch.getEvenPartitionsByTargetSize(1); + Assert.assertEquals(2, partitions.size()); + Assert.assertEquals(value, partitions.get(0)); + Assert.assertEquals(value, partitions.get(1)); } @Test public void handlesMinimimumSize() { - String[] partitions = SKETCH.getEvenPartitionsByTargetSize(1); + Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(1); assertMaxNumberOfPartitions(partitions); } @@ -187,7 +187,7 @@ public void handlesUnevenPartitions() 
private static void testHandlesUnevenPartitions(int targetSize) { - String[] partitions = SKETCH.getEvenPartitionsByTargetSize(targetSize); + Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(targetSize); assertFirstAndLastPartitionsCorrect(partitions); @@ -196,18 +196,18 @@ private static void testHandlesUnevenPartitions(int targetSize) int expectedLowPartitionCount = expectedHighPartitionCount - 1; Assert.assertThat( "targetSize=" + targetSize + " " + partitionsString, - partitions.length, + partitions.size(), Matchers.lessThanOrEqualTo(expectedHighPartitionCount + 1) ); Assert.assertThat( "targetSize=" + targetSize + " " + partitionsString, - partitions.length, + partitions.size(), Matchers.greaterThanOrEqualTo(expectedLowPartitionCount + 1) ); int previous = 0; - for (int i = 1; i < partitions.length; i++) { - int current = Integer.parseInt(partitions[i]); + for (int i = 1; i < partitions.size(); i++) { + int current = Integer.parseInt(partitions.get(i)); int size = current - previous; Assert.assertThat( getErrMsgPrefix(targetSize, i) + partitionsString, @@ -221,14 +221,14 @@ private static void testHandlesUnevenPartitions(int targetSize) @Test public void handlesSinglePartition() { - String[] partitions = SKETCH.getEvenPartitionsByTargetSize(NUM_STRING); + Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(NUM_STRING); assertSinglePartition(partitions); } @Test public void handlesOversizedPartition() { - String[] partitions = SKETCH.getEvenPartitionsByTargetSize(Integer.MAX_VALUE); + Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(Integer.MAX_VALUE); assertSinglePartition(partitions); } } @@ -251,8 +251,8 @@ public void requiresPositiveSize() public void handlesEmptySketch() { StringSketch sketch = new StringSketch(); - String[] partitions = sketch.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(0, partitions.length); + Partitions partitions = sketch.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(0, 
partitions.size()); } @Test @@ -261,16 +261,16 @@ public void handlesSingletonSketch() String value = MIN_STRING; StringSketch sketch = new StringSketch(); sketch.put(value); - String[] partitions = sketch.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(2, partitions.length); - Assert.assertEquals(value, partitions[0]); - Assert.assertEquals(value, partitions[1]); + Partitions partitions = sketch.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(2, partitions.size()); + Assert.assertEquals(value, partitions.get(0)); + Assert.assertEquals(value, partitions.get(1)); } @Test public void handlesMinimimumSize() { - String[] partitions = SKETCH.getEvenPartitionsByMaxSize(1); + Partitions partitions = SKETCH.getEvenPartitionsByMaxSize(1); assertMaxNumberOfPartitions(partitions); } @@ -283,7 +283,7 @@ public void handlesUnevenPartitions() private static void testHandlesUnevenPartitions(int maxSize) { - String[] partitions = SKETCH.getEvenPartitionsByMaxSize(maxSize); + Partitions partitions = SKETCH.getEvenPartitionsByMaxSize(maxSize); assertFirstAndLastPartitionsCorrect(partitions); @@ -292,14 +292,14 @@ private static void testHandlesUnevenPartitions(int maxSize) Assert.assertEquals( "maxSize=" + maxSize + " " + partitionsString, expectedPartitionCount + 1, - partitions.length + partitions.size() ); double minSize = (double) NUM_STRING / expectedPartitionCount - DELTA; int previous = 0; - for (int i = 1; i < partitions.length; i++) { - int current = Integer.parseInt(partitions[i]); + for (int i = 1; i < partitions.size(); i++) { + int current = Integer.parseInt(partitions.get(i)); int size = current - previous; Assert.assertThat( getErrMsgPrefix(maxSize, i) + partitionsString, @@ -318,28 +318,28 @@ private static void testHandlesUnevenPartitions(int maxSize) @Test public void handlesSinglePartition() { - String[] partitions = SKETCH.getEvenPartitionsByMaxSize((int) Math.ceil(NUM_STRING + DELTA)); + Partitions partitions = SKETCH.getEvenPartitionsByMaxSize((int) 
Math.ceil(NUM_STRING + DELTA)); assertSinglePartition(partitions); } @Test public void handlesOversizedPartition() { - String[] partitions = SKETCH.getEvenPartitionsByMaxSize(Integer.MAX_VALUE); + Partitions partitions = SKETCH.getEvenPartitionsByMaxSize(Integer.MAX_VALUE); assertSinglePartition(partitions); } } - private static void assertMaxNumberOfPartitions(String[] partitions) + private static void assertMaxNumberOfPartitions(Partitions partitions) { String partitionsString = toString(partitions); - Assert.assertEquals(partitionsString, NUM_STRING + 1, partitions.length); + Assert.assertEquals(partitionsString, NUM_STRING + 1, partitions.size()); assertFirstAndLastPartitionsCorrect(partitions); int previous = 0; - for (int i = 1; i < partitions.length; i++) { - int current = Integer.parseInt(partitions[i]); + for (int i = 1; i < partitions.size(); i++) { + int current = Integer.parseInt(partitions.get(i)); Assert.assertEquals( getErrMsgPrefix(1, i) + partitionsString, 1, @@ -350,16 +350,16 @@ private static void assertMaxNumberOfPartitions(String[] partitions) } } - private static void assertSinglePartition(String[] partitions) + private static void assertSinglePartition(Partitions partitions) { - Assert.assertEquals(2, partitions.length); + Assert.assertEquals(2, partitions.size()); assertFirstAndLastPartitionsCorrect(partitions); } - private static void assertFirstAndLastPartitionsCorrect(String[] partitions) + private static void assertFirstAndLastPartitionsCorrect(Partitions partitions) { - Assert.assertEquals(MIN_STRING, partitions[0]); - Assert.assertEquals(MAX_STRING, partitions[partitions.length - 1]); + Assert.assertEquals(MIN_STRING, partitions.get(0)); + Assert.assertEquals(MAX_STRING, partitions.get(partitions.size() - 1)); } private static String getErrMsgPrefix(int size, int i) @@ -367,12 +367,12 @@ private static String getErrMsgPrefix(int size, int i) return "size=" + size + " i=" + i + " of "; } - private static String toString(String[] 
partitions) + private static String toString(Partitions partitions) { - String prefix = "partitions[" + partitions.length + "]="; + String prefix = "partitions[" + partitions.size() + "]="; StringJoiner sj = new StringJoiner(" ", prefix, "]"); - for (int i = 0; i < partitions.length; i++) { - sj.add("[" + i + "]=" + partitions[i]); + for (int i = 0; i < partitions.size(); i++) { + sj.add("[" + i + "]=" + partitions.get(i)); } return sj.toString(); } From 15d35228ffa0d5f07d5864f3a0960458728e432a Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Tue, 3 Dec 2019 12:03:05 -0800 Subject: [PATCH 05/17] Fix docs, strict compile, sketch check, rollup check --- docs/ingestion/native-batch.md | 4 +-- .../parallel/ParallelIndexSupervisorTask.java | 3 ++ .../PartialDimensionDistributionTask.java | 32 +++++++++++-------- .../PartialGenericSegmentMergeTask.java | 4 +-- .../PartialDimensionDistributionTaskTest.java | 22 ++++++------- 5 files changed, 36 insertions(+), 29 deletions(-) diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 24e2f479ca98..0202e937bfea 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -246,7 +246,7 @@ You should use different partitionsSpec depending on the [rollup mode](../ingest For perfect rollup, you should use either `hashed` (partitioning based on the hash of dimensions in each row) or `single_dim` (based on ranges of a single dimension. For best-effort rollup, you should use `dynamic`. -For perfect rollup, `ashed partitioning is recommended in most cases, as it will improve indexing +For perfect rollup, `hashed` partitioning is recommended in most cases, as it will improve indexing performance and create more uniformly sized data segments relative to single-dimension partitioning. 
#### Hash-based partitioning @@ -261,7 +261,7 @@ performance and create more uniformly sized data segments relative to single-dim > Single-dimension range partitioning currently requires the > [druid-datasketches](../development/extensions-core/datasketches-extension.md) -> extension to be loaded. +> extension to be added to the classpath. |property|description|default|required?| |--------|-----------|-------|---------| diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 84133a857157..dd13cbf2b21e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -376,6 +376,9 @@ PartialGenericSegmentMergeParallelIndexTaskRunner createPartialGenericSegmentMer @Override public boolean isReady(TaskActionClient taskActionClient) throws Exception { + if (useRangePartitions()) { + assertDataSketchesAvailable(); + } return determineLockGranularityAndTryLock(taskActionClient, ingestionSchema.getDataSchema().getGranularitySpec()); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java index a50239362181..7b013b8cb926 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java @@ -82,7 +82,7 @@ public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask private final 
IndexTaskClientFactory taskClientFactory; // For testing - private final Supplier ungroupedRowDimValueFilterSupplier; + private final Supplier dedupRowDimValueFilterSupplier; @JsonCreator PartialDimensionDistributionTask( @@ -108,7 +108,7 @@ public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask context, indexingServiceClient, taskClientFactory, - () -> new UngroupedRowDimensionValueFilter( + () -> new DedupRowDimensionValueFilter( ingestionSchema.getDataSchema().getGranularitySpec().getQueryGranularity() ) ); @@ -125,7 +125,7 @@ public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask final Map context, IndexingServiceClient indexingServiceClient, IndexTaskClientFactory taskClientFactory, - Supplier ungroupedRowDimValueFilterSupplier + Supplier dedupRowDimValueFilterSupplier ) { super( @@ -148,7 +148,7 @@ public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask this.supervisorTaskId = supervisorTaskId; this.indexingServiceClient = indexingServiceClient; this.taskClientFactory = taskClientFactory; - this.ungroupedRowDimValueFilterSupplier = ungroupedRowDimValueFilterSupplier; + this.dedupRowDimValueFilterSupplier = dedupRowDimValueFilterSupplier; } @JsonProperty @@ -252,9 +252,9 @@ private Map determineDistribution( { Map intervalToDistribution = new HashMap<>(); DimensionValueFilter dimValueFilter = - isAssumeGrouped && granularitySpec.isRollup() - ? new GroupedRowDimensionValueFilter() - : ungroupedRowDimValueFilterSupplier.get(); + !isAssumeGrouped && granularitySpec.isRollup() + ? dedupRowDimValueFilterSupplier.get() + : new PassthroughRowDimensionValueFilter(); int numParseExceptions = 0; @@ -340,7 +340,7 @@ private interface DimensionValueFilter * Approximate matching is used, so there is a small probability that rows that are not reoccurences are discarded. 
*/ @VisibleForTesting - static class UngroupedRowDimensionValueFilter implements DimensionValueFilter + static class DedupRowDimensionValueFilter implements DimensionValueFilter { // A bloom filter is used to approximately group rows by query granularity. These values assume // time chunks have fewer than BLOOM_FILTER_EXPECTED_INSERTIONS rows. With the below values, the @@ -351,23 +351,23 @@ static class UngroupedRowDimensionValueFilter implements DimensionValueFilter private static final int BLOOM_FILTER_EXPECTED_INSERTIONS = 100_000_000; private static final double BLOOM_FILTER_EXPECTED_FALSE_POSITIVE_PROBABILTY = 0.001; - private final GroupedRowDimensionValueFilter delegate; + private final PassthroughRowDimensionValueFilter delegate; private final TimeDimTupleFactory timeDimTupleFactory; private final BloomFilter timeDimTupleBloomFilter; - UngroupedRowDimensionValueFilter(Granularity queryGranularity) + DedupRowDimensionValueFilter(Granularity queryGranularity) { this(queryGranularity, BLOOM_FILTER_EXPECTED_INSERTIONS, BLOOM_FILTER_EXPECTED_FALSE_POSITIVE_PROBABILTY); } @VisibleForTesting // to allow controlling false positive rate of bloom filter - UngroupedRowDimensionValueFilter( + DedupRowDimensionValueFilter( Granularity queryGranularity, int bloomFilterExpectedInsertions, double bloomFilterFalsePositiveProbability ) { - delegate = new GroupedRowDimensionValueFilter(); + delegate = new PassthroughRowDimensionValueFilter(); timeDimTupleFactory = new TimeDimTupleFactory(queryGranularity); timeDimTupleBloomFilter = BloomFilter.create( TimeDimTupleFunnel.INSTANCE, @@ -404,12 +404,16 @@ public Map getIntervalToMaxDimensionValue() } } - private static class GroupedRowDimensionValueFilter implements DimensionValueFilter + /** + * Accepts all input rows, even if they are reoccurrences of timestamps with the same query granularity and dimension + * value. 
+ */ + private static class PassthroughRowDimensionValueFilter implements DimensionValueFilter { private final Map intervalToMinDimensionValue; private final Map intervalToMaxDimensionValue; - GroupedRowDimensionValueFilter() + PassthroughRowDimensionValueFilter() { this.intervalToMinDimensionValue = new HashMap<>(); this.intervalToMaxDimensionValue = new HashMap<>(); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java index 0c369bf0f106..56865750fa6d 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialGenericSegmentMergeTask.java @@ -95,7 +95,7 @@ private static Table createIntervalAndIntegerToSha ShardSpec currShardSpec = intervalAndIntegerToShardSpec.get(p.getInterval(), p.getPartitionId()); Preconditions.checkArgument( currShardSpec == null || p.getShardSpec().equals(currShardSpec), - "interval %s, partitionId %d mismatched shard specs: %s", + "interval %s, partitionId %s mismatched shard specs: %s", p.getInterval(), p.getPartitionId(), partitionLocations @@ -125,7 +125,7 @@ ShardSpec createShardSpec(TaskToolbox toolbox, Interval interval, int partitionI { return Preconditions.checkNotNull( intervalAndIntegerToShardSpec.get(interval, partitionId), - "no shard spec exists for interval %s, partitionId %d: %s", + "no shard spec exists for interval %s, partitionId %s: %s", interval, partitionId, intervalAndIntegerToShardSpec.rowMap() diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java index e627068e0e92..e7334ba0cf45 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java @@ -346,8 +346,8 @@ public void preservesMinAndMaxWhenAssumeGroupedFalse() .tuningConfig(tuningConfig) .dataSchema(dataSchema) .inputSource(inlineInputSource) - .ungroupedRowDimValueFilterSupplier( - () -> new PartialDimensionDistributionTask.UngroupedRowDimensionValueFilter( + .dedupRowDimValueFilterSupplier( + () -> new PartialDimensionDistributionTask.DedupRowDimensionValueFilter( dataSchema.getGranularitySpec().getQueryGranularity(), smallBloomFilter, manyFalsePositiveBloomFilter @@ -412,8 +412,8 @@ private static class PartialDimensionDistributionTaskBuilder ParallelIndexTestingFactory.createDataSchema(ParallelIndexTestingFactory.INPUT_INTERVALS); private IndexTaskClientFactory taskClientFactory = ParallelIndexTestingFactory.TASK_CLIENT_FACTORY; - private Supplier - ungroupedRowDimValueFilterSupplier = null; + private Supplier dedupRowDimValueFilterSupplier = + null; @SuppressWarnings("SameParameterValue") PartialDimensionDistributionTaskBuilder id(String id) @@ -448,11 +448,11 @@ PartialDimensionDistributionTaskBuilder taskClientFactory( return this; } - PartialDimensionDistributionTaskBuilder ungroupedRowDimValueFilterSupplier( - Supplier ungroupedRowDimValueFilterSupplier + PartialDimensionDistributionTaskBuilder dedupRowDimValueFilterSupplier( + Supplier dedupRowDimValueFilterSupplier ) { - this.ungroupedRowDimValueFilterSupplier = ungroupedRowDimValueFilterSupplier; + this.dedupRowDimValueFilterSupplier = dedupRowDimValueFilterSupplier; return this; } @@ -461,12 +461,12 @@ PartialDimensionDistributionTask build() 
ParallelIndexIngestionSpec ingestionSpec = ParallelIndexTestingFactory.createIngestionSpec(inputSource, INPUT_FORMAT, tuningConfig, dataSchema); - Supplier supplier = - ungroupedRowDimValueFilterSupplier == null - ? () -> new PartialDimensionDistributionTask.UngroupedRowDimensionValueFilter( + Supplier supplier = + dedupRowDimValueFilterSupplier == null + ? () -> new PartialDimensionDistributionTask.DedupRowDimensionValueFilter( dataSchema.getGranularitySpec().getQueryGranularity() ) - : ungroupedRowDimValueFilterSupplier; + : dedupRowDimValueFilterSupplier; return new PartialDimensionDistributionTask( id, From 5f10caeebf96e3de62272e86a4f86864e872db9c Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Wed, 4 Dec 2019 18:23:32 -0800 Subject: [PATCH 06/17] Fix first shard spec, partition serde, single subtask --- ...PartitionCachingLocalSegmentAllocator.java | 4 ++++ .../parallel/ParallelIndexSupervisorTask.java | 6 ++++- .../parallel/distribution/Partitions.java | 8 +++++++ ...itionCachingLocalSegmentAllocatorTest.java | 23 ++++++++++++++++--- .../PartialRangeSegmentGenerateTaskTest.java | 5 +++- .../parallel/distribution/PartitionsTest.java | 7 ++++++ 6 files changed, 48 insertions(+), 5 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java index 3ef5bd28328c..59cf8513fc3a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java @@ -103,6 +103,9 @@ private List translatePartitions( String[] uniquePartitions = partitions.stream().distinct().toArray(String[]::new); int numUniquePartition = uniquePartitions.length; + // First partition starts with null (see 
StringPartitionChunk.isStart()) + uniquePartitions[0] = null; + List segmentIds = IntStream.range(0, numUniquePartition - 1) .mapToObj(i -> createSegmentIdWithShardSpec( @@ -132,6 +135,7 @@ private SegmentIdWithShardSpec createLastSegmentIdWithShardSpec( int partitionNum ) { + // Last partition ends with null (see StringPartitionChunk.isEnd()) return createSegmentIdWithShardSpec(interval, version, partitionStart, null, partitionNum); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index dd13cbf2b21e..130a05cc975f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -512,7 +512,11 @@ private void initializeSubTaskCleaner() private boolean isParallelMode() { - return baseInputSource.isSplittable() && ingestionSchema.getTuningConfig().getMaxNumConcurrentSubTasks() > 1; + // Range partitioning is not implemented for runSequential() (but hash partitioning is) + int minRequiredNumConcurrentSubTasks = useRangePartitions() ? 
1 : 2; + + return baseInputSource.isSplittable() + && ingestionSchema.getTuningConfig().getMaxNumConcurrentSubTasks() >= minRequiredNumConcurrentSubTasks; } /** diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java index d3f967923471..f19bb4fea406 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java @@ -22,6 +22,7 @@ import com.google.common.collect.ForwardingList; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; import java.util.List; /** @@ -31,6 +32,13 @@ public class Partitions extends ForwardingList implements List { private final List delegate; + // For jackson + @SuppressWarnings("unused") + private Partitions() + { + delegate = new ArrayList<>(); + } + public Partitions(String... 
partitions) { delegate = ImmutableList.copyOf(partitions); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java index b4ab9f77a30d..590069bf946f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java @@ -174,19 +174,36 @@ public void allocatesCorrectShardSpecsForLastPartitionWithFrequentMax() @SuppressWarnings("SameParameterValue") private void testAllocate(InputRow row, Interval interval, int partitionNum) { - testAllocate(row, interval, partitionNum, INTERVAL_TO_PARTITONS.get(interval).get(partitionNum + 1)); + String partitionEnd = getPartitionEnd(interval, partitionNum); + testAllocate(row, interval, partitionNum, partitionEnd); + } + + @Nullable + private static String getPartitionEnd(Interval interval, int partitionNum) + { + Partitions partitions = INTERVAL_TO_PARTITONS.get(interval); + boolean isLastPartition = (partitionNum + 1) == partitions.size(); + return isLastPartition ? null : partitions.get(partitionNum + 1); } private void testAllocate(InputRow row, Interval interval, int partitionNum, @Nullable String partitionEnd) { - testAllocate(row, interval, partitionNum, INTERVAL_TO_PARTITONS.get(interval).get(partitionNum), partitionEnd); + String partitionStart = getPartitionStart(interval, partitionNum); + testAllocate(row, interval, partitionNum, partitionStart, partitionEnd); + } + + @Nullable + private static String getPartitionStart(Interval interval, int partitionNum) + { + boolean isFirstPartition = partitionNum == 0; + return isFirstPartition ? 
null : INTERVAL_TO_PARTITONS.get(interval).get(partitionNum); } private void testAllocate( InputRow row, Interval interval, int partitionNum, - String partitionStart, + @Nullable String partitionStart, @Nullable String partitionEnd ) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java index 67a4919cd9df..9bfc1f53fbf1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java @@ -20,6 +20,7 @@ package org.apache.druid.indexing.common.task.batch.parallel; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; import org.apache.druid.data.input.InputFormat; import org.apache.druid.data.input.InputSource; import org.apache.druid.data.input.impl.InlineInputSource; @@ -27,6 +28,8 @@ import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexing.common.task.IndexTaskClientFactory; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.indexing.DataSchema; import org.hamcrest.Matchers; @@ -141,7 +144,7 @@ PartialRangeSegmentGenerateTask build() ParallelIndexTestingFactory.NUM_ATTEMPTS, ingestionSpec, ParallelIndexTestingFactory.CONTEXT, - Collections.emptyMap(), + ImmutableMap.of(Intervals.ETERNITY, new Partitions("a")), ParallelIndexTestingFactory.INDEXING_SERVICE_CLIENT, taskClientFactory, ParallelIndexTestingFactory.APPENDERATORS_MANAGER diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java index 6564ec8db368..861ec2323b57 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java @@ -19,6 +19,7 @@ package org.apache.druid.indexing.common.task.batch.parallel.distribution; +import org.apache.druid.segment.TestHelper; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -57,4 +58,10 @@ public void cannotBeIndirectlyModified() Assert.assertEquals(Arrays.asList(originalValues), target); Assert.assertNotEquals(Arrays.asList(values), target); } + + @Test + public void serializesDeserializes() + { + TestHelper.testSerializesDeserializes(TestHelper.JSON_MAPPER, target); + } } From c338bc0ac48647ba2b9e0ac912f48195bed5df20 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Wed, 4 Dec 2019 19:27:26 -0800 Subject: [PATCH 07/17] Fix first partition check in test --- .../RangePartitionMultiPhaseParallelIndexingTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index 0ab776864251..39e8fabc4a77 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -236,11 +236,13 @@ private static void 
assertValuesInRange(List values, DataSegment segment { SingleDimensionShardSpec shardSpec = (SingleDimensionShardSpec) segment.getShardSpec(); String start = shardSpec.getStart(); - Assert.assertNotNull(start); String end = shardSpec.getEnd(); + Assert.assertTrue(shardSpec.toString(), start != null || end != null); for (String value : values) { - Assert.assertThat(value.compareTo(start), Matchers.greaterThanOrEqualTo(0)); + if (start != null) { + Assert.assertThat(value.compareTo(start), Matchers.greaterThanOrEqualTo(0)); + } if (end != null) { Assert.assertThat(value.compareTo(end), Matchers.lessThan(0)); From 83ab7a86a24c1eb739c3f6e44974d68b188ab898 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Thu, 5 Dec 2019 21:33:29 -0800 Subject: [PATCH 08/17] Misc rewording/refactoring to address code review --- docs/ingestion/native-batch.md | 14 ++-- ...PartitionCachingLocalSegmentAllocator.java | 67 ++++++------------- .../GeneratedPartitionsMetadataReport.java | 8 +-- ...etadata.java => GenericPartitionStat.java} | 8 +-- .../parallel/ParallelIndexSupervisorTask.java | 24 ++++--- .../PartialDimensionDistributionTask.java | 6 +- ...egmentGenerateParallelIndexTaskRunner.java | 10 +-- .../PartialRangeSegmentGenerateTask.java | 20 +++--- ...rtitions.java => PartitionBoundaries.java} | 32 +++++++-- .../distribution/StringDistribution.java | 8 +-- .../parallel/distribution/StringSketch.java | 8 +-- ...itionCachingLocalSegmentAllocatorTest.java | 52 +++----------- ...est.java => GenericPartitionStatTest.java} | 6 +- .../PartialDimensionDistributionTaskTest.java | 31 +++++---- .../PartialRangeSegmentGenerateTaskTest.java | 4 +- ...rtitionMultiPhaseParallelIndexingTest.java | 6 +- ...Test.java => PartitionBoundariesTest.java} | 32 ++++++--- .../distribution/StringSketchMergerTest.java | 10 +-- .../distribution/StringSketchTest.java | 67 +++++++++---------- 19 files changed, 200 insertions(+), 213 deletions(-) rename 
indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/{PartitionMetadata.java => GenericPartitionStat.java} (90%) rename indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/{Partitions.java => PartitionBoundaries.java} (54%) rename indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/{PartitionMetadataTest.java => GenericPartitionStatTest.java} (94%) rename indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/{PartitionsTest.java => PartitionBoundariesTest.java} (67%) diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 0202e937bfea..68ba38674cea 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -244,10 +244,13 @@ Currently only one splitHintSpec, i.e., `segments`, is available. PartitionsSpec is used to describe the secondary partitioning method. You should use different partitionsSpec depending on the [rollup mode](../ingestion/index.md#rollup) you want. For perfect rollup, you should use either `hashed` (partitioning based on the hash of dimensions in each row) or -`single_dim` (based on ranges of a single dimension. For best-effort rollup, you should use `dynamic`. +`single_dim` (based on ranges of a single dimension). For best-effort rollup, you should use `dynamic`. -For perfect rollup, `hashed` partitioning is recommended in most cases, as it will improve indexing -performance and create more uniformly sized data segments relative to single-dimension partitioning. +The three `partitionsSpec` types have different pros and cons: +- `dynamic`: Fastest ingestion speed. Guarantees a well-balanced distribution in segment size. Only best-effort rollup. +- `hashed`: Moderate ingestion speed. Creates a well-balanced distribution in segment size. Allows perfect rollup. +- `single_dim`: Slowest ingestion speed. 
Segment sizes may be skewed depending on the partition key, but the broker can + use the partition information to efficiently prune segments early to speed up queries. Allows perfect rollup. #### Hash-based partitioning @@ -261,7 +264,10 @@ performance and create more uniformly sized data segments relative to single-dim > Single-dimension range partitioning currently requires the > [druid-datasketches](../development/extensions-core/datasketches-extension.md) -> extension to be added to the classpath. +> extension to be [loaded from the classpath](../development/extensions.md#loading-extensions-from-the-classpath). + +> Because single-dimension range partitioning makes two passes over the input, the index task may fail if the input changes +> in between the two passes. |property|description|default|required?| |--------|-----------|-------|---------| diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java index 59cf8513fc3a..977a9bf2fc49 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocator.java @@ -22,14 +22,13 @@ import com.google.common.collect.Maps; import org.apache.druid.data.input.InputRow; import org.apache.druid.indexing.common.TaskToolbox; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; import org.apache.druid.timeline.partition.SingleDimensionShardSpec; import org.joda.time.Interval; import javax.annotation.Nullable; import java.io.IOException; -import java.util.ArrayList; import
java.util.Collections; import java.util.List; import java.util.Map; @@ -46,7 +45,7 @@ public class RangePartitionCachingLocalSegmentAllocator implements IndexTaskSegm { private final String dataSource; private final String partitionDimension; - private final Map intervalsToPartitions; + private final Map intervalsToPartitions; private final IndexTaskSegmentAllocator delegate; public RangePartitionCachingLocalSegmentAllocator( @@ -55,7 +54,7 @@ public RangePartitionCachingLocalSegmentAllocator( String supervisorTaskId, String dataSource, String partitionDimension, - Map intervalsToPartitions + Map intervalsToPartitions ) throws IOException { this.dataSource = dataSource; @@ -76,10 +75,10 @@ private Map> getIntervalToSegmentIds(Func Maps.newHashMapWithExpectedSize(intervalsToPartitions.size()); intervalsToPartitions.forEach( - (interval, partitions) -> + (interval, partitionBoundaries) -> intervalToSegmentIds.put( interval, - translatePartitions(interval, partitions, versionFinder) + translatePartitionBoundaries(interval, partitionBoundaries, versionFinder) ) ); @@ -87,56 +86,28 @@ private Map> getIntervalToSegmentIds(Func } /** - * Translate {@link org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution} partititions - * into the corresponding {@link org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec} with segment id. + * Translate {@link PartitionBoundaries} into the corresponding + * {@link org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec} with segment id. 
*/ - private List translatePartitions( + private List translatePartitionBoundaries( Interval interval, - Partitions partitions, + PartitionBoundaries partitionBoundaries, Function versionFinder ) { - if (partitions.isEmpty()) { + if (partitionBoundaries.isEmpty()) { return Collections.emptyList(); } - String[] uniquePartitions = partitions.stream().distinct().toArray(String[]::new); - int numUniquePartition = uniquePartitions.length; - - // First partition starts with null (see StringPartitionChunk.isStart()) - uniquePartitions[0] = null; - - List segmentIds = - IntStream.range(0, numUniquePartition - 1) - .mapToObj(i -> createSegmentIdWithShardSpec( - interval, - versionFinder.apply(interval), - uniquePartitions[i], - uniquePartitions[i + 1], - i - )) - .collect(Collectors.toCollection(ArrayList::new)); - segmentIds.add( - createLastSegmentIdWithShardSpec( - interval, - versionFinder.apply(interval), - uniquePartitions[numUniquePartition - 1], - segmentIds.size() - ) - ); - - return segmentIds; - } - - private SegmentIdWithShardSpec createLastSegmentIdWithShardSpec( - Interval interval, - String version, - String partitionStart, - int partitionNum - ) - { - // Last partition ends with null (see StringPartitionChunk.isEnd()) - return createSegmentIdWithShardSpec(interval, version, partitionStart, null, partitionNum); + return IntStream.range(0, partitionBoundaries.size() - 1) + .mapToObj(i -> createSegmentIdWithShardSpec( + interval, + versionFinder.apply(interval), + partitionBoundaries.get(i), + partitionBoundaries.get(i + 1), + i + )) + .collect(Collectors.toList()); } private SegmentIdWithShardSpec createSegmentIdWithShardSpec( diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsMetadataReport.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsMetadataReport.java index 9b50f9f7f37d..021422bd3dd1 100644 --- 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsMetadataReport.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GeneratedPartitionsMetadataReport.java @@ -25,19 +25,19 @@ import java.util.List; /** - * Report containing the {@link PartitionMetadata}s created by a {@link PartialSegmentGenerateTask}. This report is + * Report containing the {@link GenericPartitionStat}s created by a {@link PartialSegmentGenerateTask}. This report is * collected by {@link ParallelIndexSupervisorTask} and used to generate {@link PartialGenericSegmentMergeIOConfig}. */ -class GeneratedPartitionsMetadataReport extends GeneratedPartitionsReport implements SubTaskReport +class GeneratedPartitionsMetadataReport extends GeneratedPartitionsReport implements SubTaskReport { public static final String TYPE = "generated_partitions_metadata"; @JsonCreator GeneratedPartitionsMetadataReport( @JsonProperty("taskId") String taskId, - @JsonProperty("partitionStats") List partitionMetadata + @JsonProperty("partitionStats") List partitionStats ) { - super(taskId, partitionMetadata); + super(taskId, partitionStats); } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadata.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java similarity index 90% rename from indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadata.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java index e8f5c4a5503a..5f4d16db2b19 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadata.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStat.java @@ -28,12 +28,12 @@ import 
java.util.Objects; /** - * Partition description ({@link ShardSpec}) and statistics created by {@link PartialSegmentGenerateTask}. Each + * Generic partition description ({@link ShardSpec}) and statistics created by {@link PartialSegmentGenerateTask}. Each * partition is a set of data of the same time chunk (primary partition key) and the same {@link ShardSpec} (secondary * partition key). The {@link ShardSpec} is later used by {@link PartialGenericSegmentMergeTask} to merge the partial * segments. */ -public class PartitionMetadata extends PartitionStat +public class GenericPartitionStat extends PartitionStat { private static final String PROP_SHARD_SPEC = "shardSpec"; @@ -41,7 +41,7 @@ public class PartitionMetadata extends PartitionStat private final ShardSpec shardSpec; @JsonCreator - public PartitionMetadata( + public GenericPartitionStat( @JsonProperty("taskExecutorHost") String taskExecutorHost, @JsonProperty("taskExecutorPort") int taskExecutorPort, @JsonProperty("useHttps") boolean useHttps, @@ -80,7 +80,7 @@ public boolean equals(Object o) if (!super.equals(o)) { return false; } - PartitionMetadata that = (PartitionMetadata) o; + GenericPartitionStat that = (GenericPartitionStat) o; return Objects.equals(shardSpec, that.shardSpec); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 130a05cc975f..444ad6e3cdd6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -58,7 +58,7 @@ import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.common.task.Tasks; import 
org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistributionMerger; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch; @@ -323,7 +323,7 @@ PartialDimensionDistributionParallelIndexTaskRunner createPartialDimensionDistri @VisibleForTesting PartialRangeSegmentGenerateParallelIndexTaskRunner createPartialRangeSegmentGenerateRunner( TaskToolbox toolbox, - Map intervalToPartitions + Map intervalToPartitions ) { return new PartialRangeSegmentGenerateParallelIndexTaskRunner( @@ -394,7 +394,11 @@ private static void assertDataSketchesAvailable() new StringSketch(); } catch (NoClassDefFoundError e) { - throw new ISE(e, "DataSketches is unvailable. Try adding the druid-datasketches extension to the classpath."); + throw new ISE( + e, + "DataSketches is unavailable." + + " Try loading the druid-datasketches extension from the classpath for the overlord and middleManagers/indexers."
+ ); } } @@ -605,7 +609,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro return TaskStatus.failure(getId()); } - Map intervalToPartitions = + Map intervalToPartitions = determineAllRangePartitions(distributionRunner.getReports().values()); if (intervalToPartitions.isEmpty()) { @@ -615,7 +619,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro return TaskStatus.success(getId(), msg); } - ParallelIndexTaskRunner> indexingRunner = + ParallelIndexTaskRunner> indexingRunner = createRunner(toolbox, tb -> createPartialRangeSegmentGenerateRunner(tb, intervalToPartitions)); TaskState indexingState = runNextPhase(indexingRunner); @@ -643,7 +647,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro return TaskStatus.fromCode(getId(), mergeState); } - private Map determineAllRangePartitions(Collection reports) + private Map determineAllRangePartitions(Collection reports) { Multimap intervalToDistributions = ArrayListMultimap.create(); reports.forEach(report -> { @@ -654,7 +658,7 @@ private Map determineAllRangePartitions(Collection distributions) + private PartitionBoundaries determineRangePartition(Collection distributions) { StringDistributionMerger distributionMerger = new StringSketchMerger(); distributions.forEach(distributionMerger::merge); @@ -663,7 +667,7 @@ private Partitions determineRangePartition(Collection distri SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) ingestionSchema.getTuningConfig().getGivenOrDefaultPartitionsSpec(); - final Partitions partitions; + final PartitionBoundaries partitions; Integer targetRowsPerSegment = partitionsSpec.getTargetRowsPerSegment(); if (targetRowsPerSegment == null) { partitions = mergedDistribution.getEvenPartitionsByMaxSize(partitionsSpec.getMaxRowsPerSegment()); @@ -693,10 +697,10 @@ private static Map, List> groupHa } private static Map, List> groupGenericPartitionLocationsPerPartition( - 
Map> subTaskIdToReport + Map> subTaskIdToReport ) { - BiFunction createPartitionLocationFunction = + BiFunction createPartitionLocationFunction = (subtaskId, partitionStat) -> new GenericPartitionLocation( partitionStat.getTaskExecutorHost(), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java index 7b013b8cb926..ca4c1838b939 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java @@ -192,9 +192,9 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception ParallelIndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig(); SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) tuningConfig.getPartitionsSpec(); - Preconditions.checkNotNull(partitionsSpec); + Preconditions.checkNotNull(partitionsSpec, "partitionsSpec required in tuningConfig"); String partitionDimension = partitionsSpec.getPartitionDimension(); - Preconditions.checkNotNull(partitionDimension, "partitionDimension required"); + Preconditions.checkNotNull(partitionDimension, "partitionDimension required in partitionsSpec"); boolean isAssumeGrouped = partitionsSpec.isAssumeGrouped(); InputSource inputSource = ingestionSchema.getIOConfig().getNonNullInputSource( @@ -294,7 +294,7 @@ private Map determineDistribution( } } - // UngroupedDimValueFilter may not accept the min/max dimensionValue. If needed, add the min/max + // DedupRowDimensionValueFilter may not accept the min/max dimensionValue. If needed, add the min/max // values to the distributions so they have an accurate min/max. 
dimValueFilter.getIntervalToMinDimensionValue() .forEach((interval, min) -> intervalToDistribution.get(interval).putIfNewMin(min)); diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java index 06f6ddb1d2b9..71f084dab86e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateParallelIndexTaskRunner.java @@ -24,7 +24,7 @@ import org.apache.druid.data.input.InputSplit; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.task.IndexTaskClientFactory; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.joda.time.Interval; @@ -36,11 +36,11 @@ * @see PartialHashSegmentMergeParallelIndexTaskRunner */ class PartialRangeSegmentGenerateParallelIndexTaskRunner - extends InputSourceSplitParallelIndexTaskRunner> + extends InputSourceSplitParallelIndexTaskRunner> { private final IndexTaskClientFactory taskClientFactory; private final AppenderatorsManager appenderatorsManager; - private final Map intervalToPartitions; + private final Map intervalToPartitions; PartialRangeSegmentGenerateParallelIndexTaskRunner( TaskToolbox toolbox, @@ -49,7 +49,7 @@ class PartialRangeSegmentGenerateParallelIndexTaskRunner ParallelIndexIngestionSpec ingestionSchema, Map context, IndexingServiceClient indexingServiceClient, - Map intervalToPartitions + Map intervalToPartitions ) { this( @@ -73,7 +73,7 
@@ class PartialRangeSegmentGenerateParallelIndexTaskRunner ParallelIndexIngestionSpec ingestionSchema, Map context, IndexingServiceClient indexingServiceClient, - Map intervalToPartitions, + Map intervalToPartitions, IndexTaskClientFactory taskClientFactory, AppenderatorsManager appenderatorsManager ) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java index 00ec70f22ef9..3cecf67c8690 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java @@ -32,7 +32,7 @@ import org.apache.druid.indexing.common.task.IndexTaskSegmentAllocator; import org.apache.druid.indexing.common.task.RangePartitionCachingLocalSegmentAllocator; import org.apache.druid.indexing.common.task.TaskResource; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.indexing.common.task.batch.parallel.iterator.RangePartitionIndexTaskInputRowIteratorBuilder; import org.apache.druid.indexing.worker.ShuffleDataSegmentPusher; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; @@ -58,7 +58,7 @@ public class PartialRangeSegmentGenerateTask extends PartialSegmentGenerateTask< private final String supervisorTaskId; private final int numAttempts; private final ParallelIndexIngestionSpec ingestionSchema; - private final Map intervalToPartitions; + private final Map intervalToPartitions; @JsonCreator public PartialRangeSegmentGenerateTask( @@ -70,7 +70,7 @@ public PartialRangeSegmentGenerateTask( @JsonProperty("numAttempts") int 
numAttempts, // zero-based counting @JsonProperty(PROP_SPEC) ParallelIndexIngestionSpec ingestionSchema, @JsonProperty("context") Map context, - @JsonProperty("intervalToPartitions") Map intervalToPartitions, + @JsonProperty("intervalToPartitions") Map intervalToPartitions, @JacksonInject IndexingServiceClient indexingServiceClient, @JacksonInject IndexTaskClientFactory taskClientFactory, @JacksonInject AppenderatorsManager appenderatorsManager @@ -130,7 +130,7 @@ public String getSupervisorTaskId() } @JsonProperty - public Map getIntervalToPartitions() + public Map getIntervalToPartitions() { return intervalToPartitions; } @@ -163,15 +163,15 @@ IndexTaskSegmentAllocator createSegmentAllocator(TaskToolbox toolbox) throws IOE @Override GeneratedPartitionsMetadataReport createGeneratedPartitionsReport(TaskToolbox toolbox, List segments) { - List partitionsMetadata = segments.stream() - .map(segment -> createPartitionStat(toolbox, segment)) - .collect(Collectors.toList()); - return new GeneratedPartitionsMetadataReport(getId(), partitionsMetadata); + List partitionStats = segments.stream() + .map(segment -> createPartitionStat(toolbox, segment)) + .collect(Collectors.toList()); + return new GeneratedPartitionsMetadataReport(getId(), partitionStats); } - private PartitionMetadata createPartitionStat(TaskToolbox toolbox, DataSegment segment) + private GenericPartitionStat createPartitionStat(TaskToolbox toolbox, DataSegment segment) { - return new PartitionMetadata( + return new GenericPartitionStat( toolbox.getTaskExecutorNode().getHost(), toolbox.getTaskExecutorNode().getPortToUse(), toolbox.getTaskExecutorNode().isEnableTlsPort(), diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundaries.java similarity index 54% rename from 
indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java rename to indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundaries.java index f19bb4fea406..dc14ace91c6e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/Partitions.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundaries.java @@ -20,28 +20,48 @@ package org.apache.druid.indexing.common.task.batch.parallel.distribution; import com.google.common.collect.ForwardingList; -import com.google.common.collect.ImmutableList; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; /** - * Convenience wrapper to make code more readable. + * List of range partition boundaries. */ -public class Partitions extends ForwardingList implements List +public class PartitionBoundaries extends ForwardingList implements List { private final List delegate; // For jackson @SuppressWarnings("unused") - private Partitions() + private PartitionBoundaries() { delegate = new ArrayList<>(); } - public Partitions(String... partitions) + /** + * @param partitions Elements corresponding to evenly-spaced fractional ranks of the distribution + */ + public PartitionBoundaries(String... 
partitions) { - delegate = ImmutableList.copyOf(partitions); + if (partitions.length == 0) { + delegate = Collections.emptyList(); + return; + } + + List partitionBoundaries = Arrays.stream(partitions) + .distinct() + .collect(Collectors.toCollection(ArrayList::new)); + + // First partition starts with null (see StringPartitionChunk.isStart()) + partitionBoundaries.set(0, null); + + // Last partition ends with null (see StringPartitionChunk.isEnd()) + partitionBoundaries.add(null); + + delegate = Collections.unmodifiableList(partitionBoundaries); } @Override diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java index 116dea63f2c2..5fbd8d61abb4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringDistribution.java @@ -50,17 +50,17 @@ public interface StringDistribution * Split the distribution in the fewest number of evenly-sized partitions while honoring a max * partition size. * - * @return Array of elements that correspond to the endpoints of evenly-sized partitions of the + * @return List of elements that correspond to the endpoints of evenly-sized partitions of the * sorted elements. */ - Partitions getEvenPartitionsByMaxSize(int maxSize); + PartitionBoundaries getEvenPartitionsByMaxSize(int maxSize); /** * Split the distribution in the fewest number of evenly-sized partitions while honoring a target * partition size (actual partition sizes may be slightly lower or higher). 
* - * @return Array of elements that correspond to the endpoints of evenly-sized partitions of the + * @return List of elements that correspond to the endpoints of evenly-sized partitions of the * sorted elements. */ - Partitions getEvenPartitionsByTargetSize(int targetSize); + PartitionBoundaries getEvenPartitionsByTargetSize(int targetSize); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java index 8fff0f742ee4..8203ffad04cc 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java @@ -86,7 +86,7 @@ public void putIfNewMax(String string) } @Override - public Partitions getEvenPartitionsByMaxSize(int maxSize) + public PartitionBoundaries getEvenPartitionsByMaxSize(int maxSize) { Preconditions.checkArgument(maxSize > 0, "maxSize must be positive but is %s", maxSize); long n = delegate.getN(); @@ -97,7 +97,7 @@ public Partitions getEvenPartitionsByMaxSize(int maxSize) } @Override - public Partitions getEvenPartitionsByTargetSize(int targetSize) + public PartitionBoundaries getEvenPartitionsByTargetSize(int targetSize) { Preconditions.checkArgument(targetSize > 0, "targetSize must be positive but is %s", targetSize); long n = delegate.getN(); @@ -105,7 +105,7 @@ public Partitions getEvenPartitionsByTargetSize(int targetSize) return getEvenPartitionsByCount(evenPartitionCount); } - private Partitions getEvenPartitionsByCount(int evenPartitionCount) + private PartitionBoundaries getEvenPartitionsByCount(int evenPartitionCount) { Preconditions.checkArgument( evenPartitionCount > 0, @@ -113,7 +113,7 @@ private Partitions getEvenPartitionsByCount(int evenPartitionCount) evenPartitionCount ); 
String[] partitions = delegate.getQuantiles(evenPartitionCount + 1); // add 1 since this returns endpoints - return new Partitions((partitions == null) ? new String[0] : partitions); + return new PartitionBoundaries((partitions == null) ? new String[0] : partitions); } @Override diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java index 590069bf946f..c2de2200a465 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java @@ -25,7 +25,7 @@ import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.actions.LockListAction; import org.apache.druid.indexing.common.actions.TaskActionClient; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec; @@ -56,40 +56,26 @@ public class RangePartitionCachingLocalSegmentAllocatorTest private static final Interval INTERVAL_EMPTY = Intervals.utc(0, 1000); private static final Interval INTERVAL_SINGLETON = Intervals.utc(1000, 2000); private static final Interval INTERVAL_NORMAL = Intervals.utc(2000, 3000); - private static final Interval INTERVAL_FREQUENT_MID = Intervals.utc(3000, 4000); - private static final Interval INTERVAL_FREQUENT_MAX = Intervals.utc(5000, 6000); private static final Map INTERVAL_TO_VERSION = ImmutableMap.of( INTERVAL_EMPTY, "version-empty", INTERVAL_SINGLETON, 
"version-singleton", - INTERVAL_NORMAL, "version-normal", - INTERVAL_FREQUENT_MID, "version-frequent-mid", - INTERVAL_FREQUENT_MAX, "version-frequent-max" + INTERVAL_NORMAL, "version-normal" ); private static final String PARTITION0 = "0"; private static final String PARTITION5 = "5"; private static final String PARTITION9 = "9"; - private static final Partitions EMPTY_PARTITIONS = new Partitions(); - private static final Partitions SINGLETON_PARTITIONS = new Partitions(PARTITION0, PARTITION0); - private static final Partitions NORMAL_PARTITIONS = new Partitions(PARTITION0, PARTITION5, PARTITION9); - private static final Partitions FREQUENT_MID_PARTITIONS = new Partitions( + private static final PartitionBoundaries EMPTY_PARTITIONS = new PartitionBoundaries(); + private static final PartitionBoundaries SINGLETON_PARTITIONS = new PartitionBoundaries(PARTITION0, PARTITION0); + private static final PartitionBoundaries NORMAL_PARTITIONS = new PartitionBoundaries( PARTITION0, PARTITION5, - PARTITION5, - PARTITION9 - ); - private static final Partitions FREQUENT_MAX_PARTITIONS = new Partitions( - PARTITION0, - PARTITION5, - PARTITION9, PARTITION9 ); - private static final Map INTERVAL_TO_PARTITONS = ImmutableMap.of( + private static final Map INTERVAL_TO_PARTITONS = ImmutableMap.of( INTERVAL_EMPTY, EMPTY_PARTITIONS, INTERVAL_SINGLETON, SINGLETON_PARTITIONS, - INTERVAL_NORMAL, NORMAL_PARTITIONS, - INTERVAL_FREQUENT_MID, FREQUENT_MID_PARTITIONS, - INTERVAL_FREQUENT_MAX, FREQUENT_MAX_PARTITIONS + INTERVAL_NORMAL, NORMAL_PARTITIONS ); private RangePartitionCachingLocalSegmentAllocator target; @@ -147,28 +133,12 @@ public void allocatesCorrectShardSpecsForFirstPartition() } @Test - public void allocatesCorrectShardSpecsForLastPartitionWithoutFrequentValue() + public void allocatesCorrectShardSpecsForLastPartition() { Interval interval = INTERVAL_NORMAL; InputRow row = createInputRow(interval, PARTITION9); - testAllocate(row, interval, 
INTERVAL_TO_PARTITONS.get(interval).size() - 1, null); - } - - @Test - public void allocatesCorrectShardSpecsForLastPartitionWithFrequentMid() - { - Interval interval = INTERVAL_FREQUENT_MID; - InputRow row = createInputRow(interval, PARTITION9); - Partitions partitions = INTERVAL_TO_PARTITONS.get(interval); - testAllocate(row, interval, partitions.size() - 2, partitions.get(partitions.size() - 1), null); - } - - @Test - public void allocatesCorrectShardSpecsForLastPartitionWithFrequentMax() - { - Interval interval = INTERVAL_FREQUENT_MAX; - InputRow row = createInputRow(interval, PARTITION9); - testAllocate(row, interval, INTERVAL_TO_PARTITONS.get(interval).size() - 2, null); + int partitionNum = INTERVAL_TO_PARTITONS.get(interval).size() - 2; // -2 = -1 0-based + -1 skip null tail + testAllocate(row, interval, partitionNum, null); } @SuppressWarnings("SameParameterValue") @@ -181,7 +151,7 @@ private void testAllocate(InputRow row, Interval interval, int partitionNum) @Nullable private static String getPartitionEnd(Interval interval, int partitionNum) { - Partitions partitions = INTERVAL_TO_PARTITONS.get(interval); + PartitionBoundaries partitions = INTERVAL_TO_PARTITONS.get(interval); boolean isLastPartition = (partitionNum + 1) == partitions.size(); return isLastPartition ? 
null : partitions.get(partitionNum + 1); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadataTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java similarity index 94% rename from indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadataTest.java rename to indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java index 3deb64d391e7..2bcac8edfd47 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartitionMetadataTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/GenericPartitionStatTest.java @@ -25,16 +25,16 @@ import org.junit.Before; import org.junit.Test; -public class PartitionMetadataTest +public class GenericPartitionStatTest { private static final ObjectMapper OBJECT_MAPPER = ParallelIndexTestingFactory.createObjectMapper(); - private PartitionMetadata target; + private GenericPartitionStat target; @Before public void setup() { - target = new PartitionMetadata( + target = new GenericPartitionStat( ParallelIndexTestingFactory.TASK_EXECUTOR_HOST, ParallelIndexTestingFactory.TASK_EXECUTOR_PORT, ParallelIndexTestingFactory.USE_HTTPS, diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java index e7334ba0cf45..9e622b83e869 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java @@ -33,7 +33,7 @@ import 
org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.task.IndexTaskClientFactory; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.TestHelper; @@ -274,11 +274,10 @@ public void sendsCorrectReportWhenAssumeGroupedTrue() Map intervalToDistribution = report.getIntervalToDistribution(); StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); Assert.assertNotNull(distribution); - Partitions partitions = distribution.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(3, partitions.size()); - Assert.assertEquals(dimensionValue, partitions.get(0)); - Assert.assertEquals(dimensionValue, partitions.get(1)); - Assert.assertEquals(dimensionValue, partitions.get(2)); + PartitionBoundaries partitions = distribution.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(2, partitions.size()); + Assert.assertNull(partitions.get(0)); + Assert.assertNull(partitions.get(1)); } @Test @@ -305,10 +304,10 @@ public void groupsRowsWhenAssumeGroupedFalse() Map intervalToDistribution = report.getIntervalToDistribution(); StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); Assert.assertNotNull(distribution); - Partitions partitions = distribution.getEvenPartitionsByMaxSize(1); + PartitionBoundaries partitions = distribution.getEvenPartitionsByMaxSize(1); Assert.assertEquals(2, partitions.size()); - Assert.assertEquals(dimensionValue, partitions.get(0)); - Assert.assertEquals(dimensionValue, partitions.get(1)); + Assert.assertNull(partitions.get(0)); + Assert.assertNull(partitions.get(1)); } @Test @@ 
-323,8 +322,6 @@ public void preservesMinAndMaxWhenAssumeGroupedFalse() List dimensionValues = IntStream.range(0, minBloomFilterBits * 10) .mapToObj(i -> StringUtils.format("%010d", i)) .collect(Collectors.toCollection(ArrayList::new)); - String minDimensionValue = dimensionValues.get(0); - String maxDimensionValue = dimensionValues.get(dimensionValues.size() - 1); List rows = dimensionValues.stream() .map(d -> ParallelIndexTestingFactory.createRow(timestamp, d)) .collect(Collectors.toList()); @@ -360,10 +357,16 @@ public void preservesMinAndMaxWhenAssumeGroupedFalse() Map intervalToDistribution = report.getIntervalToDistribution(); StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); Assert.assertNotNull(distribution); - Partitions partitions = distribution.getEvenPartitionsByMaxSize(1); + PartitionBoundaries partitions = distribution.getEvenPartitionsByMaxSize(1); Assert.assertEquals(minBloomFilterBits + 3, partitions.size()); // 3 = min + max + exclusive endpoint - Assert.assertEquals(minDimensionValue, partitions.get(0)); - Assert.assertEquals(maxDimensionValue, partitions.get(partitions.size() - 1)); + + // Min + Assert.assertNull(partitions.get(0)); + Assert.assertEquals(dimensionValues.get(1), partitions.get(1)); + + // Max + Assert.assertNull(partitions.get(partitions.size() - 1)); + Assert.assertEquals(dimensionValues.get(dimensionValues.size() - 1), partitions.get(partitions.size() - 2)); } @Test diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java index 9bfc1f53fbf1..0e12010185e4 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTaskTest.java @@ -28,7 +28,7 @@ import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; import org.apache.druid.indexing.common.task.IndexTaskClientFactory; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.indexing.DataSchema; @@ -144,7 +144,7 @@ PartialRangeSegmentGenerateTask build() ParallelIndexTestingFactory.NUM_ATTEMPTS, ingestionSpec, ParallelIndexTestingFactory.CONTEXT, - ImmutableMap.of(Intervals.ETERNITY, new Partitions("a")), + ImmutableMap.of(Intervals.ETERNITY, new PartitionBoundaries("a")), ParallelIndexTestingFactory.INDEXING_SERVICE_CLIENT, taskClientFactory, ParallelIndexTestingFactory.APPENDERATORS_MANAGER diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index 39e8fabc4a77..1f75da389552 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -36,7 +36,7 @@ import org.apache.druid.indexing.common.task.IndexTaskClientFactory; import org.apache.druid.indexing.common.task.TaskResource; import org.apache.druid.indexing.common.task.TestAppenderatorsManager; -import org.apache.druid.indexing.common.task.batch.parallel.distribution.Partitions; +import 
org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; @@ -293,7 +293,7 @@ PartialDimensionDistributionParallelIndexTaskRunner createPartialDimensionDistri @Override PartialRangeSegmentGenerateParallelIndexTaskRunner createPartialRangeSegmentGenerateRunner( TaskToolbox toolbox, - Map intervalToPartitions + Map intervalToPartitions ) { return new TestPartialRangeSegmentGenerateRunner( @@ -346,7 +346,7 @@ private TestPartialRangeSegmentGenerateRunner( TaskToolbox toolbox, ParallelIndexSupervisorTask supervisorTask, IndexingServiceClient indexingServiceClient, - Map intervalToPartitions + Map intervalToPartitions ) { super( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundariesTest.java similarity index 67% rename from indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java rename to indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundariesTest.java index 861ec2323b57..d1b20fde71cf 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionsTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundariesTest.java @@ -25,23 +25,27 @@ import org.junit.Test; import java.util.Arrays; +import java.util.Collections; +import java.util.List; -public class PartitionsTest +public class PartitionBoundariesTest { - private Partitions target; + private PartitionBoundaries target; private String[] values; + private List expected; @Before public void setup() { - values 
= new String[]{"a", "b"}; - target = new Partitions(values); + values = new String[]{"a", "dup", "dup", "z"}; + expected = Arrays.asList(null, "dup", "z", null); + target = new PartitionBoundaries(values); } @Test public void hasCorrectValues() { - Assert.assertEquals(Arrays.asList(values), target); + Assert.assertEquals(expected, target); } @Test(expected = UnsupportedOperationException.class) @@ -53,10 +57,20 @@ public void isImmutable() @Test public void cannotBeIndirectlyModified() { - String[] originalValues = Arrays.copyOf(values, values.length); - values[0] = "changed"; - Assert.assertEquals(Arrays.asList(originalValues), target); - Assert.assertNotEquals(Arrays.asList(values), target); + values[1] = "changed"; + Assert.assertEquals(expected, target); + } + + @Test + public void handlesNoValues() + { + Assert.assertEquals(Collections.emptyList(), new PartitionBoundaries()); + } + + @Test + public void handlesRepeatedValue() + { + Assert.assertEquals(Arrays.asList(null, null), new PartitionBoundaries("a", "a", "a")); } @Test diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java index 9ca6c07b8835..8d0c987d0630 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java @@ -70,11 +70,11 @@ public void mergesCorrectly() target.merge(sketch3); StringDistribution merged = target.getResult(); - Partitions partitions = merged.getEvenPartitionsByMaxSize(1); + PartitionBoundaries partitions = merged.getEvenPartitionsByMaxSize(1); Assert.assertEquals(4, partitions.size()); - Assert.assertEquals(string1, partitions.get(0)); // min - 
Assert.assertEquals(string2, partitions.get(1)); // median - Assert.assertEquals(string3, partitions.get(2)); // max - Assert.assertEquals(string3, partitions.get(3)); // max + Assert.assertNull(partitions.get(0)); + Assert.assertEquals(string2, partitions.get(1)); + Assert.assertEquals(string3, partitions.get(2)); + Assert.assertNull(partitions.get(3)); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java index 9381a7b33a5c..0e21c137fb75 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java @@ -155,26 +155,25 @@ public void requiresPositiveSize() public void handlesEmptySketch() { StringSketch sketch = new StringSketch(); - Partitions partitions = sketch.getEvenPartitionsByTargetSize(1); + PartitionBoundaries partitions = sketch.getEvenPartitionsByTargetSize(1); Assert.assertEquals(0, partitions.size()); } @Test public void handlesSingletonSketch() { - String value = MIN_STRING; StringSketch sketch = new StringSketch(); - sketch.put(value); - Partitions partitions = sketch.getEvenPartitionsByTargetSize(1); + sketch.put(MIN_STRING); + PartitionBoundaries partitions = sketch.getEvenPartitionsByTargetSize(1); Assert.assertEquals(2, partitions.size()); - Assert.assertEquals(value, partitions.get(0)); - Assert.assertEquals(value, partitions.get(1)); + Assert.assertNull(partitions.get(0)); + Assert.assertNull(partitions.get(1)); } @Test public void handlesMinimimumSize() { - Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(1); + PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(1); assertMaxNumberOfPartitions(partitions); } @@ -187,7 
+186,7 @@ public void handlesUnevenPartitions() private static void testHandlesUnevenPartitions(int targetSize) { - Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(targetSize); + PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(targetSize); assertFirstAndLastPartitionsCorrect(partitions); @@ -197,16 +196,16 @@ private static void testHandlesUnevenPartitions(int targetSize) Assert.assertThat( "targetSize=" + targetSize + " " + partitionsString, partitions.size(), - Matchers.lessThanOrEqualTo(expectedHighPartitionCount + 1) + Matchers.lessThanOrEqualTo(expectedHighPartitionCount + 2) // +2 = endpoint + null ); Assert.assertThat( "targetSize=" + targetSize + " " + partitionsString, partitions.size(), - Matchers.greaterThanOrEqualTo(expectedLowPartitionCount + 1) + Matchers.greaterThanOrEqualTo(expectedLowPartitionCount + 2) // +2 = endpoint + null ); int previous = 0; - for (int i = 1; i < partitions.size(); i++) { + for (int i = 1; i < partitions.size() - 1; i++) { int current = Integer.parseInt(partitions.get(i)); int size = current - previous; Assert.assertThat( @@ -221,14 +220,14 @@ private static void testHandlesUnevenPartitions(int targetSize) @Test public void handlesSinglePartition() { - Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(NUM_STRING); + PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(NUM_STRING); assertSinglePartition(partitions); } @Test public void handlesOversizedPartition() { - Partitions partitions = SKETCH.getEvenPartitionsByTargetSize(Integer.MAX_VALUE); + PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(Integer.MAX_VALUE); assertSinglePartition(partitions); } } @@ -251,26 +250,25 @@ public void requiresPositiveSize() public void handlesEmptySketch() { StringSketch sketch = new StringSketch(); - Partitions partitions = sketch.getEvenPartitionsByMaxSize(1); + PartitionBoundaries partitions = sketch.getEvenPartitionsByMaxSize(1); Assert.assertEquals(0, 
partitions.size()); } @Test public void handlesSingletonSketch() { - String value = MIN_STRING; StringSketch sketch = new StringSketch(); - sketch.put(value); - Partitions partitions = sketch.getEvenPartitionsByMaxSize(1); + sketch.put(MIN_STRING); + PartitionBoundaries partitions = sketch.getEvenPartitionsByMaxSize(1); Assert.assertEquals(2, partitions.size()); - Assert.assertEquals(value, partitions.get(0)); - Assert.assertEquals(value, partitions.get(1)); + Assert.assertNull(partitions.get(0)); + Assert.assertNull(partitions.get(1)); } @Test public void handlesMinimimumSize() { - Partitions partitions = SKETCH.getEvenPartitionsByMaxSize(1); + PartitionBoundaries partitions = SKETCH.getEvenPartitionsByMaxSize(1); assertMaxNumberOfPartitions(partitions); } @@ -283,7 +281,7 @@ public void handlesUnevenPartitions() private static void testHandlesUnevenPartitions(int maxSize) { - Partitions partitions = SKETCH.getEvenPartitionsByMaxSize(maxSize); + PartitionBoundaries partitions = SKETCH.getEvenPartitionsByMaxSize(maxSize); assertFirstAndLastPartitionsCorrect(partitions); @@ -291,14 +289,14 @@ private static void testHandlesUnevenPartitions(int maxSize) long expectedPartitionCount = (long) Math.ceil((double) NUM_STRING / maxSize); Assert.assertEquals( "maxSize=" + maxSize + " " + partitionsString, - expectedPartitionCount + 1, + expectedPartitionCount + 2, // +2 = endpoint + null partitions.size() ); double minSize = (double) NUM_STRING / expectedPartitionCount - DELTA; int previous = 0; - for (int i = 1; i < partitions.size(); i++) { + for (int i = 1; i < partitions.size() - 1; i++) { int current = Integer.parseInt(partitions.get(i)); int size = current - previous; Assert.assertThat( @@ -318,27 +316,27 @@ private static void testHandlesUnevenPartitions(int maxSize) @Test public void handlesSinglePartition() { - Partitions partitions = SKETCH.getEvenPartitionsByMaxSize((int) Math.ceil(NUM_STRING + DELTA)); + PartitionBoundaries partitions = 
SKETCH.getEvenPartitionsByMaxSize((int) Math.ceil(NUM_STRING + DELTA)); assertSinglePartition(partitions); } @Test public void handlesOversizedPartition() { - Partitions partitions = SKETCH.getEvenPartitionsByMaxSize(Integer.MAX_VALUE); + PartitionBoundaries partitions = SKETCH.getEvenPartitionsByMaxSize(Integer.MAX_VALUE); assertSinglePartition(partitions); } } - private static void assertMaxNumberOfPartitions(Partitions partitions) + private static void assertMaxNumberOfPartitions(PartitionBoundaries partitions) { String partitionsString = toString(partitions); - Assert.assertEquals(partitionsString, NUM_STRING + 1, partitions.size()); + Assert.assertEquals(partitionsString, StringSketch.SKETCH_K + 2, partitions.size()); // +2 = endpoint + null assertFirstAndLastPartitionsCorrect(partitions); int previous = 0; - for (int i = 1; i < partitions.size(); i++) { + for (int i = 1; i < partitions.size() - 1; i++) { int current = Integer.parseInt(partitions.get(i)); Assert.assertEquals( getErrMsgPrefix(1, i) + partitionsString, @@ -350,16 +348,17 @@ private static void assertMaxNumberOfPartitions(Partitions partitions) } } - private static void assertSinglePartition(Partitions partitions) + private static void assertSinglePartition(PartitionBoundaries partitions) { - Assert.assertEquals(2, partitions.size()); + Assert.assertEquals(3, partitions.size()); // +2 = endpoint + null assertFirstAndLastPartitionsCorrect(partitions); } - private static void assertFirstAndLastPartitionsCorrect(Partitions partitions) + private static void assertFirstAndLastPartitionsCorrect(PartitionBoundaries partitions) { - Assert.assertEquals(MIN_STRING, partitions.get(0)); - Assert.assertEquals(MAX_STRING, partitions.get(partitions.size() - 1)); + Assert.assertNull(partitions.get(0)); + Assert.assertEquals(MAX_STRING, partitions.get(partitions.size() - 2)); + Assert.assertNull(partitions.get(partitions.size() - 1)); } private static String getErrMsgPrefix(int size, int i) @@ -367,7 +366,7 @@ 
private static String getErrMsgPrefix(int size, int i) return "size=" + size + " i=" + i + " of "; } - private static String toString(Partitions partitions) + private static String toString(PartitionBoundaries partitions) { String prefix = "partitions[" + partitions.size() + "]="; StringJoiner sj = new StringJoiner(" ", prefix, "]"); From ded55f994a567eb801d8a0a6f16e77981627d9d2 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Fri, 6 Dec 2019 19:56:12 -0800 Subject: [PATCH 09/17] Fix doc link --- docs/ingestion/native-batch.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index e55408bd72a9..f1af93590bb4 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -264,7 +264,7 @@ The three `partitionsSpec` types have different pros and cons: > Single-dimension range partitioning currently requires the > [druid-datasketches](../development/extensions-core/datasketches-extension.md) -> extension to be [loaded from the classpath](..development/extension.md#loading-extensions-from-the-classpath). +> extension to be [loaded from the classpath](../development/extensions.md#loading-extensions-from-the-classpath). > Because single-range partitioning makes two passes over the input, the index task may fail if the input changes > in between the two passes. @@ -968,4 +968,4 @@ A spec that applies a filter and reads a subset of the original datasource's col } ``` -This spec above will only return the `page`, `user` dimensions and `added` metric. Only rows where `page` = `Druid` will be returned. \ No newline at end of file +This spec above will only return the `page`, `user` dimensions and `added` metric. Only rows where `page` = `Druid` will be returned. 
From 15235ea36ca5738cea732e875fdfa45fefb36587 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Fri, 6 Dec 2019 20:57:59 -0800 Subject: [PATCH 10/17] Split batch index integration test --- .travis.yml | 7 +++++++ .../src/test/java/org/apache/druid/tests/TestNGGroup.java | 1 + .../apache/druid/tests/indexer/ITParallelIndexTest.java | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a0ad65bdb1fe..8c77835f57fc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -290,6 +290,13 @@ jobs: docker exec -it druid-$v sh -c 'dmesg | tail -3' ; done + - &integration_parallel_batch_index + name: "parallel batch index integration test" + services: *integration_test_services + env: TESTNG_GROUPS='-Dgroups=parallel-batch-index' + script: *run_integration_test + after_failure: *integration_test_diags + - &integration_kafka_index name: "kafka index integration test" services: *integration_test_services diff --git a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java index dc37952ff436..10497cf0d0df 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java @@ -29,6 +29,7 @@ public class TestNGGroup public static final String HADOOP_INDEX = "hadoop-index"; public static final String KAFKA_INDEX = "kafka-index"; public static final String OTHER_INDEX = "other-index"; + public static final String PARALLEL_BATCH_INDEX = "parallel-batch-index"; public static final String QUERY = "query"; public static final String REALTIME_INDEX = "realtime-index"; public static final String SECURITY = "security"; diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java index 77ccb411e1c4..1e2dff2c908d 100644 --- 
a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java @@ -34,7 +34,7 @@ import java.io.Closeable; import java.util.function.Function; -@Test(groups = TestNGGroup.BATCH_INDEX) +@Test(groups = TestNGGroup.PARALLEL_BATCH_INDEX) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITParallelIndexTest extends AbstractITBatchIndexTest { From 275ad8eed6284411d0f7440800b2fab26eab6518 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Fri, 6 Dec 2019 22:33:12 -0800 Subject: [PATCH 11/17] Do not run parallel-batch-index twice --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8c77835f57fc..0047c1667b1a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -321,6 +321,6 @@ jobs: - &integration_tests name: "other integration test" services: *integration_test_services - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,kafka-index,query,realtime-index' + env: TESTNG_GROUPS='-DexcludedGroups=batch-index,parallel-batch-index,kafka-index,query,realtime-index' script: *run_integration_test after_failure: *integration_test_diags From f40ed699c0a4b684b490ccbc7d77bfd8111548dc Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Sat, 7 Dec 2019 16:22:52 -0800 Subject: [PATCH 12/17] Adjust last partition --- .../distribution/PartitionBoundaries.java | 7 +- .../parallel/distribution/StringSketch.java | 13 ++ ...itionCachingLocalSegmentAllocatorTest.java | 2 +- .../PartialDimensionDistributionTaskTest.java | 13 +- ...rtitionMultiPhaseParallelIndexingTest.java | 1 - .../distribution/PartitionBoundariesTest.java | 2 +- .../distribution/StringSketchMergerTest.java | 5 +- .../distribution/StringSketchTest.java | 135 +++++++++--------- 8 files changed, 97 insertions(+), 81 deletions(-) diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundaries.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundaries.java index dc14ace91c6e..32a0a0ffaf0f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundaries.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundaries.java @@ -51,6 +51,7 @@ public PartitionBoundaries(String... partitions) return; } + // Future improvement: Handle skewed partitions better (e.g., many values are repeated). List partitionBoundaries = Arrays.stream(partitions) .distinct() .collect(Collectors.toCollection(ArrayList::new)); @@ -59,7 +60,11 @@ public PartitionBoundaries(String... partitions) partitionBoundaries.set(0, null); // Last partition ends with null (see StringPartitionChunk.isEnd()) - partitionBoundaries.add(null); + if (partitionBoundaries.size() == 1) { + partitionBoundaries.add(null); + } else { + partitionBoundaries.set(partitionBoundaries.size() - 1, null); + } delegate = Collections.unmodifiableList(partitionBoundaries); } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java index 8203ffad04cc..bba16cc46628 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketch.java @@ -29,6 +29,7 @@ import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import com.fasterxml.jackson.databind.jsontype.TypeSerializer; import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import 
com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.datasketches.ArrayOfStringsSerDe; import org.apache.datasketches.memory.Memory; @@ -105,6 +106,18 @@ public PartitionBoundaries getEvenPartitionsByTargetSize(int targetSize) return getEvenPartitionsByCount(evenPartitionCount); } + @VisibleForTesting + public String getMin() + { + return delegate.getMinValue(); + } + + @VisibleForTesting + public String getMax() + { + return delegate.getMaxValue(); + } + private PartitionBoundaries getEvenPartitionsByCount(int evenPartitionCount) { Preconditions.checkArgument( diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java index c2de2200a465..6e91d10066af 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/RangePartitionCachingLocalSegmentAllocatorTest.java @@ -137,7 +137,7 @@ public void allocatesCorrectShardSpecsForLastPartition() { Interval interval = INTERVAL_NORMAL; InputRow row = createInputRow(interval, PARTITION9); - int partitionNum = INTERVAL_TO_PARTITONS.get(interval).size() - 2; // -2 = -1 0-based + -1 skip null tail + int partitionNum = INTERVAL_TO_PARTITONS.get(interval).size() - 2; testAllocate(row, interval, partitionNum, null); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java index 9e622b83e869..5d905f064d71 100644 --- 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTaskTest.java @@ -35,6 +35,7 @@ import org.apache.druid.indexing.common.task.IndexTaskClientFactory; import org.apache.druid.indexing.common.task.batch.parallel.distribution.PartitionBoundaries; import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringDistribution; +import org.apache.druid.indexing.common.task.batch.parallel.distribution.StringSketch; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.indexing.DataSchema; @@ -358,15 +359,13 @@ public void preservesMinAndMaxWhenAssumeGroupedFalse() StringDistribution distribution = Iterables.getOnlyElement(intervalToDistribution.values()); Assert.assertNotNull(distribution); PartitionBoundaries partitions = distribution.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(minBloomFilterBits + 3, partitions.size()); // 3 = min + max + exclusive endpoint + Assert.assertEquals(minBloomFilterBits + 2, partitions.size()); // 2 = min + max - // Min - Assert.assertNull(partitions.get(0)); - Assert.assertEquals(dimensionValues.get(1), partitions.get(1)); + String minDimensionValue = dimensionValues.get(0); + Assert.assertEquals(minDimensionValue, ((StringSketch) distribution).getMin()); - // Max - Assert.assertNull(partitions.get(partitions.size() - 1)); - Assert.assertEquals(dimensionValues.get(dimensionValues.size() - 1), partitions.get(partitions.size() - 2)); + String maxDimensionValue = dimensionValues.get(dimensionValues.size() - 1); + Assert.assertEquals(maxDimensionValue, ((StringSketch) distribution).getMax()); } @Test diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java index 1f75da389552..94ccf5cb03a1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/RangePartitionMultiPhaseParallelIndexingTest.java @@ -218,7 +218,6 @@ private static void assertNumPartition( expectedNumPartition -= 1; } expectedNumPartition *= NUM_DAY; - expectedNumPartition += 1; // max dimension value has its own partition Assert.assertEquals(expectedNumPartition, segments.size()); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundariesTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundariesTest.java index d1b20fde71cf..8f98bb1d59b1 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundariesTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/PartitionBoundariesTest.java @@ -38,7 +38,7 @@ public class PartitionBoundariesTest public void setup() { values = new String[]{"a", "dup", "dup", "z"}; - expected = Arrays.asList(null, "dup", "z", null); + expected = Arrays.asList(null, "dup", null); target = new PartitionBoundaries(values); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java index 8d0c987d0630..fb363536f6a9 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java +++ 
b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchMergerTest.java @@ -71,10 +71,9 @@ public void mergesCorrectly() StringDistribution merged = target.getResult(); PartitionBoundaries partitions = merged.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(4, partitions.size()); + Assert.assertEquals(3, partitions.size()); Assert.assertNull(partitions.get(0)); Assert.assertEquals(string2, partitions.get(1)); - Assert.assertEquals(string3, partitions.get(2)); - Assert.assertNull(partitions.get(3)); + Assert.assertNull(partitions.get(2)); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java index 0e21c137fb75..b09634df3f89 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/distribution/StringSketchTest.java @@ -54,7 +54,7 @@ public class StringSketchTest private static final String MAX_STRING = STRINGS.get(NUM_STRING - 1); static { - ItemsSketch.rand.setSeed(0); // make sketches deterministic for testing + ItemsSketch.rand.setSeed(0); // make sketches deterministic for testing } public static class SerializationDeserializationTest @@ -155,8 +155,8 @@ public void requiresPositiveSize() public void handlesEmptySketch() { StringSketch sketch = new StringSketch(); - PartitionBoundaries partitions = sketch.getEvenPartitionsByTargetSize(1); - Assert.assertEquals(0, partitions.size()); + PartitionBoundaries partitionBoundaries = sketch.getEvenPartitionsByTargetSize(1); + Assert.assertEquals(0, partitionBoundaries.size()); } @Test @@ -164,17 +164,17 @@ public void handlesSingletonSketch() { StringSketch sketch = new StringSketch(); 
sketch.put(MIN_STRING); - PartitionBoundaries partitions = sketch.getEvenPartitionsByTargetSize(1); - Assert.assertEquals(2, partitions.size()); - Assert.assertNull(partitions.get(0)); - Assert.assertNull(partitions.get(1)); + PartitionBoundaries partitionBoundaries = sketch.getEvenPartitionsByTargetSize(1); + Assert.assertEquals(2, partitionBoundaries.size()); + Assert.assertNull(partitionBoundaries.get(0)); + Assert.assertNull(partitionBoundaries.get(1)); } @Test public void handlesMinimimumSize() { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(1); - assertMaxNumberOfPartitions(partitions); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByTargetSize(1); + assertMaxNumberOfPartitions(partitionBoundaries); } @Test @@ -186,30 +186,30 @@ public void handlesUnevenPartitions() private static void testHandlesUnevenPartitions(int targetSize) { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(targetSize); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByTargetSize(targetSize); - assertFirstAndLastPartitionsCorrect(partitions); + assertFirstAndLastPartitionsCorrect(partitionBoundaries); - String partitionsString = PartitionTest.toString(partitions); - int expectedHighPartitionCount = (int) Math.ceil((double) NUM_STRING / targetSize); - int expectedLowPartitionCount = expectedHighPartitionCount - 1; + String partitionBoundariesString = PartitionTest.toString(partitionBoundaries); + int expectedHighPartitionBoundaryCount = (int) Math.ceil((double) NUM_STRING / targetSize); + int expectedLowPartitionBoundaryCount = expectedHighPartitionBoundaryCount - 1; Assert.assertThat( - "targetSize=" + targetSize + " " + partitionsString, - partitions.size(), - Matchers.lessThanOrEqualTo(expectedHighPartitionCount + 2) // +2 = endpoint + null + "targetSize=" + targetSize + " " + partitionBoundariesString, + partitionBoundaries.size(), + Matchers.lessThanOrEqualTo(expectedHighPartitionBoundaryCount 
+ 1) ); Assert.assertThat( - "targetSize=" + targetSize + " " + partitionsString, - partitions.size(), - Matchers.greaterThanOrEqualTo(expectedLowPartitionCount + 2) // +2 = endpoint + null + "targetSize=" + targetSize + " " + partitionBoundariesString, + partitionBoundaries.size(), + Matchers.greaterThanOrEqualTo(expectedLowPartitionBoundaryCount + 1) ); int previous = 0; - for (int i = 1; i < partitions.size() - 1; i++) { - int current = Integer.parseInt(partitions.get(i)); + for (int i = 1; i < partitionBoundaries.size() - 1; i++) { + int current = Integer.parseInt(partitionBoundaries.get(i)); int size = current - previous; Assert.assertThat( - getErrMsgPrefix(targetSize, i) + partitionsString, + getErrMsgPrefix(targetSize, i) + partitionBoundariesString, (double) size, IsCloseTo.closeTo(targetSize, Math.ceil(DELTA) * 2) ); @@ -220,15 +220,15 @@ private static void testHandlesUnevenPartitions(int targetSize) @Test public void handlesSinglePartition() { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(NUM_STRING); - assertSinglePartition(partitions); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByTargetSize(NUM_STRING); + assertSinglePartition(partitionBoundaries); } @Test public void handlesOversizedPartition() { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByTargetSize(Integer.MAX_VALUE); - assertSinglePartition(partitions); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByTargetSize(Integer.MAX_VALUE); + assertSinglePartition(partitionBoundaries); } } @@ -250,8 +250,8 @@ public void requiresPositiveSize() public void handlesEmptySketch() { StringSketch sketch = new StringSketch(); - PartitionBoundaries partitions = sketch.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(0, partitions.size()); + PartitionBoundaries partitionBoundaries = sketch.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(0, partitionBoundaries.size()); } @Test @@ -259,17 +259,17 @@ public void 
handlesSingletonSketch() { StringSketch sketch = new StringSketch(); sketch.put(MIN_STRING); - PartitionBoundaries partitions = sketch.getEvenPartitionsByMaxSize(1); - Assert.assertEquals(2, partitions.size()); - Assert.assertNull(partitions.get(0)); - Assert.assertNull(partitions.get(1)); + PartitionBoundaries partitionBoundaries = sketch.getEvenPartitionsByMaxSize(1); + Assert.assertEquals(2, partitionBoundaries.size()); + Assert.assertNull(partitionBoundaries.get(0)); + Assert.assertNull(partitionBoundaries.get(1)); } @Test public void handlesMinimimumSize() { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByMaxSize(1); - assertMaxNumberOfPartitions(partitions); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByMaxSize(1); + assertMaxNumberOfPartitions(partitionBoundaries); } @Test @@ -281,31 +281,31 @@ public void handlesUnevenPartitions() private static void testHandlesUnevenPartitions(int maxSize) { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByMaxSize(maxSize); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByMaxSize(maxSize); - assertFirstAndLastPartitionsCorrect(partitions); + assertFirstAndLastPartitionsCorrect(partitionBoundaries); - String partitionsString = PartitionTest.toString(partitions); + String partitionBoundariesString = PartitionTest.toString(partitionBoundaries); long expectedPartitionCount = (long) Math.ceil((double) NUM_STRING / maxSize); Assert.assertEquals( - "maxSize=" + maxSize + " " + partitionsString, - expectedPartitionCount + 2, // +2 = endpoint + null - partitions.size() + "maxSize=" + maxSize + " " + partitionBoundariesString, + expectedPartitionCount + 1, + partitionBoundaries.size() ); double minSize = (double) NUM_STRING / expectedPartitionCount - DELTA; int previous = 0; - for (int i = 1; i < partitions.size() - 1; i++) { - int current = Integer.parseInt(partitions.get(i)); + for (int i = 1; i < partitionBoundaries.size() - 1; i++) { + int current = 
Integer.parseInt(partitionBoundaries.get(i)); int size = current - previous; Assert.assertThat( - getErrMsgPrefix(maxSize, i) + partitionsString, + getErrMsgPrefix(maxSize, i) + partitionBoundariesString, size, Matchers.lessThanOrEqualTo(maxSize) ); Assert.assertThat( - getErrMsgPrefix(maxSize, i) + partitionsString, + getErrMsgPrefix(maxSize, i) + partitionBoundariesString, (double) size, Matchers.greaterThanOrEqualTo(minSize) ); @@ -316,30 +316,32 @@ private static void testHandlesUnevenPartitions(int maxSize) @Test public void handlesSinglePartition() { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByMaxSize((int) Math.ceil(NUM_STRING + DELTA)); - assertSinglePartition(partitions); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByMaxSize( + (int) Math.ceil(NUM_STRING + DELTA) + ); + assertSinglePartition(partitionBoundaries); } @Test public void handlesOversizedPartition() { - PartitionBoundaries partitions = SKETCH.getEvenPartitionsByMaxSize(Integer.MAX_VALUE); - assertSinglePartition(partitions); + PartitionBoundaries partitionBoundaries = SKETCH.getEvenPartitionsByMaxSize(Integer.MAX_VALUE); + assertSinglePartition(partitionBoundaries); } } - private static void assertMaxNumberOfPartitions(PartitionBoundaries partitions) + private static void assertMaxNumberOfPartitions(PartitionBoundaries partitionBoundaries) { - String partitionsString = toString(partitions); + String partitionBoundariesString = toString(partitionBoundaries); - Assert.assertEquals(partitionsString, StringSketch.SKETCH_K + 2, partitions.size()); // +2 = endpoint + null - assertFirstAndLastPartitionsCorrect(partitions); + Assert.assertEquals(partitionBoundariesString, StringSketch.SKETCH_K + 1, partitionBoundaries.size()); + assertFirstAndLastPartitionsCorrect(partitionBoundaries); int previous = 0; - for (int i = 1; i < partitions.size() - 1; i++) { - int current = Integer.parseInt(partitions.get(i)); + for (int i = 1; i < partitionBoundaries.size() - 1; 
i++) { + int current = Integer.parseInt(partitionBoundaries.get(i)); Assert.assertEquals( - getErrMsgPrefix(1, i) + partitionsString, + getErrMsgPrefix(1, i) + partitionBoundariesString, 1, current - previous, FACTOR @@ -348,17 +350,16 @@ private static void assertMaxNumberOfPartitions(PartitionBoundaries partitions) } } - private static void assertSinglePartition(PartitionBoundaries partitions) + private static void assertSinglePartition(PartitionBoundaries partitionBoundaries) { - Assert.assertEquals(3, partitions.size()); // +2 = endpoint + null - assertFirstAndLastPartitionsCorrect(partitions); + Assert.assertEquals(2, partitionBoundaries.size()); + assertFirstAndLastPartitionsCorrect(partitionBoundaries); } - private static void assertFirstAndLastPartitionsCorrect(PartitionBoundaries partitions) + private static void assertFirstAndLastPartitionsCorrect(PartitionBoundaries partitionBoundaries) { - Assert.assertNull(partitions.get(0)); - Assert.assertEquals(MAX_STRING, partitions.get(partitions.size() - 2)); - Assert.assertNull(partitions.get(partitions.size() - 1)); + Assert.assertNull(partitionBoundaries.get(0)); + Assert.assertNull(partitionBoundaries.get(partitionBoundaries.size() - 1)); } private static String getErrMsgPrefix(int size, int i) @@ -366,12 +367,12 @@ private static String getErrMsgPrefix(int size, int i) return "size=" + size + " i=" + i + " of "; } - private static String toString(PartitionBoundaries partitions) + private static String toString(PartitionBoundaries partitionBoundaries) { - String prefix = "partitions[" + partitions.size() + "]="; + String prefix = "partitionBoundaries[" + partitionBoundaries.size() + "]="; StringJoiner sj = new StringJoiner(" ", prefix, "]"); - for (int i = 0; i < partitions.size(); i++) { - sj.add("[" + i + "]=" + partitions.get(i)); + for (int i = 0; i < partitionBoundaries.size(); i++) { + sj.add("[" + i + "]=" + partitionBoundaries.get(i)); } return sj.toString(); } From 
1753d64f8c790aaece73d32ad539748724acbc20 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Sat, 7 Dec 2019 16:55:03 -0800 Subject: [PATCH 13/17] Split ITParallelIndexTest to reduce runtime --- .travis.yml | 8 +- .../org/apache/druid/tests/TestNGGroup.java | 2 +- .../indexer/AbstractITBatchIndexTest.java | 2 +- ...> ITImperfectRollupParallelIndexTest.java} | 89 ++++++-------- .../ITPerfectRollupParallelIndexTest.java | 113 ++++++++++++++++++ 5 files changed, 153 insertions(+), 61 deletions(-) rename integration-tests/src/test/java/org/apache/druid/tests/indexer/{ITParallelIndexTest.java => ITImperfectRollupParallelIndexTest.java} (59%) create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java diff --git a/.travis.yml b/.travis.yml index 0047c1667b1a..0dc60f2af3e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -290,10 +290,10 @@ jobs: docker exec -it druid-$v sh -c 'dmesg | tail -3' ; done - - &integration_parallel_batch_index - name: "parallel batch index integration test" + - &integration_perfect_rollup_parallel_batch_index + name: "perfect rollup parallel batch index integration test" services: *integration_test_services - env: TESTNG_GROUPS='-Dgroups=parallel-batch-index' + env: TESTNG_GROUPS='-Dgroups=perfect-rollup-parallel-batch-index' script: *run_integration_test after_failure: *integration_test_diags @@ -321,6 +321,6 @@ jobs: - &integration_tests name: "other integration test" services: *integration_test_services - env: TESTNG_GROUPS='-DexcludedGroups=batch-index,parallel-batch-index,kafka-index,query,realtime-index' + env: TESTNG_GROUPS='-DexcludedGroups=batch-index,perfect-rollup-parallel-batch-index,kafka-index,query,realtime-index' script: *run_integration_test after_failure: *integration_test_diags diff --git a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java index 10497cf0d0df..ad8a1454a6d0 
100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/TestNGGroup.java @@ -29,7 +29,7 @@ public class TestNGGroup public static final String HADOOP_INDEX = "hadoop-index"; public static final String KAFKA_INDEX = "kafka-index"; public static final String OTHER_INDEX = "other-index"; - public static final String PARALLEL_BATCH_INDEX = "parallel-batch-index"; + public static final String PERFECT_ROLLUP_PARALLEL_BATCH_INDEX = "perfect-rollup-parallel-batch-index"; public static final String QUERY = "query"; public static final String REALTIME_INDEX = "realtime-index"; public static final String SECURITY = "security"; diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java index 430e3bfccb45..9fc01a7451d4 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/AbstractITBatchIndexTest.java @@ -226,7 +226,7 @@ private void submitTaskAndWait(String taskSpec, String dataSourceName, boolean w ); } - // ITParallelIndexTest does a second round of ingestion to replace segements in an existing + // IT*ParallelIndexTest do a second round of ingestion to replace segments in an existing // data source.
For that second round we need to make sure the coordinator actually learned // about the new segments befor waiting for it to report that all segments are loaded; otherwise // this method could return too early because the coordinator is merely reporting that all the diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITImperfectRollupParallelIndexTest.java similarity index 59% rename from integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java rename to integration-tests/src/test/java/org/apache/druid/tests/indexer/ITImperfectRollupParallelIndexTest.java index 1e2dff2c908d..853eba11ed29 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITImperfectRollupParallelIndexTest.java @@ -21,12 +21,11 @@ import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.druid.indexer.partitions.DynamicPartitionsSpec; -import org.apache.druid.indexer.partitions.HashedPartitionsSpec; import org.apache.druid.indexer.partitions.PartitionsSpec; -import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; +import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Guice; import org.testng.annotations.Test; @@ -34,9 +33,9 @@ import java.io.Closeable; import java.util.function.Function; -@Test(groups = TestNGGroup.PARALLEL_BATCH_INDEX) +@Test(groups = TestNGGroup.BATCH_INDEX) @Guice(moduleFactory = DruidTestModuleFactory.class) -public class ITParallelIndexTest extends AbstractITBatchIndexTest +public class ITImperfectRollupParallelIndexTest extends AbstractITBatchIndexTest { private static final 
String INDEX_TASK = "/indexer/wikipedia_parallel_index_task.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_parallel_index_queries.json"; @@ -52,27 +51,27 @@ public class ITParallelIndexTest extends AbstractITBatchIndexTest public static Object[][] resources() { return new Object[][]{ - {new DynamicPartitionsSpec(null, null)}, - {new HashedPartitionsSpec(null, 2, null)}, - {new SingleDimensionPartitionsSpec(2, null, "namespace", false)}, + {new DynamicPartitionsSpec(null, null)} }; } @Test(dataProvider = "resources") public void testIndexData(PartitionsSpec partitionsSpec) throws Exception { - try (final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - final Closeable ignored2 = unloader(INDEX_INGEST_SEGMENT_DATASOURCE + config.getExtraDatasourceNameSuffix()); - final Closeable ignored3 = unloader(INDEX_DRUID_INPUT_SOURCE_DATASOURCE + config.getExtraDatasourceNameSuffix()) + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored2 = unloader(INDEX_INGEST_SEGMENT_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored3 = unloader(INDEX_DRUID_INPUT_SOURCE_DATASOURCE + config.getExtraDatasourceNameSuffix()) ) { boolean forceGuaranteedRollup = partitionsSpec.isForceGuaranteedRollupCompatible(); + Assert.assertFalse(forceGuaranteedRollup, "partitionsSpec does not support best-effort rollup"); final Function rollupTransform = spec -> { try { spec = StringUtils.replace( spec, "%%FORCE_GUARANTEED_ROLLUP%%", - Boolean.toString(forceGuaranteedRollup) + Boolean.toString(false) ); return StringUtils.replace( spec, @@ -93,52 +92,32 @@ public void testIndexData(PartitionsSpec partitionsSpec) throws Exception false ); - // Missing intervals is not supported yet if forceGuaranteedRollup = true - if (!forceGuaranteedRollup) { - // Index again, this time only choosing the second data file, and without explicit
intervals chosen. - // The second datafile covers both day segments, so this should replace them, as reflected in the queries. - doIndexTest( - INDEX_DATASOURCE, - REINDEX_TASK, - rollupTransform, - REINDEX_QUERIES_RESOURCE, - true - ); - - doReindexTest( - INDEX_DATASOURCE, - INDEX_INGEST_SEGMENT_DATASOURCE, - rollupTransform, - INDEX_INGEST_SEGMENT_TASK, - REINDEX_QUERIES_RESOURCE - ); + // Index again, this time only choosing the second data file, and without explicit intervals chosen. + // The second datafile covers both day segments, so this should replace them, as reflected in the queries. + doIndexTest( + INDEX_DATASOURCE, + REINDEX_TASK, + rollupTransform, + REINDEX_QUERIES_RESOURCE, + true + ); - // with DruidInputSource instead of IngestSegmentFirehose - doReindexTest( - INDEX_DATASOURCE, - INDEX_DRUID_INPUT_SOURCE_DATASOURCE, - rollupTransform, - INDEX_DRUID_INPUT_SOURCE_TASK, - REINDEX_QUERIES_RESOURCE - ); - } else { - doReindexTest( - INDEX_DATASOURCE, - INDEX_INGEST_SEGMENT_DATASOURCE, - rollupTransform, - INDEX_INGEST_SEGMENT_TASK, - INDEX_QUERIES_RESOURCE - ); + doReindexTest( + INDEX_DATASOURCE, + INDEX_INGEST_SEGMENT_DATASOURCE, + rollupTransform, + INDEX_INGEST_SEGMENT_TASK, + REINDEX_QUERIES_RESOURCE + ); - // with DruidInputSource instead of IngestSegmentFirehose - doReindexTest( - INDEX_DATASOURCE, - INDEX_DRUID_INPUT_SOURCE_DATASOURCE, - rollupTransform, - INDEX_DRUID_INPUT_SOURCE_TASK, - INDEX_QUERIES_RESOURCE - ); - } + // with DruidInputSource instead of IngestSegmentFirehose + doReindexTest( + INDEX_DATASOURCE, + INDEX_DRUID_INPUT_SOURCE_DATASOURCE, + rollupTransform, + INDEX_DRUID_INPUT_SOURCE_TASK, + REINDEX_QUERIES_RESOURCE + ); } } } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java new file mode 100644 index 000000000000..03442032de03 --- /dev/null +++ 
b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITPerfectRollupParallelIndexTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.indexer; + +import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.druid.indexer.partitions.HashedPartitionsSpec; +import org.apache.druid.indexer.partitions.PartitionsSpec; +import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +import java.io.Closeable; +import java.util.function.Function; + +@Test(groups = TestNGGroup.PERFECT_ROLLUP_PARALLEL_BATCH_INDEX) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITPerfectRollupParallelIndexTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/indexer/wikipedia_parallel_index_task.json"; + private static final String INDEX_QUERIES_RESOURCE = 
"/indexer/wikipedia_parallel_index_queries.json"; + private static final String INDEX_DATASOURCE = "wikipedia_parallel_index_test"; + private static final String INDEX_INGEST_SEGMENT_DATASOURCE = "wikipedia_parallel_ingest_segment_index_test"; + private static final String INDEX_INGEST_SEGMENT_TASK = "/indexer/wikipedia_parallel_ingest_segment_index_task.json"; + private static final String INDEX_DRUID_INPUT_SOURCE_DATASOURCE = "wikipedia_parallel_druid_input_source_index_test"; + private static final String INDEX_DRUID_INPUT_SOURCE_TASK = "/indexer/wikipedia_parallel_druid_input_source_index_task.json"; + + @DataProvider + public static Object[][] resources() + { + return new Object[][]{ + {new HashedPartitionsSpec(null, 2, null)}, + {new SingleDimensionPartitionsSpec(2, null, "namespace", false)} + }; + } + + @Test(dataProvider = "resources") + public void testIndexData(PartitionsSpec partitionsSpec) throws Exception + { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored2 = unloader(INDEX_INGEST_SEGMENT_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored3 = unloader(INDEX_DRUID_INPUT_SOURCE_DATASOURCE + config.getExtraDatasourceNameSuffix()) + ) { + boolean forceGuaranteedRollup = partitionsSpec.isForceGuaranteedRollupCompatible(); + Assert.assertTrue(forceGuaranteedRollup, "partitionsSpec does not support perfect rollup"); + + final Function rollupTransform = spec -> { + try { + spec = StringUtils.replace( + spec, + "%%FORCE_GUARANTEED_ROLLUP%%", + Boolean.toString(true) + ); + return StringUtils.replace( + spec, + "%%PARTITIONS_SPEC%%", + jsonMapper.writeValueAsString(partitionsSpec) + ); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }; + + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + rollupTransform, + INDEX_QUERIES_RESOURCE, + false + ); + + doReindexTest( + INDEX_DATASOURCE, + INDEX_INGEST_SEGMENT_DATASOURCE, +
rollupTransform, + INDEX_INGEST_SEGMENT_TASK, + INDEX_QUERIES_RESOURCE + ); + + // with DruidInputSource instead of IngestSegmentFirehose + doReindexTest( + INDEX_DATASOURCE, + INDEX_DRUID_INPUT_SOURCE_DATASOURCE, + rollupTransform, + INDEX_DRUID_INPUT_SOURCE_TASK, + INDEX_QUERIES_RESOURCE + ); + } + } +} From 96215101c09484b5e504fd0f7c50b9eb9799d69a Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Mon, 9 Dec 2019 10:08:39 -0800 Subject: [PATCH 14/17] Rename test class --- ...lIndexTest.java => ITBestEffortRollupParallelIndexTest.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename integration-tests/src/test/java/org/apache/druid/tests/indexer/{ITImperfectRollupParallelIndexTest.java => ITBestEffortRollupParallelIndexTest.java} (98%) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITImperfectRollupParallelIndexTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java similarity index 98% rename from integration-tests/src/test/java/org/apache/druid/tests/indexer/ITImperfectRollupParallelIndexTest.java rename to integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java index 853eba11ed29..0c975b208e65 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITImperfectRollupParallelIndexTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITBestEffortRollupParallelIndexTest.java @@ -35,7 +35,7 @@ @Test(groups = TestNGGroup.BATCH_INDEX) @Guice(moduleFactory = DruidTestModuleFactory.class) -public class ITImperfectRollupParallelIndexTest extends AbstractITBatchIndexTest +public class ITBestEffortRollupParallelIndexTest extends AbstractITBatchIndexTest { private static final String INDEX_TASK = "/indexer/wikipedia_parallel_index_task.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_parallel_index_queries.json"; From 
8d714bdf7d6a08b3d5a1a607b313184ae8f25e44 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Mon, 9 Dec 2019 16:24:19 -0800 Subject: [PATCH 15/17] Allow null values in range partitions --- .../PartialDimensionDistributionTask.java | 5 +- .../PartialRangeSegmentGenerateTask.java | 3 +- ...itionIndexTaskInputRowIteratorBuilder.java | 24 +++- ...InputRowIteratorBuilderTestingFactory.java | 11 +- ...titionTaskInputRowIteratorBuilderTest.java | 110 +++++++++++++++--- 5 files changed, 127 insertions(+), 26 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java index ca4c1838b939..60c2d185162c 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialDimensionDistributionTask.java @@ -75,6 +75,9 @@ public class PartialDimensionDistributionTask extends PerfectRollupWorkerTask public static final String TYPE = "partial_dimension_distribution"; private static final Logger LOG = new Logger(PartialDimensionDistributionTask.class); + // Future work: StringDistribution does not handle inserting NULLs. This is the same behavior as hadoop indexing. 
+ private static final boolean SKIP_NULL = true; + private final int numAttempts; private final ParallelIndexIngestionSpec ingestionSchema; private final String supervisorTaskId; @@ -220,7 +223,7 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception try ( CloseableIterator inputRowIterator = inputSourceReader.read(); - HandlingInputRowIterator iterator = new RangePartitionIndexTaskInputRowIteratorBuilder(partitionDimension) + HandlingInputRowIterator iterator = new RangePartitionIndexTaskInputRowIteratorBuilder(partitionDimension, SKIP_NULL) .delegate(inputRowIterator) .granularitySpec(granularitySpec) .nullRowRunnable(IndexTaskInputRowIteratorBuilder.NOOP_RUNNABLE) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java index 3cecf67c8690..b52b26b410fc 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/PartialRangeSegmentGenerateTask.java @@ -54,6 +54,7 @@ public class PartialRangeSegmentGenerateTask extends PartialSegmentGenerateTask< { public static final String TYPE = "partial_range_index_generate"; private static final String PROP_SPEC = "spec"; + private static final boolean SKIP_NULL = true; private final String supervisorTaskId; private final int numAttempts; @@ -86,7 +87,7 @@ public PartialRangeSegmentGenerateTask( indexingServiceClient, taskClientFactory, appenderatorsManager, - new RangePartitionIndexTaskInputRowIteratorBuilder(getPartitionDimension(ingestionSchema)) + new RangePartitionIndexTaskInputRowIteratorBuilder(getPartitionDimension(ingestionSchema), !SKIP_NULL) ); this.numAttempts = numAttempts; diff --git 
a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java index b2884b99d439..4373af494e01 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionIndexTaskInputRowIteratorBuilder.java @@ -54,10 +54,19 @@ public class RangePartitionIndexTaskInputRowIteratorBuilder implements IndexTask { private final DefaultIndexTaskInputRowIteratorBuilder delegate; - public RangePartitionIndexTaskInputRowIteratorBuilder(String partitionDimension) + /** + * @param partitionDimension Create range partitions for this dimension + * @param skipNull Whether to skip rows with a dimension value of null + */ + public RangePartitionIndexTaskInputRowIteratorBuilder(String partitionDimension, boolean skipNull) { delegate = new DefaultIndexTaskInputRowIteratorBuilder(); - delegate.appendInputRowHandler(createOnlySingleDimensionValueRowsHandler(partitionDimension)); + + if (skipNull) { + delegate.appendInputRowHandler(createOnlySingleDimensionValueRowsHandler(partitionDimension)); + } else { + delegate.appendInputRowHandler(createOnlySingleOrNullDimensionValueRowsHandler(partitionDimension)); + } } @Override @@ -99,4 +108,15 @@ private static HandlingInputRowIterator.InputRowHandler createOnlySingleDimensio return dimensionValues.size() != 1; }; } + + private static HandlingInputRowIterator.InputRowHandler createOnlySingleOrNullDimensionValueRowsHandler( + String partitionDimension + ) + { + return inputRow -> { + List dimensionValues = inputRow.getDimension(partitionDimension); + return dimensionValues.size() > 1; // Rows.objectToStrings() returns an 
empty list for a single null value + }; + } + } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java index 628a5b008e3c..39300acd1a9f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/IndexTaskInputRowIteratorBuilderTestingFactory.java @@ -22,6 +22,7 @@ import com.google.common.base.Optional; import org.apache.druid.data.input.HandlingInputRowIterator; import org.apache.druid.data.input.InputRow; +import org.apache.druid.data.input.MapBasedInputRow; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.parsers.CloseableIterator; @@ -52,11 +53,11 @@ static InputRow createInputRow(DateTime timestamp) static InputRow createInputRow(DateTime timestamp, List dimensionValues) { - InputRow inputRow = EasyMock.mock(InputRow.class); - EasyMock.expect(inputRow.getTimestamp()).andStubReturn(timestamp); - EasyMock.expect(inputRow.getDimension(DIMENSION)).andStubReturn(dimensionValues); - EasyMock.replay(inputRow); - return inputRow; + return new MapBasedInputRow( + timestamp, + dimensionValues, + Collections.singletonMap(DIMENSION, dimensionValues) + ); } static CloseableIterator createInputRowIterator(InputRow inputRow) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java index 
6093d0d0eb46..45a028a5e96d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java @@ -32,8 +32,14 @@ public class RangePartitionTaskInputRowIteratorBuilderTest { + private static final boolean SKIP_NULL = true; private static final IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester HANDLER_TESTER = - IndexTaskInputRowIteratorBuilderTestingFactory.createHandlerTester(() -> new RangePartitionIndexTaskInputRowIteratorBuilder(IndexTaskInputRowIteratorBuilderTestingFactory.DIMENSION)); + IndexTaskInputRowIteratorBuilderTestingFactory.createHandlerTester( + () -> new RangePartitionIndexTaskInputRowIteratorBuilder( + IndexTaskInputRowIteratorBuilderTestingFactory.DIMENSION, + SKIP_NULL + ) + ); private static final InputRow NO_NEXT_INPUT_ROW = null; @Test @@ -41,15 +47,24 @@ public void invokesDimensionValueCountFilterLast() { DateTime timestamp = IndexTaskInputRowIteratorBuilderTestingFactory.TIMESTAMP; List multipleDimensionValues = Arrays.asList("multiple", "dimension", "values"); - InputRow inputRow = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow(timestamp, multipleDimensionValues); - CloseableIterator inputRowIterator = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator(inputRow); - GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec(timestamp, IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT); - - List handlerInvocationHistory = HANDLER_TESTER.invokeHandlers( - inputRowIterator, - granularitySpec, - NO_NEXT_INPUT_ROW + InputRow inputRow = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow( + timestamp, + multipleDimensionValues ); + CloseableIterator inputRowIterator = 
IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator( + inputRow + ); + GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec( + timestamp, + IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT + ); + + List handlerInvocationHistory = + HANDLER_TESTER.invokeHandlers( + inputRowIterator, + granularitySpec, + NO_NEXT_INPUT_ROW + ); Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); } @@ -58,16 +73,77 @@ public void invokesDimensionValueCountFilterLast() public void doesNotInvokeHandlersIfRowValid() { DateTime timestamp = IndexTaskInputRowIteratorBuilderTestingFactory.TIMESTAMP; - List singleDimensionValue = Collections.singletonList("single-dimension-value"); - InputRow inputRow = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow(timestamp, singleDimensionValue); - CloseableIterator inputRowIterator = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator(inputRow); - GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec(timestamp, IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT); - - List handlerInvocationHistory = HANDLER_TESTER.invokeHandlers( - inputRowIterator, - granularitySpec, + List nullDimensionValue = Collections.singletonList(null); + InputRow inputRow = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow(timestamp, nullDimensionValue); + CloseableIterator inputRowIterator = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator( + inputRow + ); + GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec( + timestamp, + IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT + ); + + List handlerInvocationHistory = + HANDLER_TESTER.invokeHandlers( + inputRowIterator, + granularitySpec, + inputRow + ); + + Assert.assertEquals(Collections.emptyList(), 
handlerInvocationHistory); + } + + @Test + public void invokesHandlerIfRowInvalidNull() + { + DateTime timestamp = IndexTaskInputRowIteratorBuilderTestingFactory.TIMESTAMP; + List nullDimensionValue = null; + InputRow inputRow = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow(timestamp, nullDimensionValue); + CloseableIterator inputRowIterator = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator( + inputRow + ); + GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec( + timestamp, + IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT + ); + + List handlerInvocationHistory = + HANDLER_TESTER.invokeHandlers( + inputRowIterator, + granularitySpec, + NO_NEXT_INPUT_ROW + ); + + Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); + } + + @Test + public void doesNotInvokeHandlersIfRowValidNull() + { + DateTime timestamp = IndexTaskInputRowIteratorBuilderTestingFactory.TIMESTAMP; + List nullDimensionValue = null; + InputRow inputRow = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRow(timestamp, nullDimensionValue); + CloseableIterator inputRowIterator = IndexTaskInputRowIteratorBuilderTestingFactory.createInputRowIterator( inputRow ); + GranularitySpec granularitySpec = IndexTaskInputRowIteratorBuilderTestingFactory.createGranularitySpec( + timestamp, + IndexTaskInputRowIteratorBuilderTestingFactory.PRESENT_BUCKET_INTERVAL_OPT + ); + + IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester handlerTester = + IndexTaskInputRowIteratorBuilderTestingFactory.createHandlerTester( + () -> new RangePartitionIndexTaskInputRowIteratorBuilder( + IndexTaskInputRowIteratorBuilderTestingFactory.DIMENSION, + !SKIP_NULL + ) + ); + List handlerInvocationHistory = + handlerTester.invokeHandlers( + inputRowIterator, + granularitySpec, + inputRow + ); Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); } From 
e423be5456a126a88556cf5188524ad96c5e02cf Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Mon, 9 Dec 2019 17:08:38 -0800 Subject: [PATCH 16/17] Indicate which phase failed --- .../task/batch/parallel/ParallelIndexSupervisorTask.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java index 444ad6e3cdd6..db31af67d91a 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/batch/parallel/ParallelIndexSupervisorTask.java @@ -606,7 +606,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro TaskState distributionState = runNextPhase(distributionRunner); if (distributionState.isFailure()) { - return TaskStatus.failure(getId()); + return TaskStatus.failure(getId(), PartialDimensionDistributionTask.TYPE + " failed"); } Map intervalToPartitions = @@ -624,7 +624,7 @@ private TaskStatus runRangePartitionMultiPhaseParallel(TaskToolbox toolbox) thro TaskState indexingState = runNextPhase(indexingRunner); if (indexingState.isFailure()) { - return TaskStatus.failure(getId()); + return TaskStatus.failure(getId(), PartialRangeSegmentGenerateTask.TYPE + " failed"); } // partition (interval, partitionId) -> partition locations From 76221343a7ba032431ff333948d65388af6828b1 Mon Sep 17 00:00:00 2001 From: Chi Cao Minh Date: Mon, 9 Dec 2019 17:49:50 -0800 Subject: [PATCH 17/17] Improve asserts in tests --- ...titionTaskInputRowIteratorBuilderTest.java | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git 
a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java index 45a028a5e96d..719535c42b43 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/batch/parallel/iterator/RangePartitionTaskInputRowIteratorBuilderTest.java @@ -22,6 +22,7 @@ import org.apache.druid.data.input.InputRow; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.segment.indexing.granularity.GranularitySpec; +import org.hamcrest.Matchers; import org.joda.time.DateTime; import org.junit.Assert; import org.junit.Test; @@ -66,7 +67,14 @@ public void invokesDimensionValueCountFilterLast() NO_NEXT_INPUT_ROW ); - Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); + assertNotInHandlerInvocationHistory( + handlerInvocationHistory, + IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester.Handler.NULL_ROW + ); + assertNotInHandlerInvocationHistory( + handlerInvocationHistory, + IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester.Handler.ABSENT_BUCKET_INTERVAL + ); } @Test @@ -114,7 +122,14 @@ public void invokesHandlerIfRowInvalidNull() NO_NEXT_INPUT_ROW ); - Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); + assertNotInHandlerInvocationHistory( + handlerInvocationHistory, + IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester.Handler.NULL_ROW + ); + assertNotInHandlerInvocationHistory( + handlerInvocationHistory, + IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester.Handler.ABSENT_BUCKET_INTERVAL + ); } @Test @@ -147,4 +162,12 @@ public void doesNotInvokeHandlersIfRowValidNull() 
Assert.assertEquals(Collections.emptyList(), handlerInvocationHistory); } + + private static void assertNotInHandlerInvocationHistory( + List handlerInvocationHistory, + IndexTaskInputRowIteratorBuilderTestingFactory.HandlerTester.Handler handler + ) + { + Assert.assertThat(handlerInvocationHistory, Matchers.not(Matchers.contains(handler))); + } }