diff --git a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap
index f3b66e810932..3a0a83841f63 100644
--- a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap
+++ b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap
@@ -1,79 +1,660 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
-exports[`compaction dialog matches snapshot 1`] = `
+exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partitionsSpec) 1`] = `
 ...
-    Object {
-      "defaultValue": "P1D",
-      "info": <p>
-        The offset for searching segments to be compacted. Strongly recommended to set for realtime
-        dataSources.
-      </p>,
-      "name": "skipOffsetFromLatest",
-      "type": "string",
-    },
-    Object {
-      "defaultValue": 5000000,
-      "info": <p>
-        Determines how many rows are in each segment.
-      </p>,
-      "name": "maxRowsPerSegment",
-      "type": "number",
-    },
-    Object {
-      "info": <p>
-        Priority of the compaction task.
-      </p>,
-      "name": "taskPriority",
-      "type": "number",
-    },
+    Object {
+      "info": <p>
+        For perfect rollup, you should use either <Code>hashed</Code> (partitioning based on the
+        hash of dimensions in each row) or <Code>single_dim</Code> (based on ranges of a single
+        dimension). For best-effort rollup, you should use <Code>dynamic</Code>.
+      </p>,
+      "label": "Partitioning type",
+      "name": "tuningConfig.partitionsSpec.type",
+      "type": "string",
+    },
+    ...
+    Object {
+      "defined": [Function],
+      "info": <p>
+        The dimensions to partition on. Leave blank to select all dimensions.
+      </p>,
+      "label": "Partition dimensions",
+      "name": "tuningConfig.partitionsSpec.partitionDimensions",
+      "type": "string-array",
+    },
+    Object {
+      "defined": [Function],
+      "info": <p>
+        The dimension to partition on.
+      </p>,
+      "label": "Partition dimension",
+      "name": "tuningConfig.partitionsSpec.partitionDimension",
+      "required": true,
+      "type": "string",
+    },
+    Object {
+      "defined": [Function],
+      "info": <p>
+        Target number of rows to include in a partition, should be a number that targets segments
+        of 500MB~1GB.
+      </p>,
+      "label": "Target rows per segment",
+      "name": "tuningConfig.partitionsSpec.targetRowsPerSegment",
+      "required": [Function],
+      "type": "number",
+      "zeroMeansUndefined": true,
+    },
+    Object {
+      "defined": [Function],
+      "info": <p>
+        Maximum number of rows to include in a partition.
+      </p>,
+      "label": "Max rows per segment",
+      "name": "tuningConfig.partitionsSpec.maxRowsPerSegment",
+      "required": [Function],
+      "type": "number",
+      "zeroMeansUndefined": true,
+    },
+    Object {
+      "defaultValue": false,
+      "defined": [Function],
+      "info": <p>
+        Assume that input data has already been grouped on time and dimensions. Ingestion will run
+        faster, but may choose sub-optimal partitions if this assumption is violated.
+      </p>,
+      "label": "Assume grouped",
+      "name": "tuningConfig.partitionsSpec.assumeGrouped",
+      "type": "boolean",
+    },
+    Object {
+      "info": <p>
+        Maximum number of total segment bytes processed per compaction task. Since a time chunk must
+        be processed in its entirety, if the segments for a particular time chunk have a total size
+        in bytes greater than this parameter, compaction will not run for that time chunk. Because
+        each compaction task runs with a single thread, setting this value too far above 1-2GB will
+        result in compaction tasks taking an excessive amount of time.
+      </p>,
+      "name": "inputSegmentSizeBytes",
+      "type": "number",
+    },
+    Object {
+      "defaultValue": 1,
+      "defined": [Function],
+      "info": ...
 ...
, - "name": "taskPriority", - "type": "number", - }, + "dataSource": "test1", + "tuningConfig": Object { + "partitionsSpec": Object { + "type": "hashed", + }, + }, + } + } + onChange={[Function]} + /> +
, + "name": "inputSegmentSizeBytes", + "type": "number", + }, + Object { + "defaultValue": 1, + "defined": [Function], + "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must - be processed in its entirety, if the segments for a particular time chunk have a total size - in bytes greater than this parameter, compaction will not run for that time chunk. Because - each compaction task runs with a single thread, setting this value too far above 1–2GB will - result in compaction tasks taking an excessive amount of time. + The offset for searching segments to be compacted. Strongly recommended to set for realtime + dataSources.
       </p>
     ),
   },
   {
-    name: 'skipOffsetFromLatest',
+    name: 'tuningConfig.partitionsSpec.type',
+    label: 'Partitioning type',
     type: 'string',
-    defaultValue: 'P1D',
+    suggestions: ['dynamic', 'hashed', 'single_dim'],
     info: (
       <p>
-        The offset for searching segments to be compacted. Strongly recommended to set for realtime
-        dataSources.
+        For perfect rollup, you should use either <Code>hashed</Code> (partitioning based on the
+        hash of dimensions in each row) or <Code>single_dim</Code> (based on ranges of a single
+        dimension). For best-effort rollup, you should use <Code>dynamic</Code>.
       </p>
     ),
   },
+  // partitionsSpec type: dynamic
   {
-    name: 'maxRowsPerSegment',
+    name: 'tuningConfig.partitionsSpec.maxRowsPerSegment',
+    label: 'Max rows per segment',
     type: 'number',
     defaultValue: 5000000,
-    info: <p>Determines how many rows are in each segment.</p>,
+    defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic',
+    info: <>Determines how many rows are in each segment.</>,
   },
   {
-    name: 'taskContext',
-    type: 'json',
+    name: 'tuningConfig.partitionsSpec.maxTotalRows',
+    label: 'Max total rows',
+    type: 'number',
+    defaultValue: 20000000,
+    defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic',
+    info: <>Total number of rows in segments waiting for being pushed.</>,
+  },
+  // partitionsSpec type: hashed
+  {
+    name: 'tuningConfig.partitionsSpec.numShards',
+    label: 'Num shards',
+    type: 'number',
+    required: true, // ToDo: this will no longer be required after https://github.com/apache/druid/pull/10419 is merged
+    defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'hashed',
+    info: (
+      <>
+        Directly specify the number of shards to create. If this is specified and 'intervals' is
+        specified in the granularitySpec, the index task can skip the determine intervals/partitions
+        pass through the data.
+      </>
+    ),
+  },
+  {
+    name: 'tuningConfig.partitionsSpec.partitionDimensions',
+    label: 'Partition dimensions',
+    type: 'string-array',
+    defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'hashed',
+    info: <p>The dimensions to partition on. Leave blank to select all dimensions.</p>,
+  },
+  // partitionsSpec type: single_dim
+  {
+    name: 'tuningConfig.partitionsSpec.partitionDimension',
+    label: 'Partition dimension',
+    type: 'string',
+    defined: (t: CompactionConfig) =>
+      deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim',
+    required: true,
+    info: <p>The dimension to partition on.</p>,
+  },
+  {
+    name: 'tuningConfig.partitionsSpec.targetRowsPerSegment',
+    label: 'Target rows per segment',
+    type: 'number',
+    zeroMeansUndefined: true,
+    defined: (t: CompactionConfig) =>
+      deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim' &&
+      !deepGet(t, 'tuningConfig.partitionsSpec.maxRowsPerSegment'),
+    required: (t: CompactionConfig) =>
+      !deepGet(t, 'tuningConfig.partitionsSpec.targetRowsPerSegment') &&
+      !deepGet(t, 'tuningConfig.partitionsSpec.maxRowsPerSegment'),
+    info: (
+      <p>
+        Target number of rows to include in a partition, should be a number that targets segments
+        of 500MB~1GB.
+      </p>
+    ),
+  },
   {
-    name: 'taskPriority',
+    name: 'tuningConfig.partitionsSpec.maxRowsPerSegment',
+    label: 'Max rows per segment',
     type: 'number',
-    info: <p>Priority of the compaction task.</p>,
+    zeroMeansUndefined: true,
+    defined: (t: CompactionConfig) =>
+      deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim' &&
+      !deepGet(t, 'tuningConfig.partitionsSpec.targetRowsPerSegment'),
+    required: (t: CompactionConfig) =>
+      !deepGet(t, 'tuningConfig.partitionsSpec.targetRowsPerSegment') &&
+      !deepGet(t, 'tuningConfig.partitionsSpec.maxRowsPerSegment'),
+    info: <p>Maximum number of rows to include in a partition.</p>,
   },
   {
-    name: 'tuningConfig',
-    type: 'json',
+    name: 'tuningConfig.partitionsSpec.assumeGrouped',
+    label: 'Assume grouped',
+    type: 'boolean',
+    defaultValue: false,
+    defined: (t: CompactionConfig) =>
+      deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim',
     info: (
       <p>
-        ...
+        Assume that input data has already been grouped on time and dimensions. Ingestion will run
+        faster, but may choose sub-optimal partitions if this assumption is violated.
       </p>
     ),
   },
+  {
+    name: 'inputSegmentSizeBytes',
+    type: 'number',
+    info: (
+      <p>
+        Maximum number of total segment bytes processed per compaction task. Since a time chunk must
+        be processed in its entirety, if the segments for a particular time chunk have a total size
+        in bytes greater than this parameter, compaction will not run for that time chunk. Because
+        each compaction task runs with a single thread, setting this value too far above 1-2GB will
+        result in compaction tasks taking an excessive amount of time.
+      </p>
+    ),
+  },
+  {
+    name: 'tuningConfig.maxNumMergeTasks',
+    label: 'Max num merge tasks',
+    type: 'number',
+    defaultValue: 1,
+    min: 1,
+    defined: (t: CompactionConfig) =>
+      ['hashed', 'single_dim'].includes(deepGet(t, 'tuningConfig.partitionsSpec.type')),
+    info: <>Maximum number of merge tasks which can be run at the same time.</>,
+  },
+  {
+    name: 'tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask',
+    label: 'Max input segment bytes per task',
+    type: 'number',
+    defaultValue: 500000000,
+    min: 1000000,
+    adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'segments'),
+    info: (
+      <>
+        Maximum number of bytes of input segments to process in a single task. If a single segment
+        is larger than this number, it will be processed by itself in a single task (input segments
+        are never split across tasks).
+      </>
+    ),
+  },
 ];

+function validCompactionConfig(compactionConfig: CompactionConfig): boolean {
+  const partitionsSpecType =
+    deepGet(compactionConfig, 'tuningConfig.partitionsSpec.type') || 'dynamic';
+  switch (partitionsSpecType) {
+    // case 'dynamic': // Nothing to check for dynamic
+    case 'hashed':
+      // ToDo: this will no longer be required after https://github.com/apache/druid/pull/10419 is merged
+      if (!deepGet(compactionConfig, 'tuningConfig.partitionsSpec.numShards')) {
+        return false;
+      }
+      break;
+
+    case 'single_dim':
+      if (!deepGet(compactionConfig, 'tuningConfig.partitionsSpec.partitionDimension')) {
+        return false;
+      }
+      const hasTargetRowsPerSegment = Boolean(
+        deepGet(compactionConfig, 'tuningConfig.partitionsSpec.targetRowsPerSegment'),
+      );
+      const hasMaxRowsPerSegment = Boolean(
+        deepGet(compactionConfig, 'tuningConfig.partitionsSpec.maxRowsPerSegment'),
+      );
+      if (hasTargetRowsPerSegment === hasMaxRowsPerSegment) {
+        return false;
+      }
+      break;
+  }
+
+  return true;
+}
+
 export interface CompactionDialogProps {
   onClose: () => void;
-  onSave: (config: Record<string, any>) => void;
+  onSave: (compactionConfig: CompactionConfig) => void;
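validCompactionConfig mirrors the required callbacks on the fields above. A quick sketch of the
three cases (hypothetical calls with minimal example configs; the function is private to this
module):

validCompactionConfig({ dataSource: 'test1' });
// -> true: a missing partitionsSpec type falls back to 'dynamic', which needs nothing extra

validCompactionConfig({
  dataSource: 'test1',
  tuningConfig: { partitionsSpec: { type: 'hashed' } },
});
// -> false: 'hashed' currently requires numShards

validCompactionConfig({
  dataSource: 'test1',
  tuningConfig: {
    partitionsSpec: {
      type: 'single_dim',
      partitionDimension: 'channel',
      targetRowsPerSegment: 5000000,
      maxRowsPerSegment: 5000000,
    },
  },
});
// -> false: target and max rows per segment are mutually exclusive (set exactly one)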
@@ ... @@
           For perfect rollup, you should use either <Code>hashed</Code> (partitioning based on
           the hash of dimensions in each row) or <Code>single_dim</Code> (based on ranges of a
-          single dimension. For best-effort rollup, you should use <Code>dynamic</Code>.
+          single dimension). For best-effort rollup, you should use <Code>dynamic</Code>.
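All of the defined/required predicates above read dot-separated paths off the config through the
console's deepGet utility. A minimal stand-in (hypothetical; the real helper lives in the
console's own utils) showing how one such predicate evaluates:

// Hypothetical stand-in for deepGet: walks a dot-separated path, returning
// undefined when any step is missing.
function deepGet(obj: Record<string, any>, path: string): any {
  return path.split('.').reduce((o: any, key) => (o == null ? undefined : o[key]), obj);
}

const config = { tuningConfig: { partitionsSpec: { type: 'single_dim' } } };

// Mirrors the `defined` predicate on tuningConfig.partitionsSpec.targetRowsPerSegment:
const targetRowsShown =
  deepGet(config, 'tuningConfig.partitionsSpec.type') === 'single_dim' &&
  !deepGet(config, 'tuningConfig.partitionsSpec.maxRowsPerSegment');
// -> true: the field stays visible until maxRowsPerSegment is set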