diff --git a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap index f3b66e810932..3a0a83841f63 100644 --- a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap +++ b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap @@ -1,79 +1,660 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP -exports[`compaction dialog matches snapshot 1`] = ` +exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partitionsSpec) 1`] = ` - - Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -

, - "name": "inputSegmentSizeBytes", - "type": "number", - }, - Object { - "defaultValue": "P1D", - "info":

- The offset for searching segments to be compacted. Strongly recommended to set for realtime dataSources. -

, - "name": "skipOffsetFromLatest", - "type": "string", - }, - Object { - "defaultValue": 5000000, - "info":

- Determines how many rows are in each segment. -

, - "name": "maxRowsPerSegment", - "type": "number", - }, + + + + + + +
+ + The offset for searching segments to be compacted. Strongly recommended to set for realtime dataSources. +

, + "name": "skipOffsetFromLatest", + "type": "string", + }, + Object { + "info":

+ For perfect rollup, you should use either + + hashed + + (partitioning based on the hash of dimensions in each row) or + + single_dim + + (based on ranges of a single dimension). For best-effort rollup, you should use + + dynamic + + . +

, + "label": "Partitioning type", + "name": "tuningConfig.partitionsSpec.type", + "suggestions": Array [ + "dynamic", + "hashed", + "single_dim", + ], + "type": "string", + }, + Object { + "defaultValue": 5000000, + "defined": [Function], + "info": + Determines how many rows are in each segment. + , + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "type": "number", + }, + Object { + "defaultValue": 20000000, + "defined": [Function], + "info": + Total number of rows in segments waiting for being pushed. + , + "label": "Max total rows", + "name": "tuningConfig.partitionsSpec.maxTotalRows", + "type": "number", + }, + Object { + "defined": [Function], + "info": + Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data. + , + "label": "Num shards", + "name": "tuningConfig.partitionsSpec.numShards", + "required": true, + "type": "number", + }, + Object { + "defined": [Function], + "info":

+ The dimensions to partition on. Leave blank to select all dimensions. +

, + "label": "Partition dimensions", + "name": "tuningConfig.partitionsSpec.partitionDimensions", + "type": "string-array", + }, + Object { + "defined": [Function], + "info":

+ The dimension to partition on. +

, + "label": "Partition dimension", + "name": "tuningConfig.partitionsSpec.partitionDimension", + "required": true, + "type": "string", + }, + Object { + "defined": [Function], + "info":

+ Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB. +

, + "label": "Target rows per segment", + "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defined": [Function], + "info":

+ Maximum number of rows to include in a partition. +

, + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defaultValue": false, + "defined": [Function], + "info":

+ Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated. +

, + "label": "Assume grouped", + "name": "tuningConfig.partitionsSpec.assumeGrouped", + "type": "boolean", + }, + Object { + "defaultValue": 1, + "info": + Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. + , + "label": "Max num concurrent sub tasks", + "min": 1, + "name": "tuningConfig.maxNumConcurrentSubTasks", + "type": "number", + }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, + Object { + "defaultValue": 1, + "defined": [Function], + "info": + Maximum number of merge tasks which can be run at the same time. + , + "label": "Max num merge tasks", + "min": 1, + "name": "tuningConfig.maxNumMergeTasks", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 500000000, + "info": + Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). + , + "label": "Max input segment bytes per task", + "min": 1000000, + "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "type": "number", + }, + ] + } + model={ Object { - "info":

- - Task context - - - for compaction tasks. -

, - "name": "taskContext", - "type": "json", - }, + "dataSource": "test1", + "tuningConfig": Object { + "partitionsSpec": Object { + "type": "dynamic", + }, + }, + } + } + onChange={[Function]} + /> +
+
+
+ + + +
+
+
+`; + +exports[`CompactionDialog matches snapshot with compactionConfig (hashed partitionsSpec) 1`] = ` + + + + + + + +
+ + The offset for searching segments to be compacted. Strongly recommended to set for realtime dataSources. +

, + "name": "skipOffsetFromLatest", + "type": "string", + }, + Object { + "info":

+ For perfect rollup, you should use either + + hashed + + (partitioning based on the hash of dimensions in each row) or + + single_dim + + (based on ranges of a single dimension). For best-effort rollup, you should use + + dynamic + + . +

, + "label": "Partitioning type", + "name": "tuningConfig.partitionsSpec.type", + "suggestions": Array [ + "dynamic", + "hashed", + "single_dim", + ], + "type": "string", + }, + Object { + "defaultValue": 5000000, + "defined": [Function], + "info": + Determines how many rows are in each segment. + , + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "type": "number", + }, + Object { + "defaultValue": 20000000, + "defined": [Function], + "info": + Total number of rows in segments waiting for being pushed. + , + "label": "Max total rows", + "name": "tuningConfig.partitionsSpec.maxTotalRows", + "type": "number", + }, + Object { + "defined": [Function], + "info": + Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data. + , + "label": "Num shards", + "name": "tuningConfig.partitionsSpec.numShards", + "required": true, + "type": "number", + }, + Object { + "defined": [Function], + "info":

+ The dimensions to partition on. Leave blank to select all dimensions. +

, + "label": "Partition dimensions", + "name": "tuningConfig.partitionsSpec.partitionDimensions", + "type": "string-array", + }, + Object { + "defined": [Function], + "info":

+ The dimension to partition on. +

, + "label": "Partition dimension", + "name": "tuningConfig.partitionsSpec.partitionDimension", + "required": true, + "type": "string", + }, + Object { + "defined": [Function], + "info":

+ Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB. +

, + "label": "Target rows per segment", + "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defined": [Function], + "info":

+ Maximum number of rows to include in a partition. +

, + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defaultValue": false, + "defined": [Function], + "info":

+ Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated. +

, + "label": "Assume grouped", + "name": "tuningConfig.partitionsSpec.assumeGrouped", + "type": "boolean", + }, + Object { + "defaultValue": 1, + "info": + Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. + , + "label": "Max num concurrent sub tasks", + "min": 1, + "name": "tuningConfig.maxNumConcurrentSubTasks", + "type": "number", + }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, + Object { + "defaultValue": 1, + "defined": [Function], + "info": + Maximum number of merge tasks which can be run at the same time. + , + "label": "Max num merge tasks", + "min": 1, + "name": "tuningConfig.maxNumMergeTasks", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 500000000, + "info": + Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). + , + "label": "Max input segment bytes per task", + "min": 1000000, + "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "type": "number", + }, + ] + } + model={ Object { - "defaultValue": 25, - "info":

- Priority of the compaction task. -

, - "name": "taskPriority", - "type": "number", - }, + "dataSource": "test1", + "tuningConfig": Object { + "partitionsSpec": Object { + "type": "hashed", + }, + }, + } + } + onChange={[Function]} + /> +
+
+
+ + + +
+
+
+`; + +exports[`CompactionDialog matches snapshot with compactionConfig (single_dim partitionsSpec) 1`] = ` + + + + + + + +
+ + The offset for searching segments to be compacted. Strongly recommended to set for realtime dataSources. +

, + "name": "skipOffsetFromLatest", + "type": "string", + }, + Object { + "info":

+ For perfect rollup, you should use either + + hashed + + (partitioning based on the hash of dimensions in each row) or + + single_dim + + (based on ranges of a single dimension). For best-effort rollup, you should use + + dynamic + + . +

, + "label": "Partitioning type", + "name": "tuningConfig.partitionsSpec.type", + "suggestions": Array [ + "dynamic", + "hashed", + "single_dim", + ], + "type": "string", + }, + Object { + "defaultValue": 5000000, + "defined": [Function], + "info": + Determines how many rows are in each segment. + , + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "type": "number", + }, + Object { + "defaultValue": 20000000, + "defined": [Function], + "info": + Total number of rows in segments waiting for being pushed. + , + "label": "Max total rows", + "name": "tuningConfig.partitionsSpec.maxTotalRows", + "type": "number", + }, + Object { + "defined": [Function], + "info": + Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data. + , + "label": "Num shards", + "name": "tuningConfig.partitionsSpec.numShards", + "required": true, + "type": "number", + }, + Object { + "defined": [Function], + "info":

+ The dimensions to partition on. Leave blank to select all dimensions. +

, + "label": "Partition dimensions", + "name": "tuningConfig.partitionsSpec.partitionDimensions", + "type": "string-array", + }, + Object { + "defined": [Function], + "info":

+ The dimension to partition on. +

, + "label": "Partition dimension", + "name": "tuningConfig.partitionsSpec.partitionDimension", + "required": true, + "type": "string", + }, + Object { + "defined": [Function], + "info":

+ Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB. +

, + "label": "Target rows per segment", + "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defined": [Function], + "info":

+ Maximum number of rows to include in a partition. +

, + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defaultValue": false, + "defined": [Function], + "info":

+ Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated. +

, + "label": "Assume grouped", + "name": "tuningConfig.partitionsSpec.assumeGrouped", + "type": "boolean", + }, + Object { + "defaultValue": 1, + "info": + Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. + , + "label": "Max num concurrent sub tasks", + "min": 1, + "name": "tuningConfig.maxNumConcurrentSubTasks", + "type": "number", + }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, + Object { + "defaultValue": 1, + "defined": [Function], + "info": + Maximum number of merge tasks which can be run at the same time. + , + "label": "Max num merge tasks", + "min": 1, + "name": "tuningConfig.maxNumMergeTasks", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 500000000, + "info": + Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). + , + "label": "Max input segment bytes per task", + "min": 1000000, + "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "type": "number", + }, + ] + } + model={ Object { - "info":

- - Tuning config - - - for compaction tasks. -

, - "name": "tuningConfig", - "type": "json", - }, - ] - } - model={Object {}} - onChange={[Function]} - /> + "dataSource": "test1", + "tuningConfig": Object { + "partitionsSpec": Object { + "type": "single_dim", + }, + }, + } + } + onChange={[Function]} + /> +
@@ -81,11 +662,232 @@ exports[`compaction dialog matches snapshot 1`] = ` className="bp3-dialog-footer-actions" > + + +
+ +
+`; + +exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` + + + + + + + +
+ + The offset for searching segments to be compacted. Strongly recommended to set for realtime dataSources. +

, + "name": "skipOffsetFromLatest", + "type": "string", + }, + Object { + "info":

+ For perfect rollup, you should use either + + hashed + + (partitioning based on the hash of dimensions in each row) or + + single_dim + + (based on ranges of a single dimension). For best-effort rollup, you should use + + dynamic + + . +

, + "label": "Partitioning type", + "name": "tuningConfig.partitionsSpec.type", + "suggestions": Array [ + "dynamic", + "hashed", + "single_dim", + ], + "type": "string", + }, + Object { + "defaultValue": 5000000, + "defined": [Function], + "info": + Determines how many rows are in each segment. + , + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "type": "number", + }, + Object { + "defaultValue": 20000000, + "defined": [Function], + "info": + Total number of rows in segments waiting for being pushed. + , + "label": "Max total rows", + "name": "tuningConfig.partitionsSpec.maxTotalRows", + "type": "number", + }, + Object { + "defined": [Function], + "info": + Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data. + , + "label": "Num shards", + "name": "tuningConfig.partitionsSpec.numShards", + "required": true, + "type": "number", + }, + Object { + "defined": [Function], + "info":

+ The dimensions to partition on. Leave blank to select all dimensions. +

, + "label": "Partition dimensions", + "name": "tuningConfig.partitionsSpec.partitionDimensions", + "type": "string-array", + }, + Object { + "defined": [Function], + "info":

+ The dimension to partition on. +

, + "label": "Partition dimension", + "name": "tuningConfig.partitionsSpec.partitionDimension", + "required": true, + "type": "string", + }, + Object { + "defined": [Function], + "info":

+ Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB. +

, + "label": "Target rows per segment", + "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defined": [Function], + "info":

+ Maximum number of rows to include in a partition. +

, + "label": "Max rows per segment", + "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", + "required": [Function], + "type": "number", + "zeroMeansUndefined": true, + }, + Object { + "defaultValue": false, + "defined": [Function], + "info":

+ Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated. +

, + "label": "Assume grouped", + "name": "tuningConfig.partitionsSpec.assumeGrouped", + "type": "boolean", + }, + Object { + "defaultValue": 1, + "info": + Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. + , + "label": "Max num concurrent sub tasks", + "min": 1, + "name": "tuningConfig.maxNumConcurrentSubTasks", + "type": "number", + }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, + Object { + "defaultValue": 1, + "defined": [Function], + "info": + Maximum number of merge tasks which can be run at the same time. + , + "label": "Max num merge tasks", + "min": 1, + "name": "tuningConfig.maxNumMergeTasks", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 500000000, + "info": + Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). + , + "label": "Max input segment bytes per task", + "min": 1000000, + "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "type": "number", + }, + ] + } + model={ + Object { + "dataSource": "test1", + "tuningConfig": Object { + "partitionsSpec": Object { + "type": "dynamic", + }, + }, + } + } + onChange={[Function]} + /> +
+
+
{ - it('matches snapshot', () => { +describe('CompactionDialog', () => { + it('matches snapshot without compactionConfig', () => { const compactionDialog = shallow( {}} onSave={() => {}} onDelete={() => {}} - datasource={'test'} - compactionConfig={{}} + datasource={'test1'} + compactionConfig={undefined} + />, + ); + expect(compactionDialog).toMatchSnapshot(); + }); + + it('matches snapshot with compactionConfig (dynamic partitionsSpec)', () => { + const compactionDialog = shallow( + {}} + onSave={() => {}} + onDelete={() => {}} + datasource={'test1'} + compactionConfig={{ + dataSource: 'test1', + tuningConfig: { partitionsSpec: { type: 'dynamic' } }, + }} + />, + ); + expect(compactionDialog).toMatchSnapshot(); + }); + + it('matches snapshot with compactionConfig (hashed partitionsSpec)', () => { + const compactionDialog = shallow( + {}} + onSave={() => {}} + onDelete={() => {}} + datasource={'test1'} + compactionConfig={{ + dataSource: 'test1', + tuningConfig: { partitionsSpec: { type: 'hashed' } }, + }} + />, + ); + expect(compactionDialog).toMatchSnapshot(); + }); + + it('matches snapshot with compactionConfig (single_dim partitionsSpec)', () => { + const compactionDialog = shallow( + {}} + onSave={() => {}} + onDelete={() => {}} + datasource={'test1'} + compactionConfig={{ + dataSource: 'test1', + tuningConfig: { partitionsSpec: { type: 'single_dim' } }, + }} />, ); expect(compactionDialog).toMatchSnapshot(); diff --git a/web-console/src/dialogs/compaction-dialog/compaction-dialog.tsx b/web-console/src/dialogs/compaction-dialog/compaction-dialog.tsx index 85ce37d3bbcd..1798565a2f87 100644 --- a/web-console/src/dialogs/compaction-dialog/compaction-dialog.tsx +++ b/web-console/src/dialogs/compaction-dialog/compaction-dialog.tsx @@ -16,101 +16,248 @@ * limitations under the License. */ -import { Button, Classes, Dialog, Intent } from '@blueprintjs/core'; +import { Button, ButtonGroup, Classes, Code, Dialog, FormGroup, Intent } from '@blueprintjs/core'; import React, { useState } from 'react'; -import { AutoForm, ExternalLink, Field } from '../../components'; -import { getLink } from '../../links'; +import { AutoForm, Field, JsonInput } from '../../components'; +import { deepGet, deepSet } from '../../utils/object-change'; import './compaction-dialog.scss'; export const DEFAULT_MAX_ROWS_PER_SEGMENT = 5000000; -const COMPACTION_CONFIG_FIELDS: Field>[] = [ +type Tabs = 'form' | 'json'; + +type CompactionConfig = Record; + +const COMPACTION_CONFIG_FIELDS: Field[] = [ { - name: 'inputSegmentSizeBytes', - type: 'number', - defaultValue: 419430400, + name: 'skipOffsetFromLatest', + type: 'string', + defaultValue: 'P1D', info: (

- Maximum number of total segment bytes processed per compaction task. Since a time chunk must - be processed in its entirety, if the segments for a particular time chunk have a total size - in bytes greater than this parameter, compaction will not run for that time chunk. Because - each compaction task runs with a single thread, setting this value too far above 1–2GB will - result in compaction tasks taking an excessive amount of time. + The offset for searching segments to be compacted. Strongly recommended to set for realtime + dataSources.

), }, { - name: 'skipOffsetFromLatest', + name: 'tuningConfig.partitionsSpec.type', + label: 'Partitioning type', type: 'string', - defaultValue: 'P1D', + suggestions: ['dynamic', 'hashed', 'single_dim'], info: (

- The offset for searching segments to be compacted. Strongly recommended to set for realtime - dataSources. + For perfect rollup, you should use either hashed (partitioning based on the + hash of dimensions in each row) or single_dim (based on ranges of a single + dimension). For best-effort rollup, you should use dynamic.

), }, + // partitionsSpec type: dynamic { - name: 'maxRowsPerSegment', + name: 'tuningConfig.partitionsSpec.maxRowsPerSegment', + label: 'Max rows per segment', type: 'number', - defaultValue: DEFAULT_MAX_ROWS_PER_SEGMENT, - info:

Determines how many rows are in each segment.

, + defaultValue: 5000000, + defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic', + info: <>Determines how many rows are in each segment., }, { - name: 'taskContext', - type: 'json', + name: 'tuningConfig.partitionsSpec.maxTotalRows', + label: 'Max total rows', + type: 'number', + defaultValue: 20000000, + defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic', + info: <>Total number of rows in segments waiting for being pushed., + }, + // partitionsSpec type: hashed + { + name: 'tuningConfig.partitionsSpec.numShards', + label: 'Num shards', + type: 'number', + required: true, // ToDo: this will no longer be required after https://github.com/apache/druid/pull/10419 is merged + defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'hashed', + info: ( + <> + Directly specify the number of shards to create. If this is specified and 'intervals' is + specified in the granularitySpec, the index task can skip the determine intervals/partitions + pass through the data. + + ), + }, + { + name: 'tuningConfig.partitionsSpec.partitionDimensions', + label: 'Partition dimensions', + type: 'string-array', + defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'hashed', + info:

The dimensions to partition on. Leave blank to select all dimensions.

, + }, + // partitionsSpec type: single_dim + { + name: 'tuningConfig.partitionsSpec.partitionDimension', + label: 'Partition dimension', + type: 'string', + defined: (t: CompactionConfig) => + deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim', + required: true, + info:

The dimension to partition on.

, + }, + { + name: 'tuningConfig.partitionsSpec.targetRowsPerSegment', + label: 'Target rows per segment', + type: 'number', + zeroMeansUndefined: true, + defined: (t: CompactionConfig) => + deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim' && + !deepGet(t, 'tuningConfig.partitionsSpec.maxRowsPerSegment'), + required: (t: CompactionConfig) => + !deepGet(t, 'tuningConfig.partitionsSpec.targetRowsPerSegment') && + !deepGet(t, 'tuningConfig.partitionsSpec.maxRowsPerSegment'), info: (

- - Task context - {' '} - for compaction tasks. + Target number of rows to include in a partition, should be a number that targets segments of + 500MB~1GB.

), }, { - name: 'taskPriority', + name: 'tuningConfig.partitionsSpec.maxRowsPerSegment', + label: 'Max rows per segment', type: 'number', - defaultValue: 25, - info:

Priority of the compaction task.

, + zeroMeansUndefined: true, + defined: (t: CompactionConfig) => + deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim' && + !deepGet(t, 'tuningConfig.partitionsSpec.targetRowsPerSegment'), + required: (t: CompactionConfig) => + !deepGet(t, 'tuningConfig.partitionsSpec.targetRowsPerSegment') && + !deepGet(t, 'tuningConfig.partitionsSpec.maxRowsPerSegment'), + info:

Maximum number of rows to include in a partition.

, }, { - name: 'tuningConfig', - type: 'json', + name: 'tuningConfig.partitionsSpec.assumeGrouped', + label: 'Assume grouped', + type: 'boolean', + defaultValue: false, + defined: (t: CompactionConfig) => + deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim', info: (

- - Tuning config - {' '} - for compaction tasks. + Assume that input data has already been grouped on time and dimensions. Ingestion will run + faster, but may choose sub-optimal partitions if this assumption is violated.

), }, + { + name: 'tuningConfig.maxNumConcurrentSubTasks', + label: 'Max num concurrent sub tasks', + type: 'number', + defaultValue: 1, + min: 1, + info: ( + <> + Maximum number of tasks which can be run at the same time. The supervisor task would spawn + worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this + value is set to 1, the supervisor task processes data ingestion on its own instead of + spawning worker tasks. If this value is set to too large, too many worker tasks can be + created which might block other ingestion. + + ), + }, + { + name: 'inputSegmentSizeBytes', + type: 'number', + defaultValue: 419430400, + info: ( +

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must + be processed in its entirety, if the segments for a particular time chunk have a total size + in bytes greater than this parameter, compaction will not run for that time chunk. Because + each compaction task runs with a single thread, setting this value too far above 1–2GB will + result in compaction tasks taking an excessive amount of time. +

+ ), + }, + { + name: 'tuningConfig.maxNumMergeTasks', + label: 'Max num merge tasks', + type: 'number', + defaultValue: 1, + min: 1, + defined: (t: CompactionConfig) => + ['hashed', 'single_dim'].includes(deepGet(t, 'tuningConfig.partitionsSpec.type')), + info: <>Maximum number of merge tasks which can be run at the same time., + }, + { + name: 'tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask', + label: 'Max input segment bytes per task', + type: 'number', + defaultValue: 500000000, + min: 1000000, + adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'segments'), + info: ( + <> + Maximum number of bytes of input segments to process in a single task. If a single segment + is larger than this number, it will be processed by itself in a single task (input segments + are never split across tasks). + + ), + }, ]; +function validCompactionConfig(compactionConfig: CompactionConfig): boolean { + const partitionsSpecType = + deepGet(compactionConfig, 'tuningConfig.partitionsSpec.type') || 'dynamic'; + switch (partitionsSpecType) { + // case 'dynamic': // Nothing to check for dynamic + case 'hashed': + // ToDo: this will no longer be required after https://github.com/apache/druid/pull/10419 is merged + if (!deepGet(compactionConfig, 'tuningConfig.partitionsSpec.numShards')) { + return false; + } + break; + + case 'single_dim': + if (!deepGet(compactionConfig, 'tuningConfig.partitionsSpec.partitionDimension')) { + return false; + } + const hasTargetRowsPerSegment = Boolean( + deepGet(compactionConfig, 'tuningConfig.partitionsSpec.targetRowsPerSegment'), + ); + const hasMaxRowsPerSegment = Boolean( + deepGet(compactionConfig, 'tuningConfig.partitionsSpec.maxRowsPerSegment'), + ); + if (hasTargetRowsPerSegment === hasMaxRowsPerSegment) { + return false; + } + break; + } + + return true; +} + export interface CompactionDialogProps { onClose: () => void; - onSave: (config: Record) => void; + onSave: (compactionConfig: CompactionConfig) => void; onDelete: () => void; datasource: string; - compactionConfig?: Record; + compactionConfig: CompactionConfig | undefined; } export const CompactionDialog = React.memo(function CompactionDialog(props: CompactionDialogProps) { const { datasource, compactionConfig, onSave, onClose, onDelete } = props; - const [currentConfig, setCurrentConfig] = useState>( + const [currentTab, setCurrentTab] = useState('form'); + const [currentConfig, setCurrentConfig] = useState( compactionConfig || { dataSource: datasource, + tuningConfig: { partitionsSpec: { type: 'dynamic' } }, }, ); function handleSubmit() { - if (!currentConfig) return; + if (!validCompactionConfig(currentConfig)) return; onSave(currentConfig); } @@ -122,25 +269,40 @@ export const CompactionDialog = React.memo(function CompactionDialog(props: Comp canOutsideClickClose={false} title={`Compaction config: ${datasource}`} > - setCurrentConfig(m)} - /> -
-
+ + +
diff --git a/web-console/src/utils/ingestion-spec.tsx b/web-console/src/utils/ingestion-spec.tsx index 530c269c96fd..d6d90bb64d9c 100644 --- a/web-console/src/utils/ingestion-spec.tsx +++ b/web-console/src/utils/ingestion-spec.tsx @@ -2120,10 +2120,13 @@ export function invalidTuningConfig(tuningConfig: TuningConfig, intervals: any): case 'single_dim': if (!deepGet(tuningConfig, 'partitionsSpec.partitionDimension')) return true; - if ( - !deepGet(tuningConfig, 'partitionsSpec.targetRowsPerSegment') && - !deepGet(tuningConfig, 'partitionsSpec.maxRowsPerSegment') - ) { + const hasTargetRowsPerSegment = Boolean( + deepGet(tuningConfig, 'partitionsSpec.targetRowsPerSegment'), + ); + const hasMaxRowsPerSegment = Boolean( + deepGet(tuningConfig, 'partitionsSpec.maxRowsPerSegment'), + ); + if (hasTargetRowsPerSegment === hasMaxRowsPerSegment) { return true; } } @@ -2160,7 +2163,7 @@ export function getPartitionRelatedTuningSpecFormFields(

For perfect rollup, you should use either hashed (partitioning based on the hash of dimensions in each row) or single_dim (based on ranges of a - single dimension. For best-effort rollup, you should use dynamic. + single dimension). For best-effort rollup, you should use dynamic.

), }, @@ -2192,8 +2195,7 @@ export function getPartitionRelatedTuningSpecFormFields( <> Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine - intervals/partitions pass through the data. numShards cannot be specified if - maxRowsPerSegment is set. + intervals/partitions pass through the data. ), }, @@ -2218,7 +2220,9 @@ export function getPartitionRelatedTuningSpecFormFields( label: 'Target rows per segment', type: 'number', zeroMeansUndefined: true, - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'single_dim', + defined: (t: TuningConfig) => + deepGet(t, 'partitionsSpec.type') === 'single_dim' && + !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), required: (t: TuningConfig) => !deepGet(t, 'partitionsSpec.targetRowsPerSegment') && !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), @@ -2234,7 +2238,9 @@ export function getPartitionRelatedTuningSpecFormFields( label: 'Max rows per segment', type: 'number', zeroMeansUndefined: true, - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'single_dim', + defined: (t: TuningConfig) => + deepGet(t, 'partitionsSpec.type') === 'single_dim' && + !deepGet(t, 'partitionsSpec.targetRowsPerSegment'), required: (t: TuningConfig) => !deepGet(t, 'partitionsSpec.targetRowsPerSegment') && !deepGet(t, 'partitionsSpec.maxRowsPerSegment'),