diff --git a/web-console/e2e-tests/reindexing.spec.ts b/web-console/e2e-tests/reindexing.spec.ts index ae45b735965f..a14262a8ab28 100644 --- a/web-console/e2e-tests/reindexing.spec.ts +++ b/web-console/e2e-tests/reindexing.spec.ts @@ -67,7 +67,7 @@ describe('Reindexing from Druid', () => { const configureSchemaConfig = new ConfigureSchemaConfig({ rollup: false }); const partitionConfig = new PartitionConfig({ segmentGranularity: SegmentGranularity.DAY, - timeIntervals: interval, + timeIntervals: null, partitionsSpec: new SingleDimPartitionsSpec({ partitionDimension: 'channel', targetRowsPerSegment: 10_000, diff --git a/web-console/e2e-tests/tutorial-batch.spec.ts b/web-console/e2e-tests/tutorial-batch.spec.ts index f4fa45054632..842d7a50182e 100644 --- a/web-console/e2e-tests/tutorial-batch.spec.ts +++ b/web-console/e2e-tests/tutorial-batch.spec.ts @@ -16,13 +16,14 @@ * limitations under the License. */ +import { SqlRef } from 'druid-query-toolkit'; import * as playwright from 'playwright-chromium'; import { DatasourcesOverview } from './component/datasources/overview'; import { IngestionOverview } from './component/ingestion/overview'; import { ConfigureSchemaConfig } from './component/load-data/config/configure-schema'; -import { PartitionConfig } from './component/load-data/config/partition'; import { SegmentGranularity } from './component/load-data/config/partition'; +import { PartitionConfig } from './component/load-data/config/partition'; import { PublishConfig } from './component/load-data/config/publish'; import { LocalFileDataConnector } from './component/load-data/data-connector/local-file'; import { DataLoader } from './component/load-data/data-loader'; @@ -37,6 +38,8 @@ import { waitTillWebConsoleReady } from './util/setup'; jest.setTimeout(5 * 60 * 1000); +const ALL_SORTS_OF_CHARS = '<>|!@#$%^&`\'".,:;\\*()[]{}Россия 한국 中国!?~'; + describe('Tutorial: Loading a file', () => { let browser: playwright.Browser; let page: playwright.Page; @@ -56,7 +59,7 @@ describe('Tutorial: Loading a file', () => { it('Loads data from local disk', async () => { const testName = 'load-data-from-local-disk-'; - const datasourceName = testName + new Date().toISOString(); + const datasourceName = testName + ALL_SORTS_OF_CHARS + new Date().toISOString(); const dataConnector = new LocalFileDataConnector(page, { baseDirectory: DRUID_EXAMPLES_QUICKSTART_TUTORIAL_DIR, fileFilter: 'wikiticker-2015-09-12-sampled.json.gz', @@ -168,7 +171,7 @@ async function validateDatasourceStatus(page: playwright.Page, datasourceName: s async function validateQuery(page: playwright.Page, datasourceName: string) { const queryOverview = new QueryOverview(page, UNIFIED_CONSOLE_URL); - const query = `SELECT * FROM "${datasourceName}" ORDER BY __time`; + const query = `SELECT * FROM ${SqlRef.table(datasourceName)} ORDER BY __time`; const results = await queryOverview.runQuery(query); expect(results).toBeDefined(); expect(results.length).toBeGreaterThan(0); diff --git a/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap b/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap index baf1011bdc8e..02a63a1b1074 100644 --- a/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap +++ b/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap @@ -8,23 +8,13 @@ exports[`AutoForm matches snapshot 1`] = ` key="testOne" label="Test one" > - > extends React.PureComponent static REQUIRED_INTENT = Intent.PRIMARY; static makeLabelName(label: string): string { - 
let newLabel = label + const parts = label.split('.'); + let newLabel = parts[parts.length - 1] .split(/(?=[A-Z])/) .join(' ') .toLowerCase() @@ -203,17 +205,17 @@ export class AutoForm> extends React.PureComponent private renderNumberInput(field: Field): JSX.Element { const { model, large, onFinalize } = this.props; - let modelValue = deepGet(model as any, field.name); - if (typeof modelValue !== 'number') modelValue = field.defaultValue; + const modelValue = deepGet(model as any, field.name); return ( - { - if (valueAsString === '' || isNaN(valueAsNumber)) return; - this.fieldChange( - field, - valueAsNumber === 0 && field.zeroMeansUndefined ? undefined : valueAsNumber, - ); + let newValue: number | undefined; + if (valueAsString !== '' && !isNaN(valueAsNumber)) { + newValue = valueAsNumber === 0 && field.zeroMeansUndefined ? undefined : valueAsNumber; + } + this.fieldChange(field, newValue); }} onBlur={e => { if (e.target.value === '') { diff --git a/web-console/src/components/numeric-input-with-default/__snapshots__/numeric-input-with-default.spec.tsx.snap b/web-console/src/components/numeric-input-with-default/__snapshots__/numeric-input-with-default.spec.tsx.snap new file mode 100644 index 000000000000..75335f79dcb5 --- /dev/null +++ b/web-console/src/components/numeric-input-with-default/__snapshots__/numeric-input-with-default.spec.tsx.snap @@ -0,0 +1,19 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`NumericInputWithDefault matches snapshot 1`] = ` + +`; diff --git a/web-console/src/components/numeric-input-with-default/numeric-input-with-default.spec.tsx b/web-console/src/components/numeric-input-with-default/numeric-input-with-default.spec.tsx new file mode 100644 index 000000000000..b0d2b61d2ce9 --- /dev/null +++ b/web-console/src/components/numeric-input-with-default/numeric-input-with-default.spec.tsx @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { shallow } from 'enzyme'; +import React from 'react'; + +import { NumericInputWithDefault } from './numeric-input-with-default'; + +describe('NumericInputWithDefault', () => { + it('matches snapshot', () => { + const numericInputWithDefault = shallow(); + + expect(numericInputWithDefault).toMatchSnapshot(); + }); +}); diff --git a/web-console/src/components/numeric-input-with-default/numeric-input-with-default.tsx b/web-console/src/components/numeric-input-with-default/numeric-input-with-default.tsx new file mode 100644 index 000000000000..fb731ac6f7db --- /dev/null +++ b/web-console/src/components/numeric-input-with-default/numeric-input-with-default.tsx @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { HTMLInputProps, INumericInputProps, NumericInput } from '@blueprintjs/core'; +import React, { useState } from 'react'; + +export type NumericInputWithDefaultProps = HTMLInputProps & INumericInputProps; + +export const NumericInputWithDefault = React.memo(function NumericInputWithDefault( + props: NumericInputWithDefaultProps, +) { + const { value, defaultValue, onValueChange, onBlur, ...rest } = props; + const [hasChanged, setHasChanged] = useState(false); + + let effectiveValue = value; + if (effectiveValue == null) { + effectiveValue = hasChanged ? '' : typeof defaultValue !== 'undefined' ? defaultValue : ''; + } + + return ( + { + setHasChanged(true); + if (!onValueChange) return; + return onValueChange(valueAsNumber, valueAsString, inputElement); + }} + onBlur={e => { + setHasChanged(false); + if (!onBlur) return; + return onBlur(e); + }} + {...rest} + /> + ); +}); diff --git a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap index e03e12a0373d..ba969a43086e 100644 --- a/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap +++ b/web-console/src/dialogs/compaction-dialog/__snapshots__/compaction-dialog.spec.tsx.snap @@ -63,7 +63,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info": Determines how many rows are in each segment. , - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "type": "number", }, @@ -73,7 +72,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info": Total number of rows in segments waiting for being pushed. , - "label": "Max total rows", "name": "tuningConfig.partitionsSpec.maxTotalRows", "type": "number", }, @@ -87,7 +85,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit A target row count for each partition. Each partition will have a row count close to the target assuming evenly distributed keys. Defaults to 5 million if numShards is null.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "type": "number", "zeroMeansUndefined": true, @@ -102,7 +99,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data.

, - "label": "Num shards", "name": "tuningConfig.partitionsSpec.numShards", "type": "number", "zeroMeansUndefined": true, @@ -112,7 +108,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":

The dimensions to partition on. Leave blank to select all dimensions.

, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -122,7 +117,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":

The dimension to partition on.

, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -132,7 +126,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":

Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -143,7 +136,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":

Maximum number of rows to include in a partition.

, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -155,48 +147,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":

Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.

, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info": Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. , - "label": "Max num concurrent sub tasks", "min": 1, "name": "tuningConfig.maxNumConcurrentSubTasks", "type": "number", }, - Object { - "defaultValue": 419430400, - "info":

- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -

, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info": Maximum number of merge tasks which can be run at the same time. , - "label": "Total num merge tasks", "min": 1, "name": "tuningConfig.totalNumMergeTasks", "type": "number", }, Object { "adjustment": [Function], - "defaultValue": 500000000, + "defaultValue": 1073741824, + "hideInMore": true, "info": Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). , - "label": "Max input segment bytes per task", "min": 1000000, - "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "name": "tuningConfig.splitHintSpec.maxSplitSize", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 1000, + "hideInMore": true, + "info": + Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper ( + + jute.maxbuffer + + ) and the max packet size in MySQL ( + + max_allowed_packet + + ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them. + , + "label": "Max num files (segments)", + "min": 1, + "name": "tuningConfig.splitHintSpec.maxNumFiles", "type": "number", }, ] @@ -303,7 +312,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info": Determines how many rows are in each segment. , - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "type": "number", }, @@ -313,7 +321,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info": Total number of rows in segments waiting for being pushed. , - "label": "Max total rows", "name": "tuningConfig.partitionsSpec.maxTotalRows", "type": "number", }, @@ -327,7 +334,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti A target row count for each partition. Each partition will have a row count close to the target assuming evenly distributed keys. Defaults to 5 million if numShards is null.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "type": "number", "zeroMeansUndefined": true, @@ -342,7 +348,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data.

, - "label": "Num shards", "name": "tuningConfig.partitionsSpec.numShards", "type": "number", "zeroMeansUndefined": true, @@ -352,7 +357,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":

The dimensions to partition on. Leave blank to select all dimensions.

, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -362,7 +366,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":

The dimension to partition on.

, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -372,7 +375,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":

Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -383,7 +385,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":

Maximum number of rows to include in a partition.

, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -395,48 +396,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":

Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.

, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info": Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. , - "label": "Max num concurrent sub tasks", "min": 1, "name": "tuningConfig.maxNumConcurrentSubTasks", "type": "number", }, - Object { - "defaultValue": 419430400, - "info":

- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -

, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info": Maximum number of merge tasks which can be run at the same time. , - "label": "Total num merge tasks", "min": 1, "name": "tuningConfig.totalNumMergeTasks", "type": "number", }, Object { "adjustment": [Function], - "defaultValue": 500000000, + "defaultValue": 1073741824, + "hideInMore": true, "info": Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). , - "label": "Max input segment bytes per task", "min": 1000000, - "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "name": "tuningConfig.splitHintSpec.maxSplitSize", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 1000, + "hideInMore": true, + "info": + Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper ( + + jute.maxbuffer + + ) and the max packet size in MySQL ( + + max_allowed_packet + + ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them. + , + "label": "Max num files (segments)", + "min": 1, + "name": "tuningConfig.splitHintSpec.maxNumFiles", "type": "number", }, ] @@ -543,7 +561,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info": Determines how many rows are in each segment. , - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "type": "number", }, @@ -553,7 +570,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info": Total number of rows in segments waiting for being pushed. , - "label": "Max total rows", "name": "tuningConfig.partitionsSpec.maxTotalRows", "type": "number", }, @@ -567,7 +583,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par A target row count for each partition. Each partition will have a row count close to the target assuming evenly distributed keys. Defaults to 5 million if numShards is null.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "type": "number", "zeroMeansUndefined": true, @@ -582,7 +597,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data.

, - "label": "Num shards", "name": "tuningConfig.partitionsSpec.numShards", "type": "number", "zeroMeansUndefined": true, @@ -592,7 +606,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":

The dimensions to partition on. Leave blank to select all dimensions.

, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -602,7 +615,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":

The dimension to partition on.

, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -612,7 +624,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":

Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -623,7 +634,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":

Maximum number of rows to include in a partition.

, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -635,48 +645,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":

Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.

, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info": Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. , - "label": "Max num concurrent sub tasks", "min": 1, "name": "tuningConfig.maxNumConcurrentSubTasks", "type": "number", }, - Object { - "defaultValue": 419430400, - "info":

- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -

, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info": Maximum number of merge tasks which can be run at the same time. , - "label": "Total num merge tasks", "min": 1, "name": "tuningConfig.totalNumMergeTasks", "type": "number", }, Object { "adjustment": [Function], - "defaultValue": 500000000, + "defaultValue": 1073741824, + "hideInMore": true, "info": Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). , - "label": "Max input segment bytes per task", "min": 1000000, - "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "name": "tuningConfig.splitHintSpec.maxSplitSize", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 1000, + "hideInMore": true, + "info": + Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper ( + + jute.maxbuffer + + ) and the max packet size in MySQL ( + + max_allowed_packet + + ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them. + , + "label": "Max num files (segments)", + "min": 1, + "name": "tuningConfig.splitHintSpec.maxNumFiles", "type": "number", }, ] @@ -783,7 +810,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info": Determines how many rows are in each segment. , - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "type": "number", }, @@ -793,7 +819,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info": Total number of rows in segments waiting for being pushed. , - "label": "Max total rows", "name": "tuningConfig.partitionsSpec.maxTotalRows", "type": "number", }, @@ -807,7 +832,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` A target row count for each partition. Each partition will have a row count close to the target assuming evenly distributed keys. Defaults to 5 million if numShards is null.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "type": "number", "zeroMeansUndefined": true, @@ -822,7 +846,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data.

, - "label": "Num shards", "name": "tuningConfig.partitionsSpec.numShards", "type": "number", "zeroMeansUndefined": true, @@ -832,7 +855,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":

The dimensions to partition on. Leave blank to select all dimensions.

, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -842,7 +864,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":

The dimension to partition on.

, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -852,7 +873,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":

Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.

, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -863,7 +883,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":

Maximum number of rows to include in a partition.

, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -875,48 +894,65 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":

Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.

, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +

, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info": Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion. , - "label": "Max num concurrent sub tasks", "min": 1, "name": "tuningConfig.maxNumConcurrentSubTasks", "type": "number", }, - Object { - "defaultValue": 419430400, - "info":

- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -

, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info": Maximum number of merge tasks which can be run at the same time. , - "label": "Total num merge tasks", "min": 1, "name": "tuningConfig.totalNumMergeTasks", "type": "number", }, Object { "adjustment": [Function], - "defaultValue": 500000000, + "defaultValue": 1073741824, + "hideInMore": true, "info": Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks). , - "label": "Max input segment bytes per task", "min": 1000000, - "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask", + "name": "tuningConfig.splitHintSpec.maxSplitSize", + "type": "number", + }, + Object { + "adjustment": [Function], + "defaultValue": 1000, + "hideInMore": true, + "info": + Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper ( + + jute.maxbuffer + + ) and the max packet size in MySQL ( + + max_allowed_packet + + ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them. + , + "label": "Max num files (segments)", + "min": 1, + "name": "tuningConfig.splitHintSpec.maxNumFiles", "type": "number", }, ] diff --git a/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap b/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap index 7b7dabc71a8b..e5056501c0cd 100644 --- a/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap +++ b/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap @@ -158,7 +158,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` "defaultValue": 0, "defined": [Function], "info": "Number of header rows to be skipped. 
The default number of header rows to be skipped is 0.", - "label": "Skip header rows", "name": "extractionNamespace.namespaceParseSpec.skipHeaderRows", "type": "number", }, @@ -166,14 +165,12 @@ exports[`LookupEditDialog matches snapshot 1`] = ` "defaultValue": false, "defined": [Function], "info": "A flag to indicate that column information can be extracted from the input files' header row", - "label": "Has header row", "name": "extractionNamespace.namespaceParseSpec.hasHeaderRow", "type": "boolean", }, Object { "defined": [Function], "info": "The list of columns in the csv file", - "label": "Columns", "name": "extractionNamespace.namespaceParseSpec.columns", "placeholder": "[\\"key\\", \\"value\\"]", "required": [Function], @@ -182,7 +179,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` Object { "defined": [Function], "info": "The name of the column containing the key", - "label": "Key column", "name": "extractionNamespace.namespaceParseSpec.keyColumn", "placeholder": "(optional - defaults to the first column)", "type": "string", @@ -190,28 +186,24 @@ exports[`LookupEditDialog matches snapshot 1`] = ` Object { "defined": [Function], "info": "The name of the column containing the value", - "label": "Value column", "name": "extractionNamespace.namespaceParseSpec.valueColumn", "placeholder": "(optional - defaults to the second column)", "type": "string", }, Object { "defined": [Function], - "label": "Delimiter", "name": "extractionNamespace.namespaceParseSpec.delimiter", "placeholder": "(optional)", "type": "string", }, Object { "defined": [Function], - "label": "List delimiter", "name": "extractionNamespace.namespaceParseSpec.listDelimiter", "placeholder": "(optional)", "type": "string", }, Object { "defined": [Function], - "label": "Key field name", "name": "extractionNamespace.namespaceParseSpec.keyFieldName", "placeholder": "key", "required": true, @@ -219,7 +211,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` }, Object { "defined": [Function], - "label": "Value field name", "name": "extractionNamespace.namespaceParseSpec.valueFieldName", "placeholder": "value", "required": true, @@ -229,7 +220,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` "defaultValue": "0", "defined": [Function], "info": "Period between polling for updates", - "label": "Poll period", "name": "extractionNamespace.pollPeriod", "type": "string", }, @@ -247,7 +237,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` .table WHERE filter

, - "label": "Namespace", "name": "extractionNamespace.namespace", "placeholder": "some_lookup", "required": true, @@ -264,21 +253,18 @@ exports[`LookupEditDialog matches snapshot 1`] = ` Object { "defined": [Function], "info": "Defines the user to be used by the connector config", - "label": "User", "name": "extractionNamespace.connectorConfig.user", "type": "string", }, Object { "defined": [Function], "info": "Defines the password to be used by the connector config", - "label": "Password", "name": "extractionNamespace.connectorConfig.password", "type": "string", }, Object { "defined": [Function], "info": "Should tables be created", - "label": "Create tables", "name": "extractionNamespace.connectorConfig.createTables", "type": "boolean", }, @@ -296,7 +282,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` WHERE filter

, - "label": "Table", "name": "extractionNamespace.table", "placeholder": "some_lookup_table", "required": true, @@ -316,7 +301,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` , valueColumn, tsColumn? FROM namespace.table WHERE filter

, - "label": "Key column", "name": "extractionNamespace.keyColumn", "placeholder": "my_key_value", "required": true, @@ -336,7 +320,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` , tsColumn? FROM namespace.table WHERE filter

, - "label": "Value column", "name": "extractionNamespace.valueColumn", "placeholder": "my_column_value", "required": true, @@ -356,7 +339,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `

, - "label": "Filter", "name": "extractionNamespace.filter", "placeholder": "(optional)", "type": "string", @@ -384,7 +366,6 @@ exports[`LookupEditDialog matches snapshot 1`] = ` "defaultValue": 0, "defined": [Function], "info": "How long to wait (in ms) for the first run of the cache to populate. 0 indicates to not wait", - "label": "First cache timeout", "name": "firstCacheTimeout", "type": "number", }, diff --git a/web-console/src/druid-models/__snapshots__/ingestion-spec.spec.ts.snap b/web-console/src/druid-models/__snapshots__/ingestion-spec.spec.ts.snap index 1008cb2112b3..be836323f121 100644 --- a/web-console/src/druid-models/__snapshots__/ingestion-spec.spec.ts.snap +++ b/web-console/src/druid-models/__snapshots__/ingestion-spec.spec.ts.snap @@ -16,7 +16,6 @@ Object { "queryGranularity": "hour", "rollup": true, "segmentGranularity": "day", - "type": "uniform", }, "metricsSpec": Array [ Object { diff --git a/web-console/src/druid-models/compaction-config.tsx b/web-console/src/druid-models/compaction-config.tsx index 02bb42f0004e..437f91404802 100644 --- a/web-console/src/druid-models/compaction-config.tsx +++ b/web-console/src/druid-models/compaction-config.tsx @@ -53,7 +53,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ // partitionsSpec type: dynamic { name: 'tuningConfig.partitionsSpec.maxRowsPerSegment', - label: 'Max rows per segment', type: 'number', defaultValue: 5000000, defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic', @@ -61,7 +60,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ }, { name: 'tuningConfig.partitionsSpec.maxTotalRows', - label: 'Max total rows', type: 'number', defaultValue: 20000000, defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic', @@ -70,7 +68,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ // partitionsSpec type: hashed { name: 'tuningConfig.partitionsSpec.targetRowsPerSegment', - label: 'Target rows per segment', type: 'number', zeroMeansUndefined: true, defined: (t: CompactionConfig) => @@ -91,7 +88,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ }, { name: 'tuningConfig.partitionsSpec.numShards', - label: 'Num shards', type: 'number', zeroMeansUndefined: true, defined: (t: CompactionConfig) => @@ -113,7 +109,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ }, { name: 'tuningConfig.partitionsSpec.partitionDimensions', - label: 'Partition dimensions', type: 'string-array', placeholder: '(all dimensions)', defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'hashed', @@ -122,7 +117,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ // partitionsSpec type: single_dim { name: 'tuningConfig.partitionsSpec.partitionDimension', - label: 'Partition dimension', type: 'string', defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim', @@ -131,7 +125,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ }, { name: 'tuningConfig.partitionsSpec.targetRowsPerSegment', - label: 'Target rows per segment', type: 'number', zeroMeansUndefined: true, defined: (t: CompactionConfig) => @@ -149,7 +142,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ }, { name: 'tuningConfig.partitionsSpec.maxRowsPerSegment', - label: 'Max rows per segment', type: 'number', zeroMeansUndefined: true, defined: (t: CompactionConfig) => @@ -162,7 +154,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ }, { name: 'tuningConfig.partitionsSpec.assumeGrouped', - label: 'Assume 
grouped', type: 'boolean', defaultValue: false, defined: (t: CompactionConfig) => @@ -174,9 +165,22 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [

), }, + { + name: 'inputSegmentSizeBytes', + type: 'number', + defaultValue: 419430400, + info: ( +

+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must + be processed in its entirety, if the segments for a particular time chunk have a total size + in bytes greater than this parameter, compaction will not run for that time chunk. Because + each compaction task runs with a single thread, setting this value too far above 1–2GB will + result in compaction tasks taking an excessive amount of time. +

+ ), + }, { name: 'tuningConfig.maxNumConcurrentSubTasks', - label: 'Max num concurrent sub tasks', type: 'number', defaultValue: 1, min: 1, @@ -190,23 +194,8 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ ), }, - { - name: 'inputSegmentSizeBytes', - type: 'number', - defaultValue: 419430400, - info: ( -

- Maximum number of total segment bytes processed per compaction task. Since a time chunk must - be processed in its entirety, if the segments for a particular time chunk have a total size - in bytes greater than this parameter, compaction will not run for that time chunk. Because - each compaction task runs with a single thread, setting this value too far above 1–2GB will - result in compaction tasks taking an excessive amount of time. -

- ), - }, { name: 'tuningConfig.totalNumMergeTasks', - label: 'Total num merge tasks', type: 'number', defaultValue: 10, min: 1, @@ -215,12 +204,12 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ info: <>Maximum number of merge tasks which can be run at the same time., }, { - name: 'tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask', - label: 'Max input segment bytes per task', + name: 'tuningConfig.splitHintSpec.maxSplitSize', type: 'number', - defaultValue: 500000000, + defaultValue: 1073741824, min: 1000000, - adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'segments'), + hideInMore: true, + adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'maxSize'), info: ( <> Maximum number of bytes of input segments to process in a single task. If a single segment @@ -229,4 +218,23 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [ ), }, + { + name: 'tuningConfig.splitHintSpec.maxNumFiles', + label: 'Max num files (segments)', + type: 'number', + defaultValue: 1000, + min: 1, + hideInMore: true, + adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'maxSize'), + info: ( + <> + Maximum number of input segments to process in a single subtask. This limit is to avoid task + failures when the ingestion spec is too long. There are two known limits on the max size of + serialized ingestion spec, i.e., the max ZNode size in ZooKeeper ( + jute.maxbuffer) and the max packet size in MySQL ( + max_allowed_packet). These can make ingestion tasks fail if the serialized + ingestion spec size hits one of them. + + ), + }, ]; diff --git a/web-console/src/druid-models/filter.tsx b/web-console/src/druid-models/filter.tsx index 10791294063e..5c11784ec69e 100644 --- a/web-console/src/druid-models/filter.tsx +++ b/web-console/src/druid-models/filter.tsx @@ -16,9 +16,14 @@ * limitations under the License. */ -import { Field } from '../components'; +import React from 'react'; + +import { ExternalLink, Field } from '../components'; +import { getLink } from '../links'; import { deepGet, EMPTY_ARRAY, oneOf } from '../utils'; +import { IngestionSpec } from './ingestion-spec'; + export type DruidFilter = Record; export interface DimensionFiltersWithRest { @@ -30,12 +35,17 @@ export function splitFilter(filter: DruidFilter | null): DimensionFiltersWithRes const inputAndFilters: DruidFilter[] = filter ? filter.type === 'and' && Array.isArray(filter.fields) ? filter.fields - : [filter] + : filter.type !== 'true' + ? 
[filter] + : EMPTY_ARRAY : EMPTY_ARRAY; + const dimensionFilters: DruidFilter[] = inputAndFilters.filter( - f => typeof f.dimension === 'string', + f => typeof getFilterDimension(f) === 'string', + ); + const restFilters: DruidFilter[] = inputAndFilters.filter( + f => typeof getFilterDimension(f) !== 'string', ); - const restFilters: DruidFilter[] = inputAndFilters.filter(f => typeof f.dimension !== 'string'); return { dimensionFilters, @@ -59,16 +69,24 @@ export function joinFilter( return { type: 'and', fields: newFields }; } +export function getFilterDimension(filter: DruidFilter): string | undefined { + if (typeof filter.dimension === 'string') return filter.dimension; + if (filter.type === 'not' && filter.field) return getFilterDimension(filter.field); + return; +} + +export const KNOWN_FILTER_TYPES = ['selector', 'in', 'interval', 'regex', 'like', 'not']; + export const FILTER_FIELDS: Field[] = [ { name: 'type', type: 'string', - suggestions: ['selector', 'in', 'regex', 'like', 'not'], + suggestions: KNOWN_FILTER_TYPES, }, { name: 'dimension', type: 'string', - defined: (df: DruidFilter) => oneOf(df.type, 'selector', 'in', 'regex', 'like'), + defined: (df: DruidFilter) => oneOf(df.type, 'selector', 'in', 'interval', 'regex', 'like'), }, { name: 'value', @@ -80,6 +98,12 @@ export const FILTER_FIELDS: Field[] = [ type: 'string-array', defined: (df: DruidFilter) => df.type === 'in', }, + { + name: 'intervals', + type: 'string-array', + defined: (df: DruidFilter) => df.type === 'interval', + placeholder: 'ex: 2020-01-01/2020-06-01', + }, { name: 'pattern', type: 'string', @@ -90,7 +114,7 @@ export const FILTER_FIELDS: Field[] = [ name: 'field.type', label: 'Sub-filter type', type: 'string', - suggestions: ['selector', 'in', 'regex', 'like'], + suggestions: ['selector', 'in', 'interval', 'regex', 'like'], defined: (df: DruidFilter) => df.type === 'not', }, { @@ -111,6 +135,13 @@ export const FILTER_FIELDS: Field[] = [ type: 'string-array', defined: (df: DruidFilter) => df.type === 'not' && deepGet(df, 'field.type') === 'in', }, + { + name: 'field.intervals', + label: 'Sub-filter intervals', + type: 'string-array', + defined: (df: DruidFilter) => df.type === 'not' && deepGet(df, 'field.type') === 'interval', + placeholder: 'ex: 2020-01-01/2020-06-01', + }, { name: 'field.pattern', label: 'Sub-filter pattern', @@ -119,3 +150,27 @@ export const FILTER_FIELDS: Field[] = [ df.type === 'not' && oneOf(deepGet(df, 'field.type'), 'regex', 'like'), }, ]; + +export const FILTERS_FIELDS: Field[] = [ + { + name: 'spec.dataSchema.transformSpec.filter', + type: 'json', + height: '350px', + placeholder: '{ "type": "true" }', + info: ( + <> +

+ A Druid{' '} + + JSON filter expression + {' '} + to apply to the data. +

+

+ Note that only the values that match the filter will be included. If you want to remove + some data values, you must negate the filter. +

+ + ), + }, +]; diff --git a/web-console/src/druid-models/ingestion-spec.spec.ts b/web-console/src/druid-models/ingestion-spec.spec.ts index e698128888e8..27b8aac64805 100644 --- a/web-console/src/druid-models/ingestion-spec.spec.ts +++ b/web-console/src/druid-models/ingestion-spec.spec.ts @@ -45,7 +45,6 @@ describe('ingestion-spec', () => { dataSchema: { dataSource: 'wikipedia', granularitySpec: { - type: 'uniform', segmentGranularity: 'day', queryGranularity: 'hour', rollup: true, @@ -183,7 +182,6 @@ describe('spec utils', () => { dataSchema: { dataSource: 'wikipedia', granularitySpec: { - type: 'uniform', segmentGranularity: 'day', queryGranularity: 'hour', }, @@ -207,9 +205,14 @@ describe('spec utils', () => { }); it('updateSchemaWithSample', () => { - expect( - updateSchemaWithSample(ingestionSpec, { header: ['header'], rows: [] }, 'specific', true), - ).toMatchInlineSnapshot(` + const withRollup = updateSchemaWithSample( + ingestionSpec, + { header: ['header'], rows: [] }, + 'specific', + true, + ); + + expect(withRollup).toMatchInlineSnapshot(` Object { "spec": Object { "dataSchema": Object { @@ -223,7 +226,6 @@ describe('spec utils', () => { "queryGranularity": "hour", "rollup": true, "segmentGranularity": "day", - "type": "uniform", }, "metricsSpec": Array [ Object { @@ -249,6 +251,60 @@ describe('spec utils', () => { "type": "index_parallel", }, "tuningConfig": Object { + "forceGuaranteedRollup": true, + "partitionsSpec": Object { + "type": "hashed", + }, + "type": "index_parallel", + }, + }, + "type": "index_parallel", + } + `); + + const noRollup = updateSchemaWithSample( + ingestionSpec, + { header: ['header'], rows: [] }, + 'specific', + false, + ); + + expect(noRollup).toMatchInlineSnapshot(` + Object { + "spec": Object { + "dataSchema": Object { + "dataSource": "wikipedia", + "dimensionsSpec": Object { + "dimensions": Array [ + "header", + ], + }, + "granularitySpec": Object { + "queryGranularity": "none", + "rollup": false, + "segmentGranularity": "day", + }, + "timestampSpec": Object { + "column": "timestamp", + "format": "iso", + }, + }, + "ioConfig": Object { + "inputFormat": Object { + "type": "json", + }, + "inputSource": Object { + "type": "http", + "uris": Array [ + "https://static.imply.io/data/wikipedia.json.gz", + ], + }, + "type": "index_parallel", + }, + "tuningConfig": Object { + "partitionsSpec": Object { + "type": "dynamic", + }, "type": "index_parallel", }, }, diff --git a/web-console/src/druid-models/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec.tsx index 73963da1589f..d4c9bdca89fe 100644 --- a/web-console/src/druid-models/ingestion-spec.tsx +++ b/web-console/src/druid-models/ingestion-spec.tsx @@ -19,13 +19,14 @@ import { Code } from '@blueprintjs/core'; import React from 'react'; -import { ExternalLink, Field } from '../components'; +import { AutoForm, ExternalLink, Field } from '../components'; import { getLink } from '../links'; import { deepDelete, deepGet, deepMove, deepSet, + deepSetIfUnset, EMPTY_ARRAY, EMPTY_OBJECT, filterMap, @@ -225,6 +226,22 @@ export function getRequiredModule(ingestionType: IngestionComboTypeWithExtra): s } } +export function getIssueWithSpec(spec: IngestionSpec): string | undefined { + if (!deepGet(spec, 'spec.dataSchema.dataSource')) { + return 'missing spec.dataSchema.dataSource'; + } + + if (!deepGet(spec, 'spec.dataSchema.timestampSpec')) { + return 'missing spec.dataSchema.timestampSpec'; + } + + if (!deepGet(spec, 'spec.dataSchema.dimensionsSpec')) { + return 'missing spec.dataSchema.dimensionsSpec'; 
+ } + + return; +} + // -------------- export interface DataSchema { @@ -290,11 +307,9 @@ export function normalizeSpec(spec: Partial): IngestionSpec { deepGet(spec, 'spec.tuningConfig.type'); if (!specType) return spec as IngestionSpec; - if (!deepGet(spec, 'type')) spec = deepSet(spec, 'type', specType); - if (!deepGet(spec, 'spec.ioConfig.type')) spec = deepSet(spec, 'spec.ioConfig.type', specType); - if (!deepGet(spec, 'spec.tuningConfig.type')) { - spec = deepSet(spec, 'spec.tuningConfig.type', specType); - } + spec = deepSetIfUnset(spec, 'type', specType); + spec = deepSetIfUnset(spec, 'spec.ioConfig.type', specType); + spec = deepSetIfUnset(spec, 'spec.tuningConfig.type', specType); return spec as IngestionSpec; } @@ -1235,7 +1250,7 @@ function basenameFromFilename(filename: string): string | undefined { export function fillDataSourceNameIfNeeded(spec: IngestionSpec): IngestionSpec { const possibleName = guessDataSourceName(spec); if (!possibleName) return spec; - return deepSet(spec, 'spec.dataSchema.dataSource', possibleName); + return deepSetIfUnset(spec, 'spec.dataSchema.dataSource', possibleName); } export function guessDataSourceName(spec: IngestionSpec): string | undefined { @@ -1340,57 +1355,81 @@ export interface PartitionsSpec { assumeGrouped?: boolean; } -export function adjustTuningConfig(tuningConfig: TuningConfig) { - const tuningConfigType = deepGet(tuningConfig, 'type'); - if (tuningConfigType !== 'index_parallel') return tuningConfig; +export function adjustForceGuaranteedRollup(spec: IngestionSpec) { + if (getSpecType(spec) !== 'index_parallel') return spec; - const partitionsSpecType = deepGet(tuningConfig, 'partitionsSpec.type') || 'dynamic'; + const partitionsSpecType = deepGet(spec, 'spec.tuningConfig.partitionsSpec.type') || 'dynamic'; if (partitionsSpecType === 'dynamic') { - tuningConfig = deepDelete(tuningConfig, 'forceGuaranteedRollup'); + spec = deepDelete(spec, 'spec.tuningConfig.forceGuaranteedRollup'); } else if (oneOf(partitionsSpecType, 'hashed', 'single_dim')) { - tuningConfig = deepSet(tuningConfig, 'forceGuaranteedRollup', true); + spec = deepSet(spec, 'spec.tuningConfig.forceGuaranteedRollup', true); } - return tuningConfig; + return spec; } -export function invalidTuningConfig(tuningConfig: TuningConfig, intervals: any): boolean { - if (tuningConfig.type !== 'index_parallel') return false; - - switch (deepGet(tuningConfig, 'partitionsSpec.type')) { - case 'hashed': - if (!intervals) return true; - return ( - Boolean(deepGet(tuningConfig, 'partitionsSpec.targetRowsPerSegment')) && - Boolean(deepGet(tuningConfig, 'partitionsSpec.numShards')) - ); - - case 'single_dim': - if (!intervals) return true; - if (!deepGet(tuningConfig, 'partitionsSpec.partitionDimension')) return true; - const hasTargetRowsPerSegment = Boolean( - deepGet(tuningConfig, 'partitionsSpec.targetRowsPerSegment'), - ); - const hasMaxRowsPerSegment = Boolean( - deepGet(tuningConfig, 'partitionsSpec.maxRowsPerSegment'), - ); - if (hasTargetRowsPerSegment === hasMaxRowsPerSegment) { - return true; - } - } - - return false; +export function invalidPartitionConfig(spec: IngestionSpec): boolean { + return ( + // Bad primary partitioning, or... 
+ !deepGet(spec, 'spec.dataSchema.granularitySpec.segmentGranularity') || + // Bad secondary partitioning + Boolean(AutoForm.issueWithModel(spec, getSecondaryPartitionRelatedFormFields(spec, undefined))) + ); } -export function getPartitionRelatedTuningSpecFormFields( - specType: IngestionType, +export const PRIMARY_PARTITION_RELATED_FORM_FIELDS: Field[] = [ + { + name: 'spec.dataSchema.granularitySpec.segmentGranularity', + label: 'Segment granularity', + type: 'string', + suggestions: ['hour', 'day', 'week', 'month', 'year'], + required: true, + info: ( + <> + The granularity to create time chunks at. Multiple segments can be created per time chunk. + For example, with 'DAY' segmentGranularity, the events of the same day fall into the same + time chunk which can be optionally further partitioned into multiple segments based on other + configurations and input size. + + ), + }, + { + name: 'spec.dataSchema.granularitySpec.intervals', + label: 'Time intervals', + type: 'string-array', + placeholder: '(auto determine)', + defined: s => getSpecType(s) === 'index_parallel', + info: ( + <> +

+ A list of intervals describing what time chunks of segments should be created. This list + will be broken up and rounded-off based on the segmentGranularity. +

+

+ If not provided, batch ingestion tasks will generally determine which time chunks to + output based on what timestamps are found in the input data. +

+

+ If specified, batch ingestion tasks may be able to skip a determining-partitions phase, + which can result in faster ingestion. Batch ingestion tasks may also be able to request + all their locks up-front instead of one by one. Batch ingestion tasks will throw away any + records with timestamps outside of the specified intervals. +

+ </> + ), + }, +]; + +export function getSecondaryPartitionRelatedFormFields( + spec: IngestionSpec, dimensionSuggestions: string[] | undefined, -): Field<TuningConfig>[] { +): Field<IngestionSpec>[] { + const specType = getSpecType(spec); switch (specType) { case 'index_parallel': return [ { - name: 'partitionsSpec.type', + name: 'spec.tuningConfig.partitionsSpec.type', label: 'Partitioning type', type: 'string', required: true, @@ -1402,38 +1441,46 @@ export function getPartitionRelatedTuningSpecFormFields( single dimension). For best-effort rollup, you should use dynamic.
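// For reference, minimal sketches of the three partitionsSpec shapes this form
// edits; the row counts mirror the defaults declared in the fields that follow,
// and the dimension name is invented:
const dynamicSpec = { type: 'dynamic', maxRowsPerSegment: 5000000, maxTotalRows: 20000000 };
const hashedSpec = { type: 'hashed', targetRowsPerSegment: 5000000 };
const singleDimSpec = { type: 'single_dim', partitionDimension: 'channel', targetRowsPerSegment: 5000000 };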

), - adjustment: (t: TuningConfig) => { - if (!Array.isArray(dimensionSuggestions) || !dimensionSuggestions.length) return t; - return deepSet(t, 'partitionsSpec.partitionDimension', dimensionSuggestions[0]); + adjustment: s => { + if ( + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') !== 'single_dim' || + !Array.isArray(dimensionSuggestions) || + !dimensionSuggestions.length + ) { + return s; + } + + return deepSet( + s, + 'spec.tuningConfig.partitionsSpec.partitionDimension', + dimensionSuggestions[0], + ); }, }, // partitionsSpec type: dynamic { - name: 'partitionsSpec.maxRowsPerSegment', - label: 'Max rows per segment', + name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment', type: 'number', defaultValue: 5000000, - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'dynamic', + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'dynamic', info: <>Determines how many rows are in each segment.</>, }, { - name: 'partitionsSpec.maxTotalRows', - label: 'Max total rows', + name: 'spec.tuningConfig.partitionsSpec.maxTotalRows', type: 'number', defaultValue: 20000000, - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'dynamic', + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'dynamic', info: <>Total number of rows in segments waiting to be pushed.</>, }, // partitionsSpec type: hashed { - name: 'partitionsSpec.targetRowsPerSegment', - label: 'Target rows per segment', + name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment', type: 'number', zeroMeansUndefined: true, defaultValue: 5000000, - defined: (t: TuningConfig) => - deepGet(t, 'partitionsSpec.type') === 'hashed' && - !deepGet(t, 'partitionsSpec.numShards'), + defined: s => + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'hashed' && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.numShards'), info: ( <>

@@ -1449,14 +1496,13 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.numShards', - label: 'Num shards', + name: 'spec.tuningConfig.partitionsSpec.numShards', type: 'number', zeroMeansUndefined: true, hideInMore: true, - defined: (t: TuningConfig) => - deepGet(t, 'partitionsSpec.type') === 'hashed' && - !deepGet(t, 'partitionsSpec.targetRowsPerSegment'), + defined: s => + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'hashed' && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment'), info: ( <>

@@ -1472,19 +1518,17 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.partitionDimensions', - label: 'Partition dimensions', + name: 'spec.tuningConfig.partitionsSpec.partitionDimensions', type: 'string-array', placeholder: '(all dimensions)', - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'hashed', + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'hashed', info:

The dimensions to partition on. Leave blank to select all dimensions.
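// For example (dimension names invented), hashing on an explicit subset of dimensions:
const hashedOnSubset = { type: 'hashed', numShards: 4, partitionDimensions: ['channel', 'countryName'] };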

, }, // partitionsSpec type: single_dim { - name: 'partitionsSpec.partitionDimension', - label: 'Partition dimension', + name: 'spec.tuningConfig.partitionsSpec.partitionDimension', type: 'string', - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'single_dim', + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim', required: true, suggestions: dimensionSuggestions, info: ( @@ -1501,16 +1545,15 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.targetRowsPerSegment', - label: 'Target rows per segment', + name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment', type: 'number', zeroMeansUndefined: true, - defined: (t: TuningConfig) => - deepGet(t, 'partitionsSpec.type') === 'single_dim' && - !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), - required: (t: TuningConfig) => - !deepGet(t, 'partitionsSpec.targetRowsPerSegment') && - !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), + defined: s => + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim' && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment'), + required: s => + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment') && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment'), info: (

Target number of rows to include in a partition, should be a number that targets @@ -1519,24 +1562,23 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.maxRowsPerSegment', - label: 'Max rows per segment', + name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment', type: 'number', zeroMeansUndefined: true, - defined: (t: TuningConfig) => - deepGet(t, 'partitionsSpec.type') === 'single_dim' && - !deepGet(t, 'partitionsSpec.targetRowsPerSegment'), - required: (t: TuningConfig) => - !deepGet(t, 'partitionsSpec.targetRowsPerSegment') && - !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), + defined: s => + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim' && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment'), + required: s => + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment') && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment'), info:

Maximum number of rows to include in a partition.

, }, { - name: 'partitionsSpec.assumeGrouped', - label: 'Assume grouped', + name: 'spec.tuningConfig.partitionsSpec.assumeGrouped', type: 'boolean', defaultValue: false, - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'single_dim', + hideInMore: true, + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim', info: (

Assume that input data has already been grouped on time and dimensions. Ingestion will @@ -1550,13 +1592,13 @@ export function getPartitionRelatedTuningSpecFormFields( case 'kinesis': return [ { - name: 'maxRowsPerSegment', + name: 'spec.tuningConfig.maxRowsPerSegment', type: 'number', defaultValue: 5000000, info: <>Determines how many rows are in each segment.</>, }, { - name: 'maxTotalRows', + name: 'spec.tuningConfig.maxTotalRows', type: 'number', defaultValue: 20000000, info: <>Total number of rows in segments waiting to be pushed.</>, @@ -1567,13 +1609,13 @@ throw new Error(`unknown spec type ${specType}`); } -const TUNING_CONFIG_FORM_FIELDS: Field<TuningConfig>[] = [ +const TUNING_FORM_FIELDS: Field<IngestionSpec>[] = [ { - name: 'maxNumConcurrentSubTasks', + name: 'spec.tuningConfig.maxNumConcurrentSubTasks', type: 'number', defaultValue: 1, min: 1, - defined: (t: TuningConfig) => t.type === 'index_parallel', + defined: s => s.type === 'index_parallel', info: ( <> Maximum number of tasks which can be run at the same time. The supervisor task would spawn @@ -1585,41 +1627,41 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'maxRetry', + name: 'spec.tuningConfig.maxRetry', type: 'number', defaultValue: 3, - defined: (t: TuningConfig) => t.type === 'index_parallel', + defined: s => s.type === 'index_parallel', hideInMore: true, info: <>Maximum number of retries on task failures.</>, }, { - name: 'taskStatusCheckPeriodMs', + name: 'spec.tuningConfig.taskStatusCheckPeriodMs', type: 'number', defaultValue: 1000, - defined: (t: TuningConfig) => t.type === 'index_parallel', + defined: s => s.type === 'index_parallel', hideInMore: true, info: <>Polling period in milliseconds to check running task statuses.</>, }, { - name: 'totalNumMergeTasks', + name: 'spec.tuningConfig.totalNumMergeTasks', type: 'number', defaultValue: 10, min: 1, - defined: (t: TuningConfig) => + defined: s => Boolean( - t.type === 'index_parallel' && - oneOf(deepGet(t, 'partitionsSpec.type'), 'hashed', 'single_dim'), + s.type === 'index_parallel' && + oneOf(deepGet(s, 'spec.tuningConfig.partitionsSpec.type'), 'hashed', 'single_dim'), ), info: <>Number of tasks to merge partial segments after shuffle.</>, }, { - name: 'maxNumSegmentsToMerge', + name: 'spec.tuningConfig.maxNumSegmentsToMerge', type: 'number', defaultValue: 100, - defined: (t: TuningConfig) => + defined: s => Boolean( - t.type === 'index_parallel' && - oneOf(deepGet(t, 'partitionsSpec.type'), 'hashed', 'single_dim'), + s.type === 'index_parallel' && + oneOf(deepGet(s, 'spec.tuningConfig.partitionsSpec.type'), 'hashed', 'single_dim'), ), info: ( <> @@ -1628,22 +1670,22 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'maxRowsInMemory', + name: 'spec.tuningConfig.maxRowsInMemory', type: 'number', defaultValue: 1000000, info: <>Used in determining when intermediate persists to disk should occur.</>, }, { - name: 'maxBytesInMemory', + name: 'spec.tuningConfig.maxBytesInMemory', type: 'number', placeholder: 'Default: 1/6 of max JVM memory', info: <>Used in determining when intermediate persists to disk should occur.</>, }, { - name: 'resetOffsetAutomatically', + name: 'spec.tuningConfig.resetOffsetAutomatically', type: 'boolean', defaultValue: false, - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), info: ( <> Whether to reset the consumer offset if the next offset that it is trying to fetch is less @@ -1652,10 +1694,10 @@ const TUNING_CONFIG_FORM_FIELDS:
Field[] = [ ), }, { - name: 'skipSequenceNumberAvailabilityCheck', + name: 'spec.tuningConfig.skipSequenceNumberAvailabilityCheck', type: 'boolean', defaultValue: false, - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', info: ( <> Whether to enable checking if the current sequence number is still available in a particular @@ -1665,17 +1707,17 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'intermediatePersistPeriod', + name: 'spec.tuningConfig.intermediatePersistPeriod', type: 'duration', defaultValue: 'PT10M', - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), info: <>The period that determines the rate at which intermediate persists occur.</>, }, { - name: 'intermediateHandoffPeriod', + name: 'spec.tuningConfig.intermediateHandoffPeriod', type: 'duration', defaultValue: 'P2147483647D', - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), info: ( <> How often the tasks should hand off segments. Handoff will happen either if @@ -1685,7 +1727,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'maxPendingPersists', + name: 'spec.tuningConfig.maxPendingPersists', type: 'number', hideInMore: true, info: ( @@ -1697,7 +1739,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'pushTimeout', + name: 'spec.tuningConfig.pushTimeout', type: 'number', defaultValue: 0, hideInMore: true, @@ -1708,15 +1750,15 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'handoffConditionTimeout', + name: 'spec.tuningConfig.handoffConditionTimeout', type: 'number', defaultValue: 0, - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), hideInMore: true, info: <>Milliseconds to wait for segment handoff. 0 means to wait forever.</>, }, { - name: 'indexSpec.bitmap.type', + name: 'spec.tuningConfig.indexSpec.bitmap.type', label: 'Index bitmap type', type: 'string', defaultValue: 'roaring', @@ -1725,7 +1767,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ info: <>Compression format for bitmap indexes.</>, }, { - name: 'indexSpec.dimensionCompression', + name: 'spec.tuningConfig.indexSpec.dimensionCompression', label: 'Index dimension compression', type: 'string', defaultValue: 'lz4', @@ -1734,7 +1776,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ info: <>Compression format for dimension columns.</>, }, { - name: 'indexSpec.metricCompression', + name: 'spec.tuningConfig.indexSpec.metricCompression', label: 'Index metric compression', type: 'string', defaultValue: 'lz4', @@ -1743,7 +1785,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ info: <>Compression format for primitive type metric columns.</>, }, { - name: 'indexSpec.longEncoding', + name: 'spec.tuningConfig.indexSpec.longEncoding', label: 'Index long encoding', type: 'string', defaultValue: 'longs', @@ -1759,43 +1801,79 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'chatHandlerTimeout', + name: 'spec.tuningConfig.splitHintSpec.maxSplitSize', + type: 'number', + defaultValue: 1073741824, + min: 1000000, + defined: s => + s.type === 'index_parallel' && deepGet(s, 'spec.ioConfig.inputSource.type') !== 'http', + hideInMore: true, + adjustment: s => deepSet(s, 'spec.tuningConfig.splitHintSpec.type', 'maxSize'), + info: ( + <> + Maximum number of bytes of input files to process in a single subtask. If a single file is + larger than this number, it will be processed by itself in a single subtask (files are never + split across subtasks). + </> + ), + }, + { + name: 'spec.tuningConfig.splitHintSpec.maxNumFiles', + type: 'number', + defaultValue: 1000, + min: 1, + defined: s => s.type === 'index_parallel', + hideInMore: true, + adjustment: s => deepSet(s, 'spec.tuningConfig.splitHintSpec.type', 'maxSize'), + info: ( + <> + Maximum number of input files to process in a single subtask. This limit is to avoid task + failures when the ingestion spec is too long. There are two known limits on the max size of + serialized ingestion spec, i.e., the max ZNode size in ZooKeeper ( + <Code>jute.maxbuffer</Code>) and the max packet size in MySQL ( + <Code>max_allowed_packet</Code>). These can make ingestion tasks fail if the serialized + ingestion spec size hits one of them. + </> + ), + }, + { + name: 'spec.tuningConfig.chatHandlerTimeout', type: 'duration', defaultValue: 'PT10S', - defined: (t: TuningConfig) => t.type === 'index_parallel', + defined: s => s.type === 'index_parallel', hideInMore: true, info: <>Timeout for reporting the pushed segments in worker tasks.</>, }, { - name: 'chatHandlerNumRetries', + name: 'spec.tuningConfig.chatHandlerNumRetries', type: 'number', defaultValue: 5, - defined: (t: TuningConfig) => t.type === 'index_parallel', + defined: s => s.type === 'index_parallel', hideInMore: true, info: <>Retries for reporting the pushed segments in worker tasks.</>, }, { - name: 'workerThreads', + name: 'spec.tuningConfig.workerThreads', type: 'number', placeholder: 'min(10, taskCount)', - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), info: ( <>The number of threads that will be used by the supervisor for asynchronous operations.</>
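// Sketch of the splitHintSpec object assembled by the two
// spec.tuningConfig.splitHintSpec fields above, using the defaults they
// declare (shown only for illustration):
const splitHintSpec = { type: 'maxSize', maxSplitSize: 1073741824, maxNumFiles: 1000 };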
), }, { - name: 'chatThreads', + name: 'spec.tuningConfig.chatThreads', type: 'number', placeholder: 'min(10, taskCount * replicas)', - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), hideInMore: true, info: <>The number of threads that will be used for communicating with indexing tasks.</>, }, { - name: 'chatRetries', + name: 'spec.tuningConfig.chatRetries', type: 'number', defaultValue: 8, - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), hideInMore: true, info: ( <> @@ -1805,17 +1883,17 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'httpTimeout', + name: 'spec.tuningConfig.httpTimeout', type: 'duration', defaultValue: 'PT10S', - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), info: <>How long to wait for an HTTP response from an indexing task.</>, }, { - name: 'shutdownTimeout', + name: 'spec.tuningConfig.shutdownTimeout', type: 'duration', defaultValue: 'PT80S', - defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'), + defined: s => oneOf(s.type, 'kafka', 'kinesis'), hideInMore: true, info: ( <> @@ -1824,10 +1902,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'offsetFetchPeriod', + name: 'spec.tuningConfig.offsetFetchPeriod', type: 'duration', defaultValue: 'PT30S', - defined: (t: TuningConfig) => t.type === 'kafka', + defined: s => s.type === 'kafka', info: ( <> How often the supervisor queries Kafka and the indexing tasks to fetch current offsets and @@ -1836,10 +1914,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'recordBufferSize', + name: 'spec.tuningConfig.recordBufferSize', type: 'number', defaultValue: 10000, - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', info: ( <> Size of the buffer (number of events) used between the Kinesis fetch threads and the main @@ -1848,10 +1926,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'recordBufferOfferTimeout', + name: 'spec.tuningConfig.recordBufferOfferTimeout', type: 'number', defaultValue: 5000, - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', hideInMore: true, info: ( <> @@ -1861,11 +1939,11 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'recordBufferFullWait', + name: 'spec.tuningConfig.recordBufferFullWait', hideInMore: true, type: 'number', defaultValue: 5000, - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', info: ( <> Length of time in milliseconds to wait for the buffer to drain before attempting to fetch @@ -1874,10 +1952,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'fetchSequenceNumberTimeout', + name: 'spec.tuningConfig.fetchSequenceNumberTimeout', type: 'number', defaultValue: 60000, - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', hideInMore: true, info: ( <> @@ -1889,10 +1967,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'fetchThreads', + name: 'spec.tuningConfig.fetchThreads', type: 'number', placeholder: 'max(1, {numProcessors} - 1)', - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', hideInMore: true, info: ( <> @@ -1902,10 +1980,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'maxRecordsPerPoll', + name: 'spec.tuningConfig.maxRecordsPerPoll', type:
'number', defaultValue: 100, - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', hideInMore: true, info: ( <> @@ -1915,10 +1993,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ ), }, { - name: 'repartitionTransitionDuration', + name: 'spec.tuningConfig.repartitionTransitionDuration', type: 'duration', defaultValue: 'PT2M', - defined: (t: TuningConfig) => t.type === 'kinesis', + defined: s => s.type === 'kinesis', hideInMore: true, info: ( <> @@ -1939,8 +2017,8 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [ }, ]; -export function getTuningSpecFormFields() { - return TUNING_CONFIG_FORM_FIELDS; +export function getTuningFormFields() { + return TUNING_FORM_FIELDS; } export interface IndexSpec { @@ -2006,7 +2084,8 @@ export function issueWithSampleData(sampleData: string[]): JSX.Element | undefin return; } -export function fillInputFormat(spec: IngestionSpec, sampleData: string[]): IngestionSpec { +export function fillInputFormatIfNeeded(spec: IngestionSpec, sampleData: string[]): IngestionSpec { + if (deepGet(spec, 'spec.ioConfig.inputFormat.type')) return spec; return deepSet(spec, 'spec.ioConfig.inputFormat', guessInputFormat(sampleData)); } @@ -2115,6 +2194,7 @@ export function updateSchemaWithSample( headerAndRows: HeaderAndRows, dimensionMode: DimensionMode, rollup: boolean, + forcePartitionInitialization = false, ): IngestionSpec { const typeHints = getTypeHintsFromSpec(spec); @@ -2144,6 +2224,19 @@ export function updateSchemaWithSample( newSpec = deepDelete(newSpec, 'spec.dataSchema.metricsSpec'); } + if ( + getSpecType(newSpec) === 'index_parallel' && + (!deepGet(newSpec, 'spec.tuningConfig.partitionsSpec') || forcePartitionInitialization) + ) { + newSpec = adjustForceGuaranteedRollup( + deepSet( + newSpec, + 'spec.tuningConfig.partitionsSpec', + rollup ? 
{ type: 'hashed' } : { type: 'dynamic' }, + ), + ); + } + newSpec = deepSet(newSpec, 'spec.dataSchema.granularitySpec.rollup', rollup); return newSpec; } diff --git a/web-console/src/druid-models/lookup-spec.tsx b/web-console/src/druid-models/lookup-spec.tsx index c9e0e5a13bde..31405f311719 100644 --- a/web-console/src/druid-models/lookup-spec.tsx +++ b/web-console/src/druid-models/lookup-spec.tsx @@ -104,8 +104,8 @@ export const LOOKUP_FIELDS: Field[] = [ // cachedNamespace lookups have more options { name: 'extractionNamespace.type', - type: 'string', label: 'Globally cached lookup type', + type: 'string', placeholder: 'uri', suggestions: ['uri', 'jdbc'], defined: (model: LookupSpec) => model.type === 'cachedNamespace', @@ -113,8 +113,8 @@ export const LOOKUP_FIELDS: Field[] = [ }, { name: 'extractionNamespace.uriPrefix', - type: 'string', label: 'URI prefix', + type: 'string', placeholder: 's3://bucket/some/key/prefix/', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -145,8 +145,8 @@ export const LOOKUP_FIELDS: Field[] = [ }, { name: 'extractionNamespace.fileRegex', - type: 'string', label: 'File regex', + type: 'string', defaultValue: '.*', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -157,8 +157,8 @@ export const LOOKUP_FIELDS: Field[] = [ // namespaceParseSpec { name: 'extractionNamespace.namespaceParseSpec.format', - type: 'string', label: 'Parse format', + type: 'string', suggestions: ['csv', 'tsv', 'simpleJson', 'customJson'], defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri', required: true, @@ -177,7 +177,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.skipHeaderRows', type: 'number', - label: 'Skip header rows', defaultValue: 0, defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -187,7 +186,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.hasHeaderRow', type: 'boolean', - label: 'Has header row', defaultValue: false, defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -197,7 +195,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.columns', type: 'string-array', - label: 'Columns', placeholder: `["key", "value"]`, defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -209,7 +206,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.keyColumn', type: 'string', - label: 'Key column', placeholder: '(optional - defaults to the first column)', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -219,7 +215,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.valueColumn', type: 'string', - label: 'Value column', placeholder: '(optional - defaults to the second column)', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -231,7 +226,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.delimiter', type: 'string', - label: 'Delimiter', placeholder: `(optional)`, defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -240,7 +234,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.listDelimiter', type: 'string', - label: 'List delimiter', placeholder: 
`(optional)`, defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -251,7 +244,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.keyFieldName', type: 'string', - label: 'Key field name', placeholder: `key`, defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -261,7 +253,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespaceParseSpec.valueFieldName', type: 'string', - label: 'Value field name', placeholder: `value`, defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri' && @@ -271,7 +262,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.pollPeriod', type: 'string', - label: 'Poll period', defaultValue: '0', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri', info: `Period between polling for updates`, @@ -281,7 +271,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.namespace', type: 'string', - label: 'Namespace', placeholder: 'some_lookup', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', required: true, @@ -297,8 +286,8 @@ export const LOOKUP_FIELDS: Field[] = [ }, { name: 'extractionNamespace.connectorConfig.connectURI', - type: 'string', label: 'Connect URI', + type: 'string', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', required: true, info: 'Defines the connectURI value of the connector config to be used', }, { name: 'extractionNamespace.connectorConfig.user', type: 'string', - label: 'User', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', info: 'Defines the user to be used by the connector config', }, { name: 'extractionNamespace.connectorConfig.password', type: 'string', - label: 'Password', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', info: 'Defines the password to be used by the connector config', }, { name: 'extractionNamespace.connectorConfig.createTables', type: 'boolean', - label: 'Create tables', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', info: 'Should tables be created', }, { name: 'extractionNamespace.table', type: 'string', - label: 'Table', placeholder: 'some_lookup_table', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', required: true, @@ -347,7 +332,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.keyColumn', type: 'string', - label: 'Key column', placeholder: 'my_key_value', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', required: true, @@ -367,7 +351,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.valueColumn', type: 'string', - label: 'Value column', placeholder: 'my_column_value', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', required: true, @@ -387,7 +370,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'extractionNamespace.filter', type: 'string', - label: 'Filter', placeholder: '(optional)', defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc', info: ( @@ -427,7 +409,6 @@ export const LOOKUP_FIELDS: Field[] = [ { name: 'firstCacheTimeout', type: 'number', - label: 'First cache timeout', defaultValue: 0, defined: (model: LookupSpec) =>
model.type === 'cachedNamespace', info: `How long to wait (in ms) for the first run of the cache to populate. 0 indicates to not wait`, diff --git a/web-console/src/singletons/api.spec.ts b/web-console/src/singletons/api.spec.ts index 64429af4023f..8cb8581b1736 100644 --- a/web-console/src/singletons/api.spec.ts +++ b/web-console/src/singletons/api.spec.ts @@ -21,6 +21,6 @@ import { Api } from './api'; describe('Api', () => { it('escapes stuff', () => { expect(Api.encodePath('wikipedia')).toEqual('wikipedia'); - expect(Api.encodePath('wi%ki?pe#dia')).toEqual('wi%25ki%3Fpe%23dia'); + expect(Api.encodePath(`wi%ki?pe#dia&'[]`)).toEqual('wi%25ki%3Fpe%23dia%26%27%5B%5D'); }); }); diff --git a/web-console/src/singletons/api.ts b/web-console/src/singletons/api.ts index a05adf324610..7a05bdd398c8 100644 --- a/web-console/src/singletons/api.ts +++ b/web-console/src/singletons/api.ts @@ -46,6 +46,14 @@ export class Api { } static encodePath(path: string): string { - return path.replace(/[?#%]/g, encodeURIComponent); + return path.replace( + /[?#%&'\[\]]/g, + c => + '%' + + c + .charCodeAt(0) + .toString(16) + .toUpperCase(), + ); } } diff --git a/web-console/src/utils/object-change.ts b/web-console/src/utils/object-change.ts index 7ff7d5e2fc8f..99e3166f9ca8 100644 --- a/web-console/src/utils/object-change.ts +++ b/web-console/src/utils/object-change.ts @@ -83,6 +83,11 @@ export function deepSet>(value: T, path: string, x return valueCopy; } +export function deepSetIfUnset>(value: T, path: string, x: any): T { + if (typeof deepGet(value, path) !== 'undefined') return value; + return deepSet(value, path, x); +} + export function deepSetMulti>( value: T, changes: Record, diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts index 5f8b97eb34e5..a279adc63ad5 100644 --- a/web-console/src/utils/sampler.ts +++ b/web-console/src/utils/sampler.ts @@ -276,6 +276,7 @@ export async function sampleForConnect( ioConfig = deepSet(ioConfig, 'inputFormat', { type: 'regex', pattern: '(.*)', + listDelimiter: '56616469-6de2-9da4-efb8-8f416e6e6965', // Just a UUID to disable the list delimiter, let's hope we do not see this UUID in the data columns: ['raw'], }); } diff --git a/web-console/src/utils/utils.spec.ts b/web-console/src/utils/utils.spec.ts index 31e2f89ab224..117ba069d3a9 100644 --- a/web-console/src/utils/utils.spec.ts +++ b/web-console/src/utils/utils.spec.ts @@ -40,7 +40,6 @@ describe('utils', () => { dataSchema: { dataSource: 'wikipedia', granularitySpec: { - type: 'uniform', segmentGranularity: 'day', queryGranularity: 'hour', }, @@ -91,7 +90,6 @@ describe('utils', () => { "granularitySpec": Object { "queryGranularity": "hour", "segmentGranularity": "day", - "type": "uniform", }, "timestampSpec": Object { "column": "timestamp", diff --git a/web-console/src/views/load-data-view/__snapshots__/load-data-view.spec.tsx.snap b/web-console/src/views/load-data-view/__snapshots__/load-data-view.spec.tsx.snap index ef5cec674717..26f4700e1e50 100644 --- a/web-console/src/views/load-data-view/__snapshots__/load-data-view.spec.tsx.snap +++ b/web-console/src/views/load-data-view/__snapshots__/load-data-view.spec.tsx.snap @@ -173,13 +173,13 @@ exports[`load data view matches snapshot 1`] = `

- -

- Please specify where your raw data is located -

-
+ + +

+ Please specify where your raw data is located +

+
+
`; diff --git a/web-console/src/views/load-data-view/filter-table/filter-table.spec.tsx b/web-console/src/views/load-data-view/filter-table/filter-table.spec.tsx index 0c635b17947a..f023124877a4 100644 --- a/web-console/src/views/load-data-view/filter-table/filter-table.spec.tsx +++ b/web-console/src/views/load-data-view/filter-table/filter-table.spec.tsx @@ -39,7 +39,6 @@ describe('filter table', () => { columnFilter="" dimensionFilters={[]} selectedFilterName={undefined} - onShowGlobalFilter={() => {}} onFilterSelect={() => {}} /> ); diff --git a/web-console/src/views/load-data-view/filter-table/filter-table.tsx b/web-console/src/views/load-data-view/filter-table/filter-table.tsx index db0bddc3eedd..e9da226aa35e 100644 --- a/web-console/src/views/load-data-view/filter-table/filter-table.tsx +++ b/web-console/src/views/load-data-view/filter-table/filter-table.tsx @@ -21,7 +21,7 @@ import React from 'react'; import ReactTable from 'react-table'; import { TableCell } from '../../../components'; -import { DruidFilter } from '../../../druid-models'; +import { DruidFilter, getFilterDimension } from '../../../druid-models'; import { caseInsensitiveContains, filterMap } from '../../../utils'; import { HeaderAndRows, SampleEntry } from '../../../utils/sampler'; @@ -42,19 +42,11 @@ export interface FilterTableProps { columnFilter: string; dimensionFilters: DruidFilter[]; selectedFilterName: string | undefined; - onShowGlobalFilter: () => void; onFilterSelect: (filter: DruidFilter, index: number) => void; } export const FilterTable = React.memo(function FilterTable(props: FilterTableProps) { - const { - sampleData, - columnFilter, - dimensionFilters, - selectedFilterName, - onShowGlobalFilter, - onFilterSelect, - } = props; + const { sampleData, columnFilter, dimensionFilters, selectedFilterName, onFilterSelect } = props; return ( { if (!caseInsensitiveContains(columnName, columnFilter)) return; const timestamp = columnName === '__time'; - const filterIndex = dimensionFilters.findIndex(f => f.dimension === columnName); + const filterIndex = dimensionFilters.findIndex(f => getFilterDimension(f) === columnName); const filter = dimensionFilters[filterIndex]; const columnClassName = classNames({ @@ -73,14 +65,17 @@ export const FilterTable = React.memo(function FilterTable(props: FilterTablePro return { Header: (
{ - if (timestamp) { - onShowGlobalFilter(); - } else if (filter) { + if (filter) { onFilterSelect(filter, filterIndex); } else { - onFilterSelect({ type: 'selector', dimension: columnName, value: '' }, -1); + onFilterSelect( + timestamp + ? { type: 'interval', dimension: columnName, intervals: [] } + : { type: 'selector', dimension: columnName, value: '' }, + -1, + ); } }} > diff --git a/web-console/src/views/load-data-view/info-messages.tsx b/web-console/src/views/load-data-view/info-messages.tsx index 217c84200caf..ee53ae77b3e4 100644 --- a/web-console/src/views/load-data-view/info-messages.tsx +++ b/web-console/src/views/load-data-view/info-messages.tsx @@ -16,7 +16,7 @@ * limitations under the License. */ -import { Callout, Code } from '@blueprintjs/core'; +import { Callout, Code, FormGroup } from '@blueprintjs/core'; import React from 'react'; import { ExternalLink } from '../../components'; @@ -34,22 +34,26 @@ export const ConnectMessage = React.memo(function ConnectMessage(props: ConnectM const { inlineMode, spec } = props; return ( - -

- Druid ingests raw data and converts it into a custom,{' '} - indexed format{' '} - that is optimized for analytic queries. -

- {inlineMode ? ( - <> -

To get started, please paste some data in the box to the left.

-

Click "Apply" to verify your data with Druid.

- - ) : ( -

To get started, please specify what data you want to ingest.

- )} - -
+ + +

+ Druid ingests raw data and converts it into a custom,{' '} + + indexed format + {' '} + that is optimized for analytic queries. +

+ {inlineMode ? ( + <> +

To get started, please paste some data in the box to the left.

+

Click "Apply" to verify your data with Druid.

+ + ) : ( +

To get started, please specify what data you want to ingest.

+ )} + +
+
); }); @@ -61,69 +65,78 @@ export const ParserMessage = React.memo(function ParserMessage(props: ParserMess const { canFlatten } = props; return ( - -

- Druid requires flat data (non-nested, non-hierarchical). Each row should represent a - discrete event. -

- {canFlatten && ( + +

- If you have nested data, you can{' '} - - flatten - {' '} - it here. If the provided flattening capabilities are not sufficient, please pre-process - your data before ingesting it into Druid. + Druid requires flat data (non-nested, non-hierarchical). Each row should represent a + discrete event.

- )} -

Ensure that your data appears correctly in a row/column orientation.

- -
+ {canFlatten && ( +

+ If you have nested data, you can{' '} + + flatten + {' '} + it here. If the provided flattening capabilities are not sufficient, please pre-process + your data before ingesting it into Druid. +

+ )} +

Ensure that your data appears correctly in a row/column orientation.

+ +
+ ); }); export const TimestampMessage = React.memo(function TimestampMessage() { return ( - -

- Druid partitions data based on the primary time column of your data. This column is stored - internally in Druid as __time. -

-

Configure how to define the time column for this data.

-

- If your data does not have a time column, you can select None to use a - placeholder value. If the time information is spread across multiple columns you can combine - them into one by selecting Expression and defining a transform expression. -

- -
+ + +

+ Druid partitions data based on the primary time column of your data. This column is stored + internally in Druid as __time. +

+

Configure how to define the time column for this data.

+

+ If your data does not have a time column, you can select None to use a + placeholder value. If the time information is spread across multiple columns you can + combine them into one by selecting Expression and defining a transform + expression. +
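// One hedged example of such a transform expression, assuming hypothetical
// "date" and "time" input columns that together hold the event time:
const timeExpression = `timestamp_parse(concat("date", ' ', "time"))`;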

+ +
+
); }); export const TransformMessage = React.memo(function TransformMessage() { return ( - -

- Druid can perform per-row{' '} - - transforms - {' '} - of column values allowing you to create new derived columns or alter existing column. -

- -
+ + +

+ Druid can perform per-row{' '} + + transforms + {' '} + of column values, allowing you to create new derived columns or alter existing columns.
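// A sketch of one such transform as Druid models it (the column names are invented):
const exampleTransform = { type: 'expression', name: 'fullUrl', expression: `concat('https://', "domain", "path")` };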

+ +
+
); }); export const FilterMessage = React.memo(function FilterMessage() { return ( - -

- Druid can filter out unwanted data by applying per-row{' '} - filters. -

- -
+ + +

+ Druid can filter out unwanted data by applying per-row{' '} + filters. +
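// For instance, a per-row selector filter (dimension and value are illustrative):
const exampleFilter: DruidFilter = { type: 'selector', dimension: 'channel', value: '#en.wikipedia' };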

+ +
+
); }); @@ -135,57 +148,68 @@ export const SchemaMessage = React.memo(function SchemaMessage(props: SchemaMess const { dimensionMode } = props; return ( - -

- Each column in Druid must have an assigned type (string, long, float, double, complex, etc). -

- {dimensionMode === 'specific' && ( + +

- Default primitive types have been automatically assigned to your columns. If you want to - change the type, click on the column header. + Each column in Druid must have an assigned type (string, long, float, double, complex, + etc).

- )} - -
+ {dimensionMode === 'specific' && ( +

+ Default primitive types have been automatically assigned to your columns. If you want to + change the type, click on the column header. +

+ )} + +
+ ); }); export const PartitionMessage = React.memo(function PartitionMessage() { return ( - -

Configure how Druid will partition data.

- -
+ + +

Configure how Druid will partition data.

+ +
+
); }); export const TuningMessage = React.memo(function TuningMessage() { return ( - -

Fine tune how Druid will ingest data.

- -
+ + +

Fine tune how Druid will ingest data.

+ +
+
); }); export const PublishMessage = React.memo(function PublishMessage() { return ( - -

Configure behavior of indexed data once it reaches Druid.

-
+ + +

Configure behavior of indexed data once it reaches Druid.

+
+
); }); export const SpecMessage = React.memo(function SpecMessage() { return ( - -

- Druid begins ingesting data once you submit a JSON ingestion spec. If you modify any values - in this view, the values entered in previous sections will update accordingly. If you modify - any values in previous sections, this spec will automatically update. -

-

Submit the spec to begin loading data into Druid.

- -
+ + +

+ Druid begins ingesting data once you submit a JSON ingestion spec. If you modify any + values in this view, the values entered in previous sections will update accordingly. If + you modify any values in previous sections, this spec will automatically update. +

+

Submit the spec to begin loading data into Druid.

+ +
+
); }); diff --git a/web-console/src/views/load-data-view/load-data-view.scss b/web-console/src/views/load-data-view/load-data-view.scss index bbc8627e8d16..8c4ec58797ba 100644 --- a/web-console/src/views/load-data-view/load-data-view.scss +++ b/web-console/src/views/load-data-view/load-data-view.scss @@ -277,12 +277,8 @@ $actual-icon-height: 400px; overflow: auto; padding: 0 5px; - .intro { - margin-bottom: 15px; - - .optional { - font-style: italic; - } + .optional { + font-style: italic; } } diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx index edf29b6d5c77..ff7db4d6d2e1 100644 --- a/web-console/src/views/load-data-view/load-data-view.tsx +++ b/web-console/src/views/load-data-view/load-data-view.tsx @@ -61,14 +61,18 @@ import { CONSTANT_TIMESTAMP_SPEC_FIELDS, DIMENSION_SPEC_FIELDS, FILTER_FIELDS, + FILTERS_FIELDS, FLATTEN_FIELD_FIELDS, getDimensionSpecName, + getIssueWithSpec, getMetricSpecName, getTimestampExpressionFields, getTimestampSchema, INPUT_FORMAT_FIELDS, issueWithSampleData, + KNOWN_FILTER_TYPES, METRIC_SPEC_FIELDS, + PRIMARY_PARTITION_RELATED_FORM_FIELDS, removeTimestampTransform, TIMESTAMP_SPEC_FIELDS, TimestampSpec, @@ -77,14 +81,14 @@ import { updateSchemaWithSample, } from '../../druid-models'; import { - adjustTuningConfig, + adjustForceGuaranteedRollup, cleanSpec, computeFlattenPathsForData, DimensionMode, DimensionSpec, DruidFilter, fillDataSourceNameIfNeeded, - fillInputFormat, + fillInputFormatIfNeeded, FlattenField, getDimensionMode, getIngestionComboType, @@ -92,18 +96,17 @@ import { getIngestionTitle, getIoConfigFormFields, getIoConfigTuningFormFields, - getPartitionRelatedTuningSpecFormFields, getRequiredModule, getRollup, + getSecondaryPartitionRelatedFormFields, getSpecType, - getTuningSpecFormFields, - GranularitySpec, + getTuningFormFields, IngestionComboTypeWithExtra, IngestionSpec, InputFormat, inputFormatCanFlatten, invalidIoConfig, - invalidTuningConfig, + invalidPartitionConfig, IoConfig, isDruidSource, isEmptyIngestionSpec, @@ -341,7 +344,6 @@ export interface LoadDataViewState { filterQueryState: QueryState; selectedFilterIndex: number; selectedFilter?: DruidFilter; - showGlobalFilter: boolean; newFilterValue?: Record; // for schema @@ -399,7 +401,6 @@ export class LoadDataView extends React.PureComponent { - this.setState(({ specPreview }) => { + this.setState(({ spec, specPreview }) => { localStorageSet(LocalStorageKeys.INGESTION_SPEC, JSON.stringify(specPreview)); - return { spec: specPreview }; + return { spec: spec === specPreview ? Object.assign({}, specPreview) : specPreview }; // If applying again, make a shallow copy to force a refresh }); }; @@ -762,7 +756,11 @@ export class LoadDataView extends React.PureComponent
- {welcomeMessage && {welcomeMessage}} + {welcomeMessage && ( + + {welcomeMessage} + + )} {this.renderWelcomeStepControls()} {!isEmptyIngestionSpec(spec) && (
- {!showGlobalFilter && this.renderColumnFilterControls()} - {!selectedFilter && this.renderGlobalFilterControls()} + {!selectedFilter && ( + <> + + {this.renderApplyButtonBar(filterQueryState, undefined)} + +
{this.renderNextBar({})} ); } - private onShowGlobalFilter = () => { - this.setState({ showGlobalFilter: true }); - }; - private onFilterSelect = (filter: DruidFilter, index: number) => { this.setState({ selectedFilterIndex: index, @@ -2125,6 +2137,7 @@ export class LoadDataView extends React.PureComponent { this.setState({ @@ -2133,129 +2146,48 @@ export class LoadDataView extends React.PureComponent - this.setState({ selectedFilter: f })} - showCustom={f => !oneOf(f.type, 'selector', 'in', 'regex', 'like', 'not')} - /> -
-
- - ); - } else { - return ( - + return ( +
+ this.setState({ selectedFilter: f })} + showCustom={f => !KNOWN_FILTER_TYPES.includes(f.type)} + /> +
+ )}
- ); - } else { - return ( - -