Assume that input data has already been grouped on time and dimensions. Ingestion will
@@ -1546,17 +1550,41 @@ export function getPartitionRelatedTuningSpecFormFields(
},
];
+ if (oneOf(deepGet(spec, 'spec.tuningConfig.partitionsSpec.type'), 'hashed', 'single_dim')) {
+ parallelFields.push({
+ name: 'spec.dataSchema.granularitySpec.intervals',
+ label: 'Time intervals',
+ type: 'string-array',
+ placeholder: 'ex: 2018-01-01/2018-06-01',
+ hideInMore: true,
+ info: (
+ <>
+
A comma separated list of intervals for the raw data being ingested.
+
+ This list is used to determine the shards that will be created. If it is not
+ specified then then an additional job will run to automatically determine the data
+ intervals used.
+
+ >
+ ),
+ });
+ }
+
+ return parallelFields;
+
case 'kafka':
case 'kinesis':
return [
{
- name: 'maxRowsPerSegment',
+ name: 'spec.tuningConfig.maxRowsPerSegment',
+ label: 'Max rows per segment',
type: 'number',
defaultValue: 5000000,
info: <>Determines how many rows are in each segment.>,
},
{
- name: 'maxTotalRows',
+ name: 'spec.tuningConfig.maxTotalRows',
+ label: 'Max total rows',
type: 'number',
defaultValue: 20000000,
info: <>Total number of rows in segments waiting for being pushed.>,
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 75b427a3e7de..31bb54ca4970 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -2214,7 +2214,7 @@ export class LoadDataView extends React.PureComponent this.updateSpec(s)}
+ onChange={this.updateSpec}
/>
this.updateSpec(s)}
+ onChange={this.updateSpec}
/>
)}
this.updateSpec(deepSet(spec, 'spec.dataSchema.granularitySpec', g))}
/>
- {!isStreaming && (
-
- ['hashed', 'single_dim'].includes(
- deepGet(spec, 'spec.tuningConfig.partitionsSpec.type'),
- ),
- info: <>A comma separated list of intervals for the raw data being ingested.>,
- },
- ]}
- model={spec}
- onChange={s => this.updateSpec(s)}
- />
- )}
From 14b8f7f00040271e36f115b27944b39a72ef74bf Mon Sep 17 00:00:00 2001
From: Vadim Ogievetsky
Date: Sun, 6 Dec 2020 15:26:27 -0800
Subject: [PATCH 02/14] don't set redundant fields
---
.../src/druid-models/ingestion-spec.tsx | 88 +++++++++++--------
web-console/src/utils/object-change.ts | 5 ++
.../views/load-data-view/load-data-view.tsx | 63 ++++---------
3 files changed, 73 insertions(+), 83 deletions(-)
diff --git a/web-console/src/druid-models/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec.tsx
index bfae30420c1d..2a93825b9aab 100644
--- a/web-console/src/druid-models/ingestion-spec.tsx
+++ b/web-console/src/druid-models/ingestion-spec.tsx
@@ -19,13 +19,14 @@
import { Code } from '@blueprintjs/core';
import React from 'react';
-import { ExternalLink, Field } from '../components';
+import { AutoForm, ExternalLink, Field } from '../components';
import { getLink } from '../links';
import {
deepDelete,
deepGet,
deepMove,
deepSet,
+ deepSetIfUnset,
EMPTY_ARRAY,
EMPTY_OBJECT,
filterMap,
@@ -290,11 +291,9 @@ export function normalizeSpec(spec: Partial): IngestionSpec {
deepGet(spec, 'spec.tuningConfig.type');
if (!specType) return spec as IngestionSpec;
- if (!deepGet(spec, 'type')) spec = deepSet(spec, 'type', specType);
- if (!deepGet(spec, 'spec.ioConfig.type')) spec = deepSet(spec, 'spec.ioConfig.type', specType);
- if (!deepGet(spec, 'spec.tuningConfig.type')) {
- spec = deepSet(spec, 'spec.tuningConfig.type', specType);
- }
+ spec = deepSetIfUnset(spec, 'type', specType);
+ spec = deepSetIfUnset(spec, 'spec.ioConfig.type', specType);
+ spec = deepSetIfUnset(spec, 'spec.tuningConfig.type', specType);
return spec as IngestionSpec;
}
@@ -1340,9 +1339,8 @@ export interface PartitionsSpec {
assumeGrouped?: boolean;
}
-export function adjustTuningConfig(spec: IngestionSpec) {
- const tuningConfigType = deepGet(spec, 'spec.tuningConfig.type');
- if (tuningConfigType !== 'index_parallel') return spec;
+export function adjustForceGuaranteedRollup(spec: IngestionSpec) {
+ if (getSpecType(spec) !== 'index_parallel') return spec;
const partitionsSpecType = deepGet(spec, 'spec.tuningConfig.partitionsSpec.type') || 'dynamic';
if (partitionsSpecType === 'dynamic') {
@@ -1354,37 +1352,38 @@ export function adjustTuningConfig(spec: IngestionSpec) {
return spec;
}
-export function invalidTuningConfig(tuningConfig: TuningConfig): boolean {
- if (tuningConfig.type !== 'index_parallel') return false;
-
- switch (deepGet(tuningConfig, 'partitionsSpec.type')) {
- case 'hashed':
- return (
- Boolean(deepGet(tuningConfig, 'partitionsSpec.targetRowsPerSegment')) &&
- Boolean(deepGet(tuningConfig, 'partitionsSpec.numShards'))
- );
-
- case 'single_dim':
- if (!deepGet(tuningConfig, 'partitionsSpec.partitionDimension')) return true;
- const hasTargetRowsPerSegment = Boolean(
- deepGet(tuningConfig, 'partitionsSpec.targetRowsPerSegment'),
- );
- const hasMaxRowsPerSegment = Boolean(
- deepGet(tuningConfig, 'partitionsSpec.maxRowsPerSegment'),
- );
- if (hasTargetRowsPerSegment === hasMaxRowsPerSegment) {
- return true;
- }
- }
-
- return false;
+export function invalidPartitionConfig(spec: IngestionSpec): boolean {
+ return (
+ // Bad primary partitioning, or...
+ !deepGet(spec, 'spec.dataSchema.granularitySpec.segmentGranularity') ||
+ // Bad secondary partitioning
+ Boolean(AutoForm.issueWithModel(spec, getSecondaryPartitionRelatedFormFields(spec, undefined)))
+ );
}
-export function getPartitionRelatedTuningSpecFormFields(
+export const PRIMARY_PARTITION_RELATED_FORM_FIELDS: Field[] = [
+ {
+ name: 'spec.dataSchema.granularitySpec.segmentGranularity',
+ type: 'string',
+ suggestions: ['hour', 'day', 'week', 'month', 'year'],
+ defined: s => deepGet(s, 'spec.dataSchema.granularitySpec.type') === 'uniform',
+ required: true,
+ info: (
+ <>
+ The granularity to create time chunks at. Multiple segments can be created per time chunk.
+ For example, with 'DAY' segmentGranularity, the events of the same day fall into the same
+ time chunk which can be optionally further partitioned into multiple segments based on other
+ configurations and input size.
+ >
+ ),
+ },
+];
+
+export function getSecondaryPartitionRelatedFormFields(
spec: IngestionSpec,
dimensionSuggestions: string[] | undefined,
): Field[] {
- const specType = getSpecType(spec) || 'index_parallel';
+ const specType = getSpecType(spec);
switch (specType) {
case 'index_parallel':
const parallelFields: Field[] = [
@@ -1402,7 +1401,14 @@ export function getPartitionRelatedTuningSpecFormFields(
),
adjustment: s => {
- if (!Array.isArray(dimensionSuggestions) || !dimensionSuggestions.length) return s;
+ if (
+ deepGet(s, 'spec.tuningConfig.partitionsSpec.type') !== 'single_dim' ||
+ !Array.isArray(dimensionSuggestions) ||
+ !dimensionSuggestions.length
+ ) {
+ return s;
+ }
+
return deepSet(
s,
'spec.tuningConfig.partitionsSpec.partitionDimension',
@@ -2172,6 +2178,16 @@ export function updateSchemaWithSample(
newSpec = deepDelete(newSpec, 'spec.dataSchema.metricsSpec');
}
+ if (getSpecType(newSpec) === 'index_parallel') {
+ newSpec = adjustForceGuaranteedRollup(
+ deepSet(
+ newSpec,
+ 'spec.tuningConfig.partitionsSpec',
+ rollup ? { type: 'hashed' } : { type: 'dynamic' },
+ ),
+ );
+ }
+
newSpec = deepSet(newSpec, 'spec.dataSchema.granularitySpec.rollup', rollup);
return newSpec;
}
diff --git a/web-console/src/utils/object-change.ts b/web-console/src/utils/object-change.ts
index 7ff7d5e2fc8f..99e3166f9ca8 100644
--- a/web-console/src/utils/object-change.ts
+++ b/web-console/src/utils/object-change.ts
@@ -83,6 +83,11 @@ export function deepSet>(value: T, path: string, x
return valueCopy;
}
+export function deepSetIfUnset>(value: T, path: string, x: any): T {
+ if (typeof deepGet(value, path) !== 'undefined') return value;
+ return deepSet(value, path, x);
+}
+
export function deepSetMulti>(
value: T,
changes: Record,
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 31bb54ca4970..15e7df035fce 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -68,6 +68,7 @@ import {
INPUT_FORMAT_FIELDS,
issueWithSampleData,
METRIC_SPEC_FIELDS,
+ PRIMARY_PARTITION_RELATED_FORM_FIELDS,
removeTimestampTransform,
TIMESTAMP_SPEC_FIELDS,
TimestampSpec,
@@ -76,7 +77,7 @@ import {
updateSchemaWithSample,
} from '../../druid-models';
import {
- adjustTuningConfig,
+ adjustForceGuaranteedRollup,
cleanSpec,
computeFlattenPathsForData,
DimensionMode,
@@ -91,18 +92,17 @@ import {
getIngestionTitle,
getIoConfigFormFields,
getIoConfigTuningFormFields,
- getPartitionRelatedTuningSpecFormFields,
getRequiredModule,
getRollup,
+ getSecondaryPartitionRelatedFormFields,
getSpecType,
getTuningSpecFormFields,
- GranularitySpec,
IngestionComboTypeWithExtra,
IngestionSpec,
InputFormat,
inputFormatCanFlatten,
invalidIoConfig,
- invalidTuningConfig,
+ invalidPartitionConfig,
IoConfig,
isDruidSource,
isEmptyIngestionSpec,
@@ -125,6 +125,7 @@ import {
deepDelete,
deepGet,
deepSet,
+ deepSetIfUnset,
deepSetMulti,
EMPTY_ARRAY,
EMPTY_OBJECT,
@@ -509,9 +510,9 @@ export class LoadDataView extends React.PureComponent {
- this.setState(({ specPreview }) => {
+ this.setState(({ spec, specPreview }) => {
localStorageSet(LocalStorageKeys.INGESTION_SPEC, JSON.stringify(specPreview));
- return { spec: specPreview };
+ return { spec: spec === specPreview ? Object.assign({}, specPreview) : specPreview }; // If applying again, make a shallow copy to force a refresh
});
};
@@ -1886,17 +1887,16 @@ export class LoadDataView extends React.PureComponent
{this.renderNextBar({
disabled: !schemaQueryState.data,
- onNextStep: () => {
- let newSpec = spec;
- if (rollup) {
- newSpec = deepSet(newSpec, 'spec.tuningConfig.partitionsSpec', { type: 'hashed' });
- newSpec = deepSet(newSpec, 'spec.tuningConfig.forceGuaranteedRollup', true);
- } else {
- newSpec = deepSet(newSpec, 'spec.tuningConfig.partitionsSpec', { type: 'dynamic' });
- newSpec = deepDelete(newSpec, 'spec.tuningConfig.forceGuaranteedRollup');
- }
-
- this.updateSpec(newSpec);
- return true;
- },
})}
>
);
@@ -2920,8 +2907,6 @@ export class LoadDataView extends React.PureComponent
Primary partitioning (by time)
g.type === 'uniform',
- required: true,
- info: (
- <>
- The granularity to create time chunks at. Multiple segments can be created per
- time chunk. For example, with 'DAY' segmentGranularity, the events of the same
- day fall into the same time chunk which can be optionally further partitioned
- into multiple segments based on other configurations and input size.
- >
- ),
- },
- ]}
- model={granularitySpec}
- onChange={g => this.updateSpec(deepSet(spec, 'spec.dataSchema.granularitySpec', g))}
+ fields={PRIMARY_PARTITION_RELATED_FORM_FIELDS}
+ model={spec}
+ onChange={this.updateSpec}
/>
- A comma separated list of intervals for the raw data being ingested.>,
- },
- ]}
- model={spec}
- onChange={this.updateSpec}
- />
-
- this.setState({ newFilterValue: f })}
- height="200px"
- />
-
-
A comma separated list of intervals for the raw data being ingested.
-
- This list is used to determine the shards that will be created. If it is not
- specified then then an additional job will run to automatically determine the data
- intervals used.
-
- Druid requires flat data (non-nested, non-hierarchical). Each row should represent a
- discrete event.
-
- {canFlatten && (
+
+
- If you have nested data, you can{' '}
-
- flatten
- {' '}
- it here. If the provided flattening capabilities are not sufficient, please pre-process
- your data before ingesting it into Druid.
+ Druid requires flat data (non-nested, non-hierarchical). Each row should represent a
+ discrete event.
- )}
-
Ensure that your data appears correctly in a row/column orientation.
-
-
+ {canFlatten && (
+
+ If you have nested data, you can{' '}
+
+ flatten
+ {' '}
+ it here. If the provided flattening capabilities are not sufficient, please pre-process
+ your data before ingesting it into Druid.
+
+ )}
+
Ensure that your data appears correctly in a row/column orientation.
- Druid partitions data based on the primary time column of your data. This column is stored
- internally in Druid as __time.
-
-
Configure how to define the time column for this data.
-
- If your data does not have a time column, you can select None to use a
- placeholder value. If the time information is spread across multiple columns you can combine
- them into one by selecting Expression and defining a transform expression.
-
-
-
+
+
+
+ Druid partitions data based on the primary time column of your data. This column is stored
+ internally in Druid as __time.
+
+
Configure how to define the time column for this data.
+
+ If your data does not have a time column, you can select None to use a
+ placeholder value. If the time information is spread across multiple columns you can
+ combine them into one by selecting Expression and defining a transform
+ expression.
+
- Each column in Druid must have an assigned type (string, long, float, double, complex, etc).
-
- {dimensionMode === 'specific' && (
+
+
- Default primitive types have been automatically assigned to your columns. If you want to
- change the type, click on the column header.
+ Each column in Druid must have an assigned type (string, long, float, double, complex,
+ etc).
- )}
-
-
+ {dimensionMode === 'specific' && (
+
+ Default primitive types have been automatically assigned to your columns. If you want to
+ change the type, click on the column header.
+
- Druid begins ingesting data once you submit a JSON ingestion spec. If you modify any values
- in this view, the values entered in previous sections will update accordingly. If you modify
- any values in previous sections, this spec will automatically update.
-
-
Submit the spec to begin loading data into Druid.
-
-
+
+
+
+ Druid begins ingesting data once you submit a JSON ingestion spec. If you modify any
+ values in this view, the values entered in previous sections will update accordingly. If
+ you modify any values in previous sections, this spec will automatically update.
+
Your partitioning and sorting configuration does not make sense.
-
- For best performance the first dimension in your schema (
- {firstDimensionName}), which is what the data will be primarily sorted on,
- should match the partitioning dimension ({partitionDimension}).
-
Your partitioning and sorting configuration does not make sense.
+
+ For best performance the first dimension in your schema (
+ {firstDimensionName}), which is what the data will be primarily sorted
+ on, should match the partitioning dimension ({partitionDimension}).
+
+
+
);
}
@@ -2923,6 +2930,15 @@ export class LoadDataView extends React.PureComponent
{nonsensicalSingleDimPartitioningMessage}
+ {settingIntervalsWouldSpeedUpIngestion(spec) && (
+
+
+ You can make this ingestion run slightly faster by explicitly specifying the time
+ intervals for this dataset from the{' '}
+ this.updateStep('filter')}>Filter step.
+
+
+ )}
Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
+
,
+ "name": "inputSegmentSizeBytes",
+ "type": "number",
+ },
Object {
"defaultValue": 1,
"info":
Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion.
,
- "label": "Max num concurrent sub tasks",
"min": 1,
"name": "tuningConfig.maxNumConcurrentSubTasks",
"type": "number",
},
- Object {
- "defaultValue": 419430400,
- "info":
- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-
,
- "name": "inputSegmentSizeBytes",
- "type": "number",
- },
Object {
"defaultValue": 10,
"defined": [Function],
"info":
Maximum number of merge tasks which can be run at the same time.
,
- "label": "Total num merge tasks",
"min": 1,
"name": "tuningConfig.totalNumMergeTasks",
"type": "number",
},
Object {
"adjustment": [Function],
- "defaultValue": 500000000,
+ "defaultValue": 1073741824,
+ "hideInMore": true,
"info":
Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks).
,
- "label": "Max input segment bytes per task",
"min": 1000000,
- "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask",
+ "name": "tuningConfig.splitHintSpec.maxSplitSize",
+ "type": "number",
+ },
+ Object {
+ "adjustment": [Function],
+ "defaultValue": 1000,
+ "hideInMore": true,
+ "info":
+ Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (
+
+ jute.maxbuffer
+
+ ) and the max packet size in MySQL (
+
+ max_allowed_packet
+
+ ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them.
+ ,
+ "label": "Max num files (segments)",
+ "min": 1,
+ "name": "tuningConfig.splitHintSpec.maxNumFiles",
"type": "number",
},
]
@@ -303,7 +312,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
"info":
Determines how many rows are in each segment.
,
- "label": "Max rows per segment",
"name": "tuningConfig.partitionsSpec.maxRowsPerSegment",
"type": "number",
},
@@ -313,7 +321,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
"info":
Total number of rows in segments waiting for being pushed.
,
- "label": "Max total rows",
"name": "tuningConfig.partitionsSpec.maxTotalRows",
"type": "number",
},
@@ -327,7 +334,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
A target row count for each partition. Each partition will have a row count close to the target assuming evenly distributed keys. Defaults to 5 million if numShards is null.
,
- "label": "Target rows per segment",
"name": "tuningConfig.partitionsSpec.targetRowsPerSegment",
"type": "number",
"zeroMeansUndefined": true,
@@ -342,7 +348,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data.
,
- "label": "Num shards",
"name": "tuningConfig.partitionsSpec.numShards",
"type": "number",
"zeroMeansUndefined": true,
@@ -352,7 +357,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
"info":
The dimensions to partition on. Leave blank to select all dimensions.
Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.
,
- "label": "Target rows per segment",
"name": "tuningConfig.partitionsSpec.targetRowsPerSegment",
"required": [Function],
"type": "number",
@@ -383,7 +385,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
"info":
Maximum number of rows to include in a partition.
,
- "label": "Max rows per segment",
"name": "tuningConfig.partitionsSpec.maxRowsPerSegment",
"required": [Function],
"type": "number",
@@ -395,48 +396,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti
"info":
Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
+
,
+ "name": "inputSegmentSizeBytes",
+ "type": "number",
+ },
Object {
"defaultValue": 1,
"info":
Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion.
,
- "label": "Max num concurrent sub tasks",
"min": 1,
"name": "tuningConfig.maxNumConcurrentSubTasks",
"type": "number",
},
- Object {
- "defaultValue": 419430400,
- "info":
- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-
,
- "name": "inputSegmentSizeBytes",
- "type": "number",
- },
Object {
"defaultValue": 10,
"defined": [Function],
"info":
Maximum number of merge tasks which can be run at the same time.
,
- "label": "Total num merge tasks",
"min": 1,
"name": "tuningConfig.totalNumMergeTasks",
"type": "number",
},
Object {
"adjustment": [Function],
- "defaultValue": 500000000,
+ "defaultValue": 1073741824,
+ "hideInMore": true,
"info":
Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks).
,
- "label": "Max input segment bytes per task",
"min": 1000000,
- "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask",
+ "name": "tuningConfig.splitHintSpec.maxSplitSize",
+ "type": "number",
+ },
+ Object {
+ "adjustment": [Function],
+ "defaultValue": 1000,
+ "hideInMore": true,
+ "info":
+ Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (
+
+ jute.maxbuffer
+
+ ) and the max packet size in MySQL (
+
+ max_allowed_packet
+
+ ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them.
+ ,
+ "label": "Max num files (segments)",
+ "min": 1,
+ "name": "tuningConfig.splitHintSpec.maxNumFiles",
"type": "number",
},
]
@@ -543,7 +561,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par
"info":
Determines how many rows are in each segment.
,
- "label": "Max rows per segment",
"name": "tuningConfig.partitionsSpec.maxRowsPerSegment",
"type": "number",
},
@@ -553,7 +570,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par
"info":
Total number of rows in segments waiting for being pushed.
,
- "label": "Max total rows",
"name": "tuningConfig.partitionsSpec.maxTotalRows",
"type": "number",
},
@@ -567,7 +583,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par
A target row count for each partition. Each partition will have a row count close to the target assuming evenly distributed keys. Defaults to 5 million if numShards is null.
,
- "label": "Target rows per segment",
"name": "tuningConfig.partitionsSpec.targetRowsPerSegment",
"type": "number",
"zeroMeansUndefined": true,
@@ -582,7 +597,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par
Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data.
,
- "label": "Num shards",
"name": "tuningConfig.partitionsSpec.numShards",
"type": "number",
"zeroMeansUndefined": true,
@@ -592,7 +606,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par
"info":
The dimensions to partition on. Leave blank to select all dimensions.
Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.
,
- "label": "Target rows per segment",
"name": "tuningConfig.partitionsSpec.targetRowsPerSegment",
"required": [Function],
"type": "number",
@@ -623,7 +634,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par
"info":
Maximum number of rows to include in a partition.
,
- "label": "Max rows per segment",
"name": "tuningConfig.partitionsSpec.maxRowsPerSegment",
"required": [Function],
"type": "number",
@@ -635,48 +645,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par
"info":
Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
+
,
+ "name": "inputSegmentSizeBytes",
+ "type": "number",
+ },
Object {
"defaultValue": 1,
"info":
Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion.
,
- "label": "Max num concurrent sub tasks",
"min": 1,
"name": "tuningConfig.maxNumConcurrentSubTasks",
"type": "number",
},
- Object {
- "defaultValue": 419430400,
- "info":
- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-
,
- "name": "inputSegmentSizeBytes",
- "type": "number",
- },
Object {
"defaultValue": 10,
"defined": [Function],
"info":
Maximum number of merge tasks which can be run at the same time.
,
- "label": "Total num merge tasks",
"min": 1,
"name": "tuningConfig.totalNumMergeTasks",
"type": "number",
},
Object {
"adjustment": [Function],
- "defaultValue": 500000000,
+ "defaultValue": 1073741824,
+ "hideInMore": true,
"info":
Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks).
,
- "label": "Max input segment bytes per task",
"min": 1000000,
- "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask",
+ "name": "tuningConfig.splitHintSpec.maxSplitSize",
+ "type": "number",
+ },
+ Object {
+ "adjustment": [Function],
+ "defaultValue": 1000,
+ "hideInMore": true,
+ "info":
+ Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (
+
+ jute.maxbuffer
+
+ ) and the max packet size in MySQL (
+
+ max_allowed_packet
+
+ ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them.
+ ,
+ "label": "Max num files (segments)",
+ "min": 1,
+ "name": "tuningConfig.splitHintSpec.maxNumFiles",
"type": "number",
},
]
@@ -783,7 +810,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
"info":
Determines how many rows are in each segment.
,
- "label": "Max rows per segment",
"name": "tuningConfig.partitionsSpec.maxRowsPerSegment",
"type": "number",
},
@@ -793,7 +819,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
"info":
Total number of rows in segments waiting for being pushed.
,
- "label": "Max total rows",
"name": "tuningConfig.partitionsSpec.maxTotalRows",
"type": "number",
},
@@ -807,7 +832,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
A target row count for each partition. Each partition will have a row count close to the target assuming evenly distributed keys. Defaults to 5 million if numShards is null.
,
- "label": "Target rows per segment",
"name": "tuningConfig.partitionsSpec.targetRowsPerSegment",
"type": "number",
"zeroMeansUndefined": true,
@@ -822,7 +846,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
Directly specify the number of shards to create. If this is specified and 'intervals' is specified in the granularitySpec, the index task can skip the determine intervals/partitions pass through the data.
,
- "label": "Num shards",
"name": "tuningConfig.partitionsSpec.numShards",
"type": "number",
"zeroMeansUndefined": true,
@@ -832,7 +855,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = `
"info":
The dimensions to partition on. Leave blank to select all dimensions.
Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
+
,
+ "name": "inputSegmentSizeBytes",
+ "type": "number",
+ },
Object {
"defaultValue": 1,
"info":
Maximum number of tasks which can be run at the same time. The supervisor task would spawn worker tasks up to maxNumConcurrentSubTasks regardless of the available task slots. If this value is set to 1, the supervisor task processes data ingestion on its own instead of spawning worker tasks. If this value is set to too large, too many worker tasks can be created which might block other ingestion.
,
- "label": "Max num concurrent sub tasks",
"min": 1,
"name": "tuningConfig.maxNumConcurrentSubTasks",
"type": "number",
},
- Object {
- "defaultValue": 419430400,
- "info":
- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time.
-
,
- "name": "inputSegmentSizeBytes",
- "type": "number",
- },
Object {
"defaultValue": 10,
"defined": [Function],
"info":
Maximum number of merge tasks which can be run at the same time.
,
- "label": "Total num merge tasks",
"min": 1,
"name": "tuningConfig.totalNumMergeTasks",
"type": "number",
},
Object {
"adjustment": [Function],
- "defaultValue": 500000000,
+ "defaultValue": 1073741824,
+ "hideInMore": true,
"info":
Maximum number of bytes of input segments to process in a single task. If a single segment is larger than this number, it will be processed by itself in a single task (input segments are never split across tasks).
,
- "label": "Max input segment bytes per task",
"min": 1000000,
- "name": "tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask",
+ "name": "tuningConfig.splitHintSpec.maxSplitSize",
+ "type": "number",
+ },
+ Object {
+ "adjustment": [Function],
+ "defaultValue": 1000,
+ "hideInMore": true,
+ "info":
+ Maximum number of input segments to process in a single subtask. This limit is to avoid task failures when the ingestion spec is too long. There are two known limits on the max size of serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (
+
+ jute.maxbuffer
+
+ ) and the max packet size in MySQL (
+
+ max_allowed_packet
+
+ ). These can make ingestion tasks fail if the serialized ingestion spec size hits one of them.
+ ,
+ "label": "Max num files (segments)",
+ "min": 1,
+ "name": "tuningConfig.splitHintSpec.maxNumFiles",
"type": "number",
},
]
diff --git a/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap b/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap
index 7b7dabc71a8b..e5056501c0cd 100644
--- a/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap
+++ b/web-console/src/dialogs/lookup-edit-dialog/__snapshots__/lookup-edit-dialog.spec.tsx.snap
@@ -158,7 +158,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
"defaultValue": 0,
"defined": [Function],
"info": "Number of header rows to be skipped. The default number of header rows to be skipped is 0.",
- "label": "Skip header rows",
"name": "extractionNamespace.namespaceParseSpec.skipHeaderRows",
"type": "number",
},
@@ -166,14 +165,12 @@ exports[`LookupEditDialog matches snapshot 1`] = `
"defaultValue": false,
"defined": [Function],
"info": "A flag to indicate that column information can be extracted from the input files' header row",
- "label": "Has header row",
"name": "extractionNamespace.namespaceParseSpec.hasHeaderRow",
"type": "boolean",
},
Object {
"defined": [Function],
"info": "The list of columns in the csv file",
- "label": "Columns",
"name": "extractionNamespace.namespaceParseSpec.columns",
"placeholder": "[\\"key\\", \\"value\\"]",
"required": [Function],
@@ -182,7 +179,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
Object {
"defined": [Function],
"info": "The name of the column containing the key",
- "label": "Key column",
"name": "extractionNamespace.namespaceParseSpec.keyColumn",
"placeholder": "(optional - defaults to the first column)",
"type": "string",
@@ -190,28 +186,24 @@ exports[`LookupEditDialog matches snapshot 1`] = `
Object {
"defined": [Function],
"info": "The name of the column containing the value",
- "label": "Value column",
"name": "extractionNamespace.namespaceParseSpec.valueColumn",
"placeholder": "(optional - defaults to the second column)",
"type": "string",
},
Object {
"defined": [Function],
- "label": "Delimiter",
"name": "extractionNamespace.namespaceParseSpec.delimiter",
"placeholder": "(optional)",
"type": "string",
},
Object {
"defined": [Function],
- "label": "List delimiter",
"name": "extractionNamespace.namespaceParseSpec.listDelimiter",
"placeholder": "(optional)",
"type": "string",
},
Object {
"defined": [Function],
- "label": "Key field name",
"name": "extractionNamespace.namespaceParseSpec.keyFieldName",
"placeholder": "key",
"required": true,
@@ -219,7 +211,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
},
Object {
"defined": [Function],
- "label": "Value field name",
"name": "extractionNamespace.namespaceParseSpec.valueFieldName",
"placeholder": "value",
"required": true,
@@ -229,7 +220,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
"defaultValue": "0",
"defined": [Function],
"info": "Period between polling for updates",
- "label": "Poll period",
"name": "extractionNamespace.pollPeriod",
"type": "string",
},
@@ -247,7 +237,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
.table WHERE filter
,
- "label": "Namespace",
"name": "extractionNamespace.namespace",
"placeholder": "some_lookup",
"required": true,
@@ -264,21 +253,18 @@ exports[`LookupEditDialog matches snapshot 1`] = `
Object {
"defined": [Function],
"info": "Defines the user to be used by the connector config",
- "label": "User",
"name": "extractionNamespace.connectorConfig.user",
"type": "string",
},
Object {
"defined": [Function],
"info": "Defines the password to be used by the connector config",
- "label": "Password",
"name": "extractionNamespace.connectorConfig.password",
"type": "string",
},
Object {
"defined": [Function],
"info": "Should tables be created",
- "label": "Create tables",
"name": "extractionNamespace.connectorConfig.createTables",
"type": "boolean",
},
@@ -296,7 +282,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
WHERE filter
,
- "label": "Table",
"name": "extractionNamespace.table",
"placeholder": "some_lookup_table",
"required": true,
@@ -316,7 +301,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
, valueColumn, tsColumn? FROM namespace.table WHERE filter
,
- "label": "Key column",
"name": "extractionNamespace.keyColumn",
"placeholder": "my_key_value",
"required": true,
@@ -336,7 +320,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
, tsColumn? FROM namespace.table WHERE filter
,
- "label": "Value column",
"name": "extractionNamespace.valueColumn",
"placeholder": "my_column_value",
"required": true,
@@ -356,7 +339,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
,
- "label": "Filter",
"name": "extractionNamespace.filter",
"placeholder": "(optional)",
"type": "string",
@@ -384,7 +366,6 @@ exports[`LookupEditDialog matches snapshot 1`] = `
"defaultValue": 0,
"defined": [Function],
"info": "How long to wait (in ms) for the first run of the cache to populate. 0 indicates to not wait",
- "label": "First cache timeout",
"name": "firstCacheTimeout",
"type": "number",
},
diff --git a/web-console/src/druid-models/compaction-config.tsx b/web-console/src/druid-models/compaction-config.tsx
index 02bb42f0004e..437f91404802 100644
--- a/web-console/src/druid-models/compaction-config.tsx
+++ b/web-console/src/druid-models/compaction-config.tsx
@@ -53,7 +53,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
// partitionsSpec type: dynamic
{
name: 'tuningConfig.partitionsSpec.maxRowsPerSegment',
- label: 'Max rows per segment',
type: 'number',
defaultValue: 5000000,
defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic',
@@ -61,7 +60,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
},
{
name: 'tuningConfig.partitionsSpec.maxTotalRows',
- label: 'Max total rows',
type: 'number',
defaultValue: 20000000,
defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'dynamic',
@@ -70,7 +68,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
// partitionsSpec type: hashed
{
name: 'tuningConfig.partitionsSpec.targetRowsPerSegment',
- label: 'Target rows per segment',
type: 'number',
zeroMeansUndefined: true,
defined: (t: CompactionConfig) =>
@@ -91,7 +88,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
},
{
name: 'tuningConfig.partitionsSpec.numShards',
- label: 'Num shards',
type: 'number',
zeroMeansUndefined: true,
defined: (t: CompactionConfig) =>
@@ -113,7 +109,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
},
{
name: 'tuningConfig.partitionsSpec.partitionDimensions',
- label: 'Partition dimensions',
type: 'string-array',
placeholder: '(all dimensions)',
defined: (t: CompactionConfig) => deepGet(t, 'tuningConfig.partitionsSpec.type') === 'hashed',
@@ -122,7 +117,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
// partitionsSpec type: single_dim
{
name: 'tuningConfig.partitionsSpec.partitionDimension',
- label: 'Partition dimension',
type: 'string',
defined: (t: CompactionConfig) =>
deepGet(t, 'tuningConfig.partitionsSpec.type') === 'single_dim',
@@ -131,7 +125,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
},
{
name: 'tuningConfig.partitionsSpec.targetRowsPerSegment',
- label: 'Target rows per segment',
type: 'number',
zeroMeansUndefined: true,
defined: (t: CompactionConfig) =>
@@ -149,7 +142,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
},
{
name: 'tuningConfig.partitionsSpec.maxRowsPerSegment',
- label: 'Max rows per segment',
type: 'number',
zeroMeansUndefined: true,
defined: (t: CompactionConfig) =>
@@ -162,7 +154,6 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
},
{
name: 'tuningConfig.partitionsSpec.assumeGrouped',
- label: 'Assume grouped',
type: 'boolean',
defaultValue: false,
defined: (t: CompactionConfig) =>
@@ -174,9 +165,22 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
),
},
+ {
+ name: 'inputSegmentSizeBytes',
+ type: 'number',
+ defaultValue: 419430400,
+ info: (
+
+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must
+ be processed in its entirety, if the segments for a particular time chunk have a total size
+ in bytes greater than this parameter, compaction will not run for that time chunk. Because
+ each compaction task runs with a single thread, setting this value too far above 1–2GB will
+ result in compaction tasks taking an excessive amount of time.
+
- Maximum number of total segment bytes processed per compaction task. Since a time chunk must
- be processed in its entirety, if the segments for a particular time chunk have a total size
- in bytes greater than this parameter, compaction will not run for that time chunk. Because
- each compaction task runs with a single thread, setting this value too far above 1–2GB will
- result in compaction tasks taking an excessive amount of time.
-
- ),
- },
{
name: 'tuningConfig.totalNumMergeTasks',
- label: 'Total num merge tasks',
type: 'number',
defaultValue: 10,
min: 1,
@@ -215,12 +204,12 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
info: <>Maximum number of merge tasks which can be run at the same time.>,
},
{
- name: 'tuningConfig.splitHintSpec.maxInputSegmentBytesPerTask',
- label: 'Max input segment bytes per task',
+ name: 'tuningConfig.splitHintSpec.maxSplitSize',
type: 'number',
- defaultValue: 500000000,
+ defaultValue: 1073741824,
min: 1000000,
- adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'segments'),
+ hideInMore: true,
+ adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'maxSize'),
info: (
<>
Maximum number of bytes of input segments to process in a single task. If a single segment
@@ -229,4 +218,23 @@ export const COMPACTION_CONFIG_FIELDS: Field[] = [
>
),
},
+ {
+ name: 'tuningConfig.splitHintSpec.maxNumFiles',
+ label: 'Max num files (segments)',
+ type: 'number',
+ defaultValue: 1000,
+ min: 1,
+ hideInMore: true,
+ adjustment: (t: CompactionConfig) => deepSet(t, 'tuningConfig.splitHintSpec.type', 'maxSize'),
+ info: (
+ <>
+ Maximum number of input segments to process in a single subtask. This limit is to avoid task
+ failures when the ingestion spec is too long. There are two known limits on the max size of
+ serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (
+ jute.maxbuffer) and the max packet size in MySQL (
+ max_allowed_packet). These can make ingestion tasks fail if the serialized
+ ingestion spec size hits one of them.
+ >
+ ),
+ },
];
diff --git a/web-console/src/druid-models/filter.tsx b/web-console/src/druid-models/filter.tsx
index 882fdd8d6757..2951dbd1905c 100644
--- a/web-console/src/druid-models/filter.tsx
+++ b/web-console/src/druid-models/filter.tsx
@@ -154,7 +154,6 @@ export const FILTERS_FIELDS: Field[] = [
},
{
name: 'spec.dataSchema.transformSpec.filter',
- label: 'Filter',
type: 'json',
height: '350px',
placeholder: '{ "type": "true" }',
diff --git a/web-console/src/druid-models/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec.tsx
index a1783fb13115..eb5b582aee23 100644
--- a/web-console/src/druid-models/ingestion-spec.tsx
+++ b/web-console/src/druid-models/ingestion-spec.tsx
@@ -1420,7 +1420,6 @@ export function getSecondaryPartitionRelatedFormFields(
// partitionsSpec type: dynamic
{
name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment',
- label: 'Max rows per segment',
type: 'number',
defaultValue: 5000000,
defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'dynamic',
@@ -1428,7 +1427,6 @@ export function getSecondaryPartitionRelatedFormFields(
},
{
name: 'spec.tuningConfig.partitionsSpec.maxTotalRows',
- label: 'Max total rows',
type: 'number',
defaultValue: 20000000,
defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'dynamic',
@@ -1437,7 +1435,6 @@ export function getSecondaryPartitionRelatedFormFields(
// partitionsSpec type: hashed
{
name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment',
- label: 'Target rows per segment',
type: 'number',
zeroMeansUndefined: true,
defaultValue: 5000000,
@@ -1460,7 +1457,6 @@ export function getSecondaryPartitionRelatedFormFields(
},
{
name: 'spec.tuningConfig.partitionsSpec.numShards',
- label: 'Num shards',
type: 'number',
zeroMeansUndefined: true,
hideInMore: true,
@@ -1483,7 +1479,6 @@ export function getSecondaryPartitionRelatedFormFields(
},
{
name: 'spec.tuningConfig.partitionsSpec.partitionDimensions',
- label: 'Partition dimensions',
type: 'string-array',
placeholder: '(all dimensions)',
defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'hashed',
@@ -1492,7 +1487,6 @@ export function getSecondaryPartitionRelatedFormFields(
// partitionsSpec type: single_dim
{
name: 'spec.tuningConfig.partitionsSpec.partitionDimension',
- label: 'Partition dimension',
type: 'string',
defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim',
required: true,
@@ -1512,7 +1506,6 @@ export function getSecondaryPartitionRelatedFormFields(
},
{
name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment',
- label: 'Target rows per segment',
type: 'number',
zeroMeansUndefined: true,
defined: s =>
@@ -1530,7 +1523,6 @@ export function getSecondaryPartitionRelatedFormFields(
},
{
name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment',
- label: 'Max rows per segment',
type: 'number',
zeroMeansUndefined: true,
defined: s =>
@@ -1543,7 +1535,6 @@ export function getSecondaryPartitionRelatedFormFields(
},
{
name: 'spec.tuningConfig.partitionsSpec.assumeGrouped',
- label: 'Assume grouped',
type: 'boolean',
defaultValue: false,
hideInMore: true,
@@ -1562,14 +1553,12 @@ export function getSecondaryPartitionRelatedFormFields(
return [
{
name: 'spec.tuningConfig.maxRowsPerSegment',
- label: 'Max rows per segment',
type: 'number',
defaultValue: 5000000,
info: <>Determines how many rows are in each segment.>,
},
{
name: 'spec.tuningConfig.maxTotalRows',
- label: 'Max total rows',
type: 'number',
defaultValue: 20000000,
info: <>Total number of rows in segments waiting for being pushed.>,
@@ -1587,13 +1576,13 @@ export function settingIntervalsWouldSpeedUpIngestion(spec: IngestionSpec): bool
);
}
-const TUNING_CONFIG_FORM_FIELDS: Field[] = [
+const TUNING_FORM_FIELDS: Field[] = [
{
- name: 'maxNumConcurrentSubTasks',
+ name: 'spec.tuningConfig.maxNumConcurrentSubTasks',
type: 'number',
defaultValue: 1,
min: 1,
- defined: (t: TuningConfig) => t.type === 'index_parallel',
+ defined: s => s.type === 'index_parallel',
info: (
<>
Maximum number of tasks which can be run at the same time. The supervisor task would spawn
@@ -1605,41 +1594,41 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'maxRetry',
+ name: 'spec.tuningConfig.maxRetry',
type: 'number',
defaultValue: 3,
- defined: (t: TuningConfig) => t.type === 'index_parallel',
+ defined: s => s.type === 'index_parallel',
hideInMore: true,
info: <>Maximum number of retries on task failures.>,
},
{
- name: 'taskStatusCheckPeriodMs',
+ name: 'spec.tuningConfig.taskStatusCheckPeriodMs',
type: 'number',
defaultValue: 1000,
- defined: (t: TuningConfig) => t.type === 'index_parallel',
+ defined: s => s.type === 'index_parallel',
hideInMore: true,
info: <>Polling period in milliseconds to check running task statuses.>,
},
{
- name: 'totalNumMergeTasks',
+ name: 'spec.tuningConfig.totalNumMergeTasks',
type: 'number',
defaultValue: 10,
min: 1,
- defined: (t: TuningConfig) =>
+ defined: s =>
Boolean(
- t.type === 'index_parallel' &&
- oneOf(deepGet(t, 'partitionsSpec.type'), 'hashed', 'single_dim'),
+ s.type === 'index_parallel' &&
+ oneOf(deepGet(s, 'spec.tuningConfig.partitionsSpec.type'), 'hashed', 'single_dim'),
),
info: <>Number of tasks to merge partial segments after shuffle.>,
},
{
- name: 'maxNumSegmentsToMerge',
+ name: 'spec.tuningConfig.maxNumSegmentsToMerge',
type: 'number',
defaultValue: 100,
- defined: (t: TuningConfig) =>
+ defined: s =>
Boolean(
- t.type === 'index_parallel' &&
- oneOf(deepGet(t, 'partitionsSpec.type'), 'hashed', 'single_dim'),
+ s.type === 'index_parallel' &&
+ oneOf(deepGet(s, 'spec.tuningConfig.partitionsSpec.type'), 'hashed', 'single_dim'),
),
info: (
<>
@@ -1648,22 +1637,22 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'maxRowsInMemory',
+ name: 'spec.tuningConfig.maxRowsInMemory',
type: 'number',
defaultValue: 1000000,
info: <>Used in determining when intermediate persists to disk should occur.>,
},
{
- name: 'maxBytesInMemory',
+ name: 'spec.tuningConfig.maxBytesInMemory',
type: 'number',
placeholder: 'Default: 1/6 of max JVM memory',
info: <>Used in determining when intermediate persists to disk should occur.>,
},
{
- name: 'resetOffsetAutomatically',
+ name: 'spec.tuningConfig.resetOffsetAutomatically',
type: 'boolean',
defaultValue: false,
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
info: (
<>
Whether to reset the consumer offset if the next offset that it is trying to fetch is less
@@ -1672,10 +1661,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'skipSequenceNumberAvailabilityCheck',
+ name: 'spec.tuningConfig.skipSequenceNumberAvailabilityCheck',
type: 'boolean',
defaultValue: false,
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
info: (
<>
Whether to enable checking if the current sequence number is still available in a particular
@@ -1685,17 +1674,17 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'intermediatePersistPeriod',
+ name: 'spec.tuningConfig.intermediatePersistPeriod',
type: 'duration',
defaultValue: 'PT10M',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
info: <>The period that determines the rate at which intermediate persists occur.>,
},
{
- name: 'intermediateHandoffPeriod',
+ name: 'spec.tuningConfig.intermediateHandoffPeriod',
type: 'duration',
defaultValue: 'P2147483647D',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
info: (
<>
How often the tasks should hand off segments. Handoff will happen either if
@@ -1705,7 +1694,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'maxPendingPersists',
+ name: 'spec.tuningConfig.maxPendingPersists',
type: 'number',
hideInMore: true,
info: (
@@ -1717,7 +1706,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'pushTimeout',
+ name: 'spec.tuningConfig.pushTimeout',
type: 'number',
defaultValue: 0,
hideInMore: true,
@@ -1728,15 +1717,15 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'handoffConditionTimeout',
+ name: 'spec.tuningConfig.handoffConditionTimeout',
type: 'number',
defaultValue: 0,
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
hideInMore: true,
info: <>Milliseconds to wait for segment handoff. 0 means to wait forever.>,
},
{
- name: 'indexSpec.bitmap.type',
+ name: 'spec.tuningConfig.indexSpec.bitmap.type',
label: 'Index bitmap type',
type: 'string',
defaultValue: 'roaring',
@@ -1745,7 +1734,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
info: <>Compression format for bitmap indexes.>,
},
{
- name: 'indexSpec.dimensionCompression',
+ name: 'spec.tuningConfig.indexSpec.dimensionCompression',
label: 'Index dimension compression',
type: 'string',
defaultValue: 'lz4',
@@ -1754,7 +1743,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
info: <>Compression format for dimension columns.>,
},
{
- name: 'indexSpec.metricCompression',
+ name: 'spec.tuningConfig.indexSpec.metricCompression',
label: 'Index metric compression',
type: 'string',
defaultValue: 'lz4',
@@ -1763,7 +1752,7 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
info: <>Compression format for primitive type metric columns.>,
},
{
- name: 'indexSpec.longEncoding',
+ name: 'spec.tuningConfig.indexSpec.longEncoding',
label: 'Index long encoding',
type: 'string',
defaultValue: 'longs',
@@ -1779,43 +1768,79 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'chatHandlerTimeout',
+ name: 'spec.tuningConfig.splitHintSpec.maxSplitSize',
+ type: 'number',
+ defaultValue: 1073741824,
+ min: 1000000,
+   defined: s =>
+     s.type === 'index_parallel' && deepGet(s, 'spec.ioConfig.inputSource.type') !== 'http',
+ hideInMore: true,
+   adjustment: s => deepSet(s, 'spec.tuningConfig.splitHintSpec.type', 'maxSize'),
+ info: (
+ <>
+ Maximum number of bytes of input files to process in a single subtask. If a single file is
+ larger than this number, it will be processed by itself in a single subtask (Files are never
+ split across tasks yet).
+ >
+ ),
+ },
+ {
+ name: 'spec.tuningConfig.splitHintSpec.maxNumFiles',
+ type: 'number',
+ defaultValue: 1000,
+ min: 1,
+ defined: s => s.type === 'index_parallel',
+ hideInMore: true,
+   adjustment: s => deepSet(s, 'spec.tuningConfig.splitHintSpec.type', 'maxSize'),
+ info: (
+ <>
+ Maximum number of input files to process in a single subtask. This limit is to avoid task
+ failures when the ingestion spec is too long. There are two known limits on the max size of
+ serialized ingestion spec, i.e., the max ZNode size in ZooKeeper (
+ jute.maxbuffer) and the max packet size in MySQL (
+ max_allowed_packet). These can make ingestion tasks fail if the serialized
+ ingestion spec size hits one of them.
+ >
+ ),
+ },
+ {
+ name: 'spec.tuningConfig.chatHandlerTimeout',
type: 'duration',
defaultValue: 'PT10S',
- defined: (t: TuningConfig) => t.type === 'index_parallel',
+ defined: s => s.type === 'index_parallel',
hideInMore: true,
info: <>Timeout for reporting the pushed segments in worker tasks.>,
},
{
- name: 'chatHandlerNumRetries',
+ name: 'spec.tuningConfig.chatHandlerNumRetries',
type: 'number',
defaultValue: 5,
- defined: (t: TuningConfig) => t.type === 'index_parallel',
+ defined: s => s.type === 'index_parallel',
hideInMore: true,
info: <>Retries for reporting the pushed segments in worker tasks.>,
},
{
- name: 'workerThreads',
+ name: 'spec.tuningConfig.workerThreads',
type: 'number',
placeholder: 'min(10, taskCount)',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
info: (
<>The number of threads that will be used by the supervisor for asynchronous operations.>
),
},
{
- name: 'chatThreads',
+ name: 'spec.tuningConfig.chatThreads',
type: 'number',
placeholder: 'min(10, taskCount * replicas)',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
hideInMore: true,
info: <>The number of threads that will be used for communicating with indexing tasks.>,
},
{
- name: 'chatRetries',
+ name: 'spec.tuningConfig.chatRetries',
type: 'number',
defaultValue: 8,
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
hideInMore: true,
info: (
<>
@@ -1825,17 +1850,17 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'httpTimeout',
+ name: 'spec.tuningConfig.httpTimeout',
type: 'duration',
defaultValue: 'PT10S',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
info: <>How long to wait for a HTTP response from an indexing task.>,
},
{
- name: 'shutdownTimeout',
+ name: 'spec.tuningConfig.shutdownTimeout',
type: 'duration',
defaultValue: 'PT80S',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
hideInMore: true,
info: (
<>
@@ -1844,10 +1869,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'offsetFetchPeriod',
+ name: 'spec.tuningConfig.offsetFetchPeriod',
type: 'duration',
defaultValue: 'PT30S',
- defined: (t: TuningConfig) => t.type === 'kafka',
+ defined: s => s.type === 'kafka',
info: (
<>
How often the supervisor queries Kafka and the indexing tasks to fetch current offsets and
@@ -1856,10 +1881,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'recordBufferSize',
+ name: 'spec.tuningConfig.recordBufferSize',
type: 'number',
defaultValue: 10000,
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
info: (
<>
Size of the buffer (number of events) used between the Kinesis fetch threads and the main
@@ -1868,10 +1893,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'recordBufferOfferTimeout',
+ name: 'spec.tuningConfig.recordBufferOfferTimeout',
type: 'number',
defaultValue: 5000,
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
hideInMore: true,
info: (
<>
@@ -1881,11 +1906,11 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'recordBufferFullWait',
+ name: 'spec.tuningConfig.recordBufferFullWait',
hideInMore: true,
type: 'number',
defaultValue: 5000,
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
info: (
<>
Length of time in milliseconds to wait for the buffer to drain before attempting to fetch
@@ -1894,10 +1919,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'fetchSequenceNumberTimeout',
+ name: 'spec.tuningConfig.fetchSequenceNumberTimeout',
type: 'number',
defaultValue: 60000,
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
hideInMore: true,
info: (
<>
@@ -1909,10 +1934,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'fetchThreads',
+ name: 'spec.tuningConfig.fetchThreads',
type: 'number',
placeholder: 'max(1, {numProcessors} - 1)',
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
hideInMore: true,
info: (
<>
@@ -1922,10 +1947,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'maxRecordsPerPoll',
+ name: 'spec.tuningConfig.maxRecordsPerPoll',
type: 'number',
defaultValue: 100,
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
hideInMore: true,
info: (
<>
@@ -1935,10 +1960,10 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
),
},
{
- name: 'repartitionTransitionDuration',
+ name: 'spec.tuningConfig.repartitionTransitionDuration',
type: 'duration',
defaultValue: 'PT2M',
- defined: (t: TuningConfig) => t.type === 'kinesis',
+ defined: s => s.type === 'kinesis',
hideInMore: true,
info: (
<>
@@ -1959,8 +1984,8 @@ const TUNING_CONFIG_FORM_FIELDS: Field[] = [
},
];
-export function getTuningSpecFormFields() {
- return TUNING_CONFIG_FORM_FIELDS;
+export function getTuningFormFields() {
+ return TUNING_FORM_FIELDS;
}
export interface IndexSpec {
diff --git a/web-console/src/druid-models/lookup-spec.tsx b/web-console/src/druid-models/lookup-spec.tsx
index c9e0e5a13bde..31405f311719 100644
--- a/web-console/src/druid-models/lookup-spec.tsx
+++ b/web-console/src/druid-models/lookup-spec.tsx
@@ -104,8 +104,8 @@ export const LOOKUP_FIELDS: Field[] = [
// cachedNamespace lookups have more options
{
name: 'extractionNamespace.type',
- type: 'string',
label: 'Globally cached lookup type',
+ type: 'string',
placeholder: 'uri',
suggestions: ['uri', 'jdbc'],
defined: (model: LookupSpec) => model.type === 'cachedNamespace',
@@ -113,8 +113,8 @@ export const LOOKUP_FIELDS: Field[] = [
},
{
name: 'extractionNamespace.uriPrefix',
- type: 'string',
label: 'URI prefix',
+ type: 'string',
placeholder: 's3://bucket/some/key/prefix/',
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -145,8 +145,8 @@ export const LOOKUP_FIELDS: Field[] = [
},
{
name: 'extractionNamespace.fileRegex',
- type: 'string',
label: 'File regex',
+ type: 'string',
defaultValue: '.*',
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -157,8 +157,8 @@ export const LOOKUP_FIELDS: Field[] = [
// namespaceParseSpec
{
name: 'extractionNamespace.namespaceParseSpec.format',
- type: 'string',
label: 'Parse format',
+ type: 'string',
suggestions: ['csv', 'tsv', 'simpleJson', 'customJson'],
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri',
required: true,
@@ -177,7 +177,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.skipHeaderRows',
type: 'number',
- label: 'Skip header rows',
defaultValue: 0,
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -187,7 +186,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.hasHeaderRow',
type: 'boolean',
- label: 'Has header row',
defaultValue: false,
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -197,7 +195,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.columns',
type: 'string-array',
- label: 'Columns',
placeholder: `["key", "value"]`,
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -209,7 +206,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.keyColumn',
type: 'string',
- label: 'Key column',
placeholder: '(optional - defaults to the first column)',
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -219,7 +215,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.valueColumn',
type: 'string',
- label: 'Value column',
placeholder: '(optional - defaults to the second column)',
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -231,7 +226,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.delimiter',
type: 'string',
- label: 'Delimiter',
placeholder: `(optional)`,
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -240,7 +234,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.listDelimiter',
type: 'string',
- label: 'List delimiter',
placeholder: `(optional)`,
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -251,7 +244,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.keyFieldName',
type: 'string',
- label: 'Key field name',
placeholder: `key`,
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -261,7 +253,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespaceParseSpec.valueFieldName',
type: 'string',
- label: 'Value field name',
placeholder: `value`,
defined: (model: LookupSpec) =>
deepGet(model, 'extractionNamespace.type') === 'uri' &&
@@ -271,7 +262,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.pollPeriod',
type: 'string',
- label: 'Poll period',
defaultValue: '0',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'uri',
info: `Period between polling for updates`,
@@ -281,7 +271,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.namespace',
type: 'string',
- label: 'Namespace',
placeholder: 'some_lookup',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
required: true,
@@ -297,8 +286,8 @@ export const LOOKUP_FIELDS: Field[] = [
},
{
name: 'extractionNamespace.connectorConfig.connectURI',
- type: 'string',
label: 'Connect URI',
+ type: 'string',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
required: true,
info: 'Defines the connectURI value on the The connector config to used',
@@ -306,28 +295,24 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.connectorConfig.user',
type: 'string',
- label: 'User',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
info: 'Defines the user to be used by the connector config',
},
{
name: 'extractionNamespace.connectorConfig.password',
type: 'string',
- label: 'Password',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
info: 'Defines the password to be used by the connector config',
},
{
name: 'extractionNamespace.connectorConfig.createTables',
type: 'boolean',
- label: 'Create tables',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
info: 'Should tables be created',
},
{
name: 'extractionNamespace.table',
type: 'string',
- label: 'Table',
placeholder: 'some_lookup_table',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
required: true,
@@ -347,7 +332,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.keyColumn',
type: 'string',
- label: 'Key column',
placeholder: 'my_key_value',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
required: true,
@@ -367,7 +351,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.valueColumn',
type: 'string',
- label: 'Value column',
placeholder: 'my_column_value',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
required: true,
@@ -387,7 +370,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'extractionNamespace.filter',
type: 'string',
- label: 'Filter',
placeholder: '(optional)',
defined: (model: LookupSpec) => deepGet(model, 'extractionNamespace.type') === 'jdbc',
info: (
@@ -427,7 +409,6 @@ export const LOOKUP_FIELDS: Field[] = [
{
name: 'firstCacheTimeout',
type: 'number',
- label: 'First cache timeout',
defaultValue: 0,
defined: (model: LookupSpec) => model.type === 'cachedNamespace',
info: `How long to wait (in ms) for the first run of the cache to populate. 0 indicates to not wait`,
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 4ee62e15e569..898482e81f4d 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -98,7 +98,7 @@ import {
getRollup,
getSecondaryPartitionRelatedFormFields,
getSpecType,
- getTuningSpecFormFields,
+ getTuningFormFields,
IngestionComboTypeWithExtra,
IngestionSpec,
InputFormat,
@@ -2935,7 +2935,7 @@ export class LoadDataView extends React.PureComponent
You can make this ingestion run slightly faster by explicitly specifying the time
intervals for this dataset from the{' '}
- this.updateStep('filter')}>Filter step.
+ this.updateStep('filter')}>Filter step.
)}
@@ -2952,7 +2952,6 @@ export class LoadDataView extends React.PureComponent
+ A list of intervals describing what time chunks of segments should be created. This list
+ will be broken up and rounded-off based on the segmentGranularity.
+
+
+ If not provided, batch ingestion tasks will generally determine which time chunks to
+ output based on what timestamps are found in the input data.
+
+
+ If specified, batch ingestion tasks may be able to skip a determining-partitions phase,
+ which can result in faster ingestion. Batch ingestion tasks may also be able to request
+ all their locks up-front instead of one by one. Batch ingestion tasks will throw away any
+ records with timestamps outside of the specified intervals.
+
{nonsensicalSingleDimPartitioningMessage}
- {settingIntervalsWouldSpeedUpIngestion(spec) && (
-
-
- You can make this ingestion run slightly faster by explicitly specifying the time
- intervals for this dataset from the{' '}
- this.updateStep('filter')}>Filter step.
-
-
- )}
{this.renderNextBar({
disabled: invalidPartitionConfig(spec),
From 4696d9c13d873b5a22a9ee00cc57a401617278ea Mon Sep 17 00:00:00 2001
From: Vadim Ogievetsky
Date: Tue, 8 Dec 2020 15:15:17 -0800
Subject: [PATCH 13/14] work with all sorts of chars
---
web-console/e2e-tests/tutorial-batch.spec.ts | 9 ++++++---
web-console/src/singletons/api.spec.ts | 2 +-
web-console/src/singletons/api.ts | 10 +++++++++-
3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/web-console/e2e-tests/tutorial-batch.spec.ts b/web-console/e2e-tests/tutorial-batch.spec.ts
index f4fa45054632..842d7a50182e 100644
--- a/web-console/e2e-tests/tutorial-batch.spec.ts
+++ b/web-console/e2e-tests/tutorial-batch.spec.ts
@@ -16,13 +16,14 @@
* limitations under the License.
*/
+import { SqlRef } from 'druid-query-toolkit';
import * as playwright from 'playwright-chromium';
import { DatasourcesOverview } from './component/datasources/overview';
import { IngestionOverview } from './component/ingestion/overview';
import { ConfigureSchemaConfig } from './component/load-data/config/configure-schema';
-import { PartitionConfig } from './component/load-data/config/partition';
import { SegmentGranularity } from './component/load-data/config/partition';
+import { PartitionConfig } from './component/load-data/config/partition';
import { PublishConfig } from './component/load-data/config/publish';
import { LocalFileDataConnector } from './component/load-data/data-connector/local-file';
import { DataLoader } from './component/load-data/data-loader';
@@ -37,6 +38,8 @@ import { waitTillWebConsoleReady } from './util/setup';
jest.setTimeout(5 * 60 * 1000);
+const ALL_SORTS_OF_CHARS = '<>|!@#$%^&`\'".,:;\\*()[]{}Россия 한국 中国!?~';
+
describe('Tutorial: Loading a file', () => {
let browser: playwright.Browser;
let page: playwright.Page;
@@ -56,7 +59,7 @@ describe('Tutorial: Loading a file', () => {
it('Loads data from local disk', async () => {
const testName = 'load-data-from-local-disk-';
- const datasourceName = testName + new Date().toISOString();
+ const datasourceName = testName + ALL_SORTS_OF_CHARS + new Date().toISOString();
const dataConnector = new LocalFileDataConnector(page, {
baseDirectory: DRUID_EXAMPLES_QUICKSTART_TUTORIAL_DIR,
fileFilter: 'wikiticker-2015-09-12-sampled.json.gz',
@@ -168,7 +171,7 @@ async function validateDatasourceStatus(page: playwright.Page, datasourceName: s
async function validateQuery(page: playwright.Page, datasourceName: string) {
const queryOverview = new QueryOverview(page, UNIFIED_CONSOLE_URL);
- const query = `SELECT * FROM "${datasourceName}" ORDER BY __time`;
+ const query = `SELECT * FROM ${SqlRef.table(datasourceName)} ORDER BY __time`;
const results = await queryOverview.runQuery(query);
expect(results).toBeDefined();
expect(results.length).toBeGreaterThan(0);
diff --git a/web-console/src/singletons/api.spec.ts b/web-console/src/singletons/api.spec.ts
index 64429af4023f..8cb8581b1736 100644
--- a/web-console/src/singletons/api.spec.ts
+++ b/web-console/src/singletons/api.spec.ts
@@ -21,6 +21,6 @@ import { Api } from './api';
describe('Api', () => {
it('escapes stuff', () => {
expect(Api.encodePath('wikipedia')).toEqual('wikipedia');
- expect(Api.encodePath('wi%ki?pe#dia')).toEqual('wi%25ki%3Fpe%23dia');
+ expect(Api.encodePath(`wi%ki?pe#dia&'[]`)).toEqual('wi%25ki%3Fpe%23dia%26%27%5B%5D');
});
});
diff --git a/web-console/src/singletons/api.ts b/web-console/src/singletons/api.ts
index a05adf324610..7a05bdd398c8 100644
--- a/web-console/src/singletons/api.ts
+++ b/web-console/src/singletons/api.ts
@@ -46,6 +46,14 @@ export class Api {
}
static encodePath(path: string): string {
- return path.replace(/[?#%]/g, encodeURIComponent);
+ return path.replace(
+ /[?#%&'\[\]]/g,
+ c =>
+ '%' +
+ c
+ .charCodeAt(0)
+ .toString(16)
+ .toUpperCase(),
+ );
}
}
From 6f14ed257eb59a07ed402cff6da3f65887db9395 Mon Sep 17 00:00:00 2001
From: Vadim Ogievetsky
Date: Tue, 8 Dec 2020 18:17:12 -0800
Subject: [PATCH 14/14] fix enabled view
---
web-console/src/views/load-data-view/load-data-view.tsx | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 971634718f59..ff7db4d6d2e1 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -470,20 +470,13 @@ export class LoadDataView extends React.PureComponent