diff --git a/web-console/e2e-tests/reindexing.spec.ts b/web-console/e2e-tests/reindexing.spec.ts
index ae45b735965f..a14262a8ab28 100644
--- a/web-console/e2e-tests/reindexing.spec.ts
+++ b/web-console/e2e-tests/reindexing.spec.ts
@@ -67,7 +67,7 @@ describe('Reindexing from Druid', () => {
const configureSchemaConfig = new ConfigureSchemaConfig({ rollup: false });
const partitionConfig = new PartitionConfig({
segmentGranularity: SegmentGranularity.DAY,
- timeIntervals: interval,
+ timeIntervals: null,
partitionsSpec: new SingleDimPartitionsSpec({
partitionDimension: 'channel',
targetRowsPerSegment: 10_000,
diff --git a/web-console/e2e-tests/tutorial-batch.spec.ts b/web-console/e2e-tests/tutorial-batch.spec.ts
index f4fa45054632..842d7a50182e 100644
--- a/web-console/e2e-tests/tutorial-batch.spec.ts
+++ b/web-console/e2e-tests/tutorial-batch.spec.ts
@@ -16,13 +16,14 @@
* limitations under the License.
*/
+import { SqlRef } from 'druid-query-toolkit';
import * as playwright from 'playwright-chromium';
import { DatasourcesOverview } from './component/datasources/overview';
import { IngestionOverview } from './component/ingestion/overview';
import { ConfigureSchemaConfig } from './component/load-data/config/configure-schema';
-import { PartitionConfig } from './component/load-data/config/partition';
import { SegmentGranularity } from './component/load-data/config/partition';
+import { PartitionConfig } from './component/load-data/config/partition';
import { PublishConfig } from './component/load-data/config/publish';
import { LocalFileDataConnector } from './component/load-data/data-connector/local-file';
import { DataLoader } from './component/load-data/data-loader';
@@ -37,6 +38,8 @@ import { waitTillWebConsoleReady } from './util/setup';
jest.setTimeout(5 * 60 * 1000);
+const ALL_SORTS_OF_CHARS = '<>|!@#$%^&`\'".,:;\\*()[]{}Россия 한국 中国!?~';
+
describe('Tutorial: Loading a file', () => {
let browser: playwright.Browser;
let page: playwright.Page;
@@ -56,7 +59,7 @@ describe('Tutorial: Loading a file', () => {
it('Loads data from local disk', async () => {
const testName = 'load-data-from-local-disk-';
- const datasourceName = testName + new Date().toISOString();
+ const datasourceName = testName + ALL_SORTS_OF_CHARS + new Date().toISOString();
const dataConnector = new LocalFileDataConnector(page, {
baseDirectory: DRUID_EXAMPLES_QUICKSTART_TUTORIAL_DIR,
fileFilter: 'wikiticker-2015-09-12-sampled.json.gz',
@@ -168,7 +171,7 @@ async function validateDatasourceStatus(page: playwright.Page, datasourceName: s
async function validateQuery(page: playwright.Page, datasourceName: string) {
const queryOverview = new QueryOverview(page, UNIFIED_CONSOLE_URL);
- const query = `SELECT * FROM "${datasourceName}" ORDER BY __time`;
+ const query = `SELECT * FROM ${SqlRef.table(datasourceName)} ORDER BY __time`;
const results = await queryOverview.runQuery(query);
expect(results).toBeDefined();
expect(results.length).toBeGreaterThan(0);
diff --git a/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap b/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap
index baf1011bdc8e..02a63a1b1074 100644
--- a/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap
+++ b/web-console/src/components/auto-form/__snapshots__/auto-form.spec.tsx.snap
@@ -8,23 +8,13 @@ exports[`AutoForm matches snapshot 1`] = `
key="testOne"
label="Test one"
>
-
The dimensions to partition on. Leave blank to select all dimensions.
, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -122,7 +117,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":The dimension to partition on.
, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -132,7 +126,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.
, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -143,7 +136,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":Maximum number of rows to include in a partition.
, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -155,48 +147,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (dynamic partit "info":Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +
, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info":The dimensions to partition on. Leave blank to select all dimensions.
, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -362,7 +366,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":The dimension to partition on.
, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -372,7 +375,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.
, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -383,7 +385,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":Maximum number of rows to include in a partition.
, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -395,48 +396,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (hashed partiti "info":Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +
, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info":The dimensions to partition on. Leave blank to select all dimensions.
, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -602,7 +615,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":The dimension to partition on.
, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -612,7 +624,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.
, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -623,7 +634,6 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":Maximum number of rows to include in a partition.
, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -635,48 +645,65 @@ exports[`CompactionDialog matches snapshot with compactionConfig (single_dim par "info":Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +
, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info":The dimensions to partition on. Leave blank to select all dimensions.
, - "label": "Partition dimensions", "name": "tuningConfig.partitionsSpec.partitionDimensions", "placeholder": "(all dimensions)", "type": "string-array", @@ -842,7 +864,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":The dimension to partition on.
, - "label": "Partition dimension", "name": "tuningConfig.partitionsSpec.partitionDimension", "required": true, "type": "string", @@ -852,7 +873,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":Target number of rows to include in a partition, should be a number that targets segments of 500MB~1GB.
, - "label": "Target rows per segment", "name": "tuningConfig.partitionsSpec.targetRowsPerSegment", "required": [Function], "type": "number", @@ -863,7 +883,6 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":Maximum number of rows to include in a partition.
, - "label": "Max rows per segment", "name": "tuningConfig.partitionsSpec.maxRowsPerSegment", "required": [Function], "type": "number", @@ -875,48 +894,65 @@ exports[`CompactionDialog matches snapshot without compactionConfig 1`] = ` "info":Assume that input data has already been grouped on time and dimensions. Ingestion will run faster, but may choose sub-optimal partitions if this assumption is violated.
, - "label": "Assume grouped", "name": "tuningConfig.partitionsSpec.assumeGrouped", "type": "boolean", }, + Object { + "defaultValue": 419430400, + "info":+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. +
, + "name": "inputSegmentSizeBytes", + "type": "number", + }, Object { "defaultValue": 1, "info":- Maximum number of total segment bytes processed per compaction task. Since a time chunk must be processed in its entirety, if the segments for a particular time chunk have a total size in bytes greater than this parameter, compaction will not run for that time chunk. Because each compaction task runs with a single thread, setting this value too far above 1–2GB will result in compaction tasks taking an excessive amount of time. -
, - "name": "inputSegmentSizeBytes", - "type": "number", - }, Object { "defaultValue": 10, "defined": [Function], "info":+ Maximum number of total segment bytes processed per compaction task. Since a time chunk must + be processed in its entirety, if the segments for a particular time chunk have a total size + in bytes greater than this parameter, compaction will not run for that time chunk. Because + each compaction task runs with a single thread, setting this value too far above 1–2GB will + result in compaction tasks taking an excessive amount of time. +
+ ), + }, { name: 'tuningConfig.maxNumConcurrentSubTasks', - label: 'Max num concurrent sub tasks', type: 'number', defaultValue: 1, min: 1, @@ -190,23 +194,8 @@ export const COMPACTION_CONFIG_FIELDS: Field- Maximum number of total segment bytes processed per compaction task. Since a time chunk must - be processed in its entirety, if the segments for a particular time chunk have a total size - in bytes greater than this parameter, compaction will not run for that time chunk. Because - each compaction task runs with a single thread, setting this value too far above 1–2GB will - result in compaction tasks taking an excessive amount of time. -
- ), - }, { name: 'tuningConfig.totalNumMergeTasks', - label: 'Total num merge tasks', type: 'number', defaultValue: 10, min: 1, @@ -215,12 +204,12 @@ export const COMPACTION_CONFIG_FIELDS: Fieldjute.maxbuffer) and the max packet size in MySQL (
+ max_allowed_packet). These can make ingestion tasks fail if the serialized
+ ingestion spec size hits one of them.
+ >
+ ),
+ },
];
diff --git a/web-console/src/druid-models/filter.tsx b/web-console/src/druid-models/filter.tsx
index 10791294063e..5c11784ec69e 100644
--- a/web-console/src/druid-models/filter.tsx
+++ b/web-console/src/druid-models/filter.tsx
@@ -16,9 +16,14 @@
* limitations under the License.
*/
-import { Field } from '../components';
+import React from 'react';
+
+import { ExternalLink, Field } from '../components';
+import { getLink } from '../links';
import { deepGet, EMPTY_ARRAY, oneOf } from '../utils';
+import { IngestionSpec } from './ingestion-spec';
+
export type DruidFilter = Record
+ A Druid{' '}
+
+ Note that only the value that match the filter will be included. If you want to remove + some data values you must negate the filter. +
+ > + ), + }, +]; diff --git a/web-console/src/druid-models/ingestion-spec.spec.ts b/web-console/src/druid-models/ingestion-spec.spec.ts index e698128888e8..27b8aac64805 100644 --- a/web-console/src/druid-models/ingestion-spec.spec.ts +++ b/web-console/src/druid-models/ingestion-spec.spec.ts @@ -45,7 +45,6 @@ describe('ingestion-spec', () => { dataSchema: { dataSource: 'wikipedia', granularitySpec: { - type: 'uniform', segmentGranularity: 'day', queryGranularity: 'hour', rollup: true, @@ -183,7 +182,6 @@ describe('spec utils', () => { dataSchema: { dataSource: 'wikipedia', granularitySpec: { - type: 'uniform', segmentGranularity: 'day', queryGranularity: 'hour', }, @@ -207,9 +205,14 @@ describe('spec utils', () => { }); it('updateSchemaWithSample', () => { - expect( - updateSchemaWithSample(ingestionSpec, { header: ['header'], rows: [] }, 'specific', true), - ).toMatchInlineSnapshot(` + const withRollup = updateSchemaWithSample( + ingestionSpec, + { header: ['header'], rows: [] }, + 'specific', + true, + ); + + expect(withRollup).toMatchInlineSnapshot(` Object { "spec": Object { "dataSchema": Object { @@ -223,7 +226,6 @@ describe('spec utils', () => { "queryGranularity": "hour", "rollup": true, "segmentGranularity": "day", - "type": "uniform", }, "metricsSpec": Array [ Object { @@ -249,6 +251,60 @@ describe('spec utils', () => { "type": "index_parallel", }, "tuningConfig": Object { + "forceGuaranteedRollup": true, + "partitionsSpec": Object { + "type": "hashed", + }, + "type": "index_parallel", + }, + }, + "type": "index_parallel", + } + `); + + const noRollup = updateSchemaWithSample( + ingestionSpec, + { header: ['header'], rows: [] }, + 'specific', + false, + ); + + expect(noRollup).toMatchInlineSnapshot(` + Object { + "spec": Object { + "dataSchema": Object { + "dataSource": "wikipedia", + "dimensionsSpec": Object { + "dimensions": Array [ + "header", + ], + }, + "granularitySpec": Object { + "queryGranularity": "none", + "rollup": false, + "segmentGranularity": "day", + }, + "timestampSpec": Object { + "column": "timestamp", + "format": "iso", + }, + }, + "ioConfig": Object { + "inputFormat": Object { + "type": "json", + }, + "inputSource": Object { + "type": "http", + "uris": Array [ + "https://static.imply.io/data/wikipedia.json.gz", + ], + }, + "type": "index_parallel", + }, + "tuningConfig": Object { + "partitionsSpec": Object { + "type": "dynamic", + }, "type": "index_parallel", }, }, diff --git a/web-console/src/druid-models/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec.tsx index 73963da1589f..d4c9bdca89fe 100644 --- a/web-console/src/druid-models/ingestion-spec.tsx +++ b/web-console/src/druid-models/ingestion-spec.tsx @@ -19,13 +19,14 @@ import { Code } from '@blueprintjs/core'; import React from 'react'; -import { ExternalLink, Field } from '../components'; +import { AutoForm, ExternalLink, Field } from '../components'; import { getLink } from '../links'; import { deepDelete, deepGet, deepMove, deepSet, + deepSetIfUnset, EMPTY_ARRAY, EMPTY_OBJECT, filterMap, @@ -225,6 +226,22 @@ export function getRequiredModule(ingestionType: IngestionComboTypeWithExtra): s } } +export function getIssueWithSpec(spec: IngestionSpec): string | undefined { + if (!deepGet(spec, 'spec.dataSchema.dataSource')) { + return 'missing spec.dataSchema.dataSource'; + } + + if (!deepGet(spec, 'spec.dataSchema.timestampSpec')) { + return 'missing spec.dataSchema.timestampSpec'; + } + + if (!deepGet(spec, 'spec.dataSchema.dimensionsSpec')) { + return 'missing spec.dataSchema.dimensionsSpec'; + } + + return; +} + // -------------- export interface DataSchema { @@ -290,11 +307,9 @@ export function normalizeSpec(spec: Partial+ A list of intervals describing what time chunks of segments should be created. This list + will be broken up and rounded-off based on the segmentGranularity. +
++ If not provided, batch ingestion tasks will generally determine which time chunks to + output based on what timestamps are found in the input data. +
++ If specified, batch ingestion tasks may be able to skip a determining-partitions phase, + which can result in faster ingestion. Batch ingestion tasks may also be able to request + all their locks up-front instead of one by one. Batch ingestion tasks will throw away any + records with timestamps outside of the specified intervals. +
+ > + ), + }, +]; + +export function getSecondaryPartitionRelatedFormFields( + spec: IngestionSpec, dimensionSuggestions: string[] | undefined, -): Fielddynamic.
),
- adjustment: (t: TuningConfig) => {
- if (!Array.isArray(dimensionSuggestions) || !dimensionSuggestions.length) return t;
- return deepSet(t, 'partitionsSpec.partitionDimension', dimensionSuggestions[0]);
+ adjustment: s => {
+ if (
+ deepGet(s, 'spec.tuningConfig.partitionsSpec.type') !== 'single_dim' ||
+ !Array.isArray(dimensionSuggestions) ||
+ !dimensionSuggestions.length
+ ) {
+ return s;
+ }
+
+ return deepSet(
+ s,
+ 'spec.tuningConfig.partitionsSpec.partitionDimension',
+ dimensionSuggestions[0],
+ );
},
},
// partitionsSpec type: dynamic
{
- name: 'partitionsSpec.maxRowsPerSegment',
- label: 'Max rows per segment',
+ name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment',
type: 'number',
defaultValue: 5000000,
- defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'dynamic',
+ defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'dynamic',
info: <>Determines how many rows are in each segment.>,
},
{
- name: 'partitionsSpec.maxTotalRows',
- label: 'Max total rows',
+ name: 'spec.tuningConfig.partitionsSpec.maxTotalRows',
type: 'number',
defaultValue: 20000000,
- defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'dynamic',
+ defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'dynamic',
info: <>Total number of rows in segments waiting for being pushed.>,
},
// partitionsSpec type: hashed
{
- name: 'partitionsSpec.targetRowsPerSegment',
- label: 'Target rows per segment',
+ name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment',
type: 'number',
zeroMeansUndefined: true,
defaultValue: 5000000,
- defined: (t: TuningConfig) =>
- deepGet(t, 'partitionsSpec.type') === 'hashed' &&
- !deepGet(t, 'partitionsSpec.numShards'),
+ defined: s =>
+ deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'hashed' &&
+ !deepGet(s, 'spec.tuningConfig.partitionsSpec.numShards'),
info: (
<>
@@ -1449,14 +1496,13 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.numShards', - label: 'Num shards', + name: 'spec.tuningConfig.partitionsSpec.numShards', type: 'number', zeroMeansUndefined: true, hideInMore: true, - defined: (t: TuningConfig) => - deepGet(t, 'partitionsSpec.type') === 'hashed' && - !deepGet(t, 'partitionsSpec.targetRowsPerSegment'), + defined: s => + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'hashed' && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment'), info: ( <>
@@ -1472,19 +1518,17 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.partitionDimensions', - label: 'Partition dimensions', + name: 'spec.tuningConfig.partitionsSpec.partitionDimensions', type: 'string-array', placeholder: '(all dimensions)', - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'hashed', + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'hashed', info:
The dimensions to partition on. Leave blank to select all dimensions.
, }, // partitionsSpec type: single_dim { - name: 'partitionsSpec.partitionDimension', - label: 'Partition dimension', + name: 'spec.tuningConfig.partitionsSpec.partitionDimension', type: 'string', - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'single_dim', + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim', required: true, suggestions: dimensionSuggestions, info: ( @@ -1501,16 +1545,15 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.targetRowsPerSegment', - label: 'Target rows per segment', + name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment', type: 'number', zeroMeansUndefined: true, - defined: (t: TuningConfig) => - deepGet(t, 'partitionsSpec.type') === 'single_dim' && - !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), - required: (t: TuningConfig) => - !deepGet(t, 'partitionsSpec.targetRowsPerSegment') && - !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), + defined: s => + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim' && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment'), + required: s => + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment') && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment'), info: (Target number of rows to include in a partition, should be a number that targets @@ -1519,24 +1562,23 @@ export function getPartitionRelatedTuningSpecFormFields( ), }, { - name: 'partitionsSpec.maxRowsPerSegment', - label: 'Max rows per segment', + name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment', type: 'number', zeroMeansUndefined: true, - defined: (t: TuningConfig) => - deepGet(t, 'partitionsSpec.type') === 'single_dim' && - !deepGet(t, 'partitionsSpec.targetRowsPerSegment'), - required: (t: TuningConfig) => - !deepGet(t, 'partitionsSpec.targetRowsPerSegment') && - !deepGet(t, 'partitionsSpec.maxRowsPerSegment'), + defined: s => + deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim' && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment'), + required: s => + !deepGet(s, 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment') && + !deepGet(s, 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment'), info:
Maximum number of rows to include in a partition.
, }, { - name: 'partitionsSpec.assumeGrouped', - label: 'Assume grouped', + name: 'spec.tuningConfig.partitionsSpec.assumeGrouped', type: 'boolean', defaultValue: false, - defined: (t: TuningConfig) => deepGet(t, 'partitionsSpec.type') === 'single_dim', + hideInMore: true, + defined: s => deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === 'single_dim', info: (
Assume that input data has already been grouped on time and dimensions. Ingestion will
@@ -1550,13 +1592,13 @@ export function getPartitionRelatedTuningSpecFormFields(
case 'kinesis':
return [
{
- name: 'maxRowsPerSegment',
+ name: 'spec.tuningConfig.maxRowsPerSegment',
type: 'number',
defaultValue: 5000000,
info: <>Determines how many rows are in each segment.>,
},
{
- name: 'maxTotalRows',
+ name: 'spec.tuningConfig.maxTotalRows',
type: 'number',
defaultValue: 20000000,
info: <>Total number of rows in segments waiting for being pushed.>,
@@ -1567,13 +1609,13 @@ export function getPartitionRelatedTuningSpecFormFields(
throw new Error(`unknown spec type ${specType}`);
}
-const TUNING_CONFIG_FORM_FIELDS: Field
- Please specify where your raw data is located
-
+ Please specify where your raw data is located
+
- Druid ingests raw data and converts it into a custom,{' '}
- To get started, please paste some data in the box to the left. Click "Apply" to verify your data with Druid. To get started, please specify what data you want to ingest.
+ Druid ingests raw data and converts it into a custom,{' '}
+ To get started, please paste some data in the box to the left. Click "Apply" to verify your data with Druid. To get started, please specify what data you want to ingest.
- Druid requires flat data (non-nested, non-hierarchical). Each row should represent a
- discrete event.
-
- If you have nested data, you can{' '}
- Ensure that your data appears correctly in a row/column orientation.
+ If you have nested data, you can{' '}
+ Ensure that your data appears correctly in a row/column orientation.
- Druid partitions data based on the primary time column of your data. This column is stored
- internally in Druid as Configure how to define the time column for this data.
- If your data does not have a time column, you can select
+ Druid partitions data based on the primary time column of your data. This column is stored
+ internally in Druid as Configure how to define the time column for this data.
+ If your data does not have a time column, you can select
- Druid can perform per-row{' '}
-
+ Druid can perform per-row{' '}
+
- Druid can filter out unwanted data by applying per-row{' '}
-
+ Druid can filter out unwanted data by applying per-row{' '}
+
- Each column in Druid must have an assigned type (string, long, float, double, complex, etc).
-
- Default primitive types have been automatically assigned to your columns. If you want to
- change the type, click on the column header.
+ Each column in Druid must have an assigned type (string, long, float, double, complex,
+ etc).
+ Default primitive types have been automatically assigned to your columns. If you want to
+ change the type, click on the column header.
+ Configure how Druid will partition data. Configure how Druid will partition data. Fine tune how Druid will ingest data. Fine tune how Druid will ingest data. Configure behavior of indexed data once it reaches Druid. Configure behavior of indexed data once it reaches Druid.
- Druid begins ingesting data once you submit a JSON ingestion spec. If you modify any values
- in this view, the values entered in previous sections will update accordingly. If you modify
- any values in previous sections, this spec will automatically update.
- Submit the spec to begin loading data into Druid.
+ Druid begins ingesting data once you submit a JSON ingestion spec. If you modify any
+ values in this view, the values entered in previous sections will update accordingly. If
+ you modify any values in previous sections, this spec will automatically update.
+ Submit the spec to begin loading data into Druid. Your partitioning and sorting configuration does not make sense.
- For best performance the first dimension in your schema (
-
- Your partitioning and sorting configuration does not make sense.
+ For best performance the first dimension in your schema (
+
+
-
+ jute.maxbuffer) and the max packet size in MySQL (
+ max_allowed_packet). These can make ingestion tasks fail if the serialized
+ ingestion spec size hits one of them.
+ >
+ ),
+ },
+ {
+ name: 'spec.tuningConfig.chatHandlerTimeout',
type: 'duration',
defaultValue: 'PT10S',
- defined: (t: TuningConfig) => t.type === 'index_parallel',
+ defined: s => s.type === 'index_parallel',
hideInMore: true,
info: <>Timeout for reporting the pushed segments in worker tasks.>,
},
{
- name: 'chatHandlerNumRetries',
+ name: 'spec.tuningConfig.chatHandlerNumRetries',
type: 'number',
defaultValue: 5,
- defined: (t: TuningConfig) => t.type === 'index_parallel',
+ defined: s => s.type === 'index_parallel',
hideInMore: true,
info: <>Retries for reporting the pushed segments in worker tasks.>,
},
{
- name: 'workerThreads',
+ name: 'spec.tuningConfig.workerThreads',
type: 'number',
placeholder: 'min(10, taskCount)',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
info: (
<>The number of threads that will be used by the supervisor for asynchronous operations.>
),
},
{
- name: 'chatThreads',
+ name: 'spec.tuningConfig.chatThreads',
type: 'number',
placeholder: 'min(10, taskCount * replicas)',
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
hideInMore: true,
info: <>The number of threads that will be used for communicating with indexing tasks.>,
},
{
- name: 'chatRetries',
+ name: 'spec.tuningConfig.chatRetries',
type: 'number',
defaultValue: 8,
- defined: (t: TuningConfig) => oneOf(t.type, 'kafka', 'kinesis'),
+ defined: s => oneOf(s.type, 'kafka', 'kinesis'),
hideInMore: true,
info: (
<>
@@ -1805,17 +1883,17 @@ const TUNING_CONFIG_FORM_FIELDS: Field__time.
- None to use a
- placeholder value. If the time information is spread across multiple columns you can combine
- them into one by selecting Expression and defining a transform expression.
- __time.
+ None to use a
+ placeholder value. If the time information is spread across multiple columns you can
+ combine them into one by selecting Expression and defining a transform
+ expression.
+ {firstDimensionName}), which is what the data will be primarily sorted on,
- should match the partitioning dimension ({partitionDimension}).
- {firstDimensionName}), which is what the data will be primarily sorted
+ on, should match the partitioning dimension ({partitionDimension}).
+ Primary partitioning (by time)
Secondary partitioning
General tuning
-