From 1eab31821e300a4d8e209192bcd2e19fdbe6a5ac Mon Sep 17 00:00:00 2001 From: Vadim Ogievetsky Date: Fri, 23 Aug 2024 14:55:10 -0700 Subject: [PATCH 1/9] no force time --- .../dimension-spec/dimension-spec.ts | 1 + .../query-context/query-context.tsx | 2 ++ web-console/src/helpers/spec-conversion.ts | 8 ++++++++ .../workbench-view/run-panel/run-panel.tsx | 19 +++++++++++++++++++ 4 files changed, 30 insertions(+) diff --git a/web-console/src/druid-models/dimension-spec/dimension-spec.ts b/web-console/src/druid-models/dimension-spec/dimension-spec.ts index f8d8f229dab8..5c818d0c2829 100644 --- a/web-console/src/druid-models/dimension-spec/dimension-spec.ts +++ b/web-console/src/druid-models/dimension-spec/dimension-spec.ts @@ -28,6 +28,7 @@ export interface DimensionsSpec { readonly spatialDimensions?: any[]; readonly includeAllDimensions?: boolean; readonly useSchemaDiscovery?: boolean; + readonly forceSegmentSortByTime?: boolean; } export interface DimensionSpec { diff --git a/web-console/src/druid-models/query-context/query-context.tsx b/web-console/src/druid-models/query-context/query-context.tsx index 4b0367c25a67..ee4675775103 100644 --- a/web-console/src/druid-models/query-context/query-context.tsx +++ b/web-console/src/druid-models/query-context/query-context.tsx @@ -41,6 +41,7 @@ export interface QueryContext { failOnEmptyInsert?: boolean; waitUntilSegmentsLoad?: boolean; useConcurrentLocks?: boolean; + forceSegmentSortByTime?: boolean; [key: string]: any; } @@ -63,6 +64,7 @@ export const DEFAULT_SERVER_QUERY_CONTEXT: QueryContext = { failOnEmptyInsert: false, waitUntilSegmentsLoad: false, useConcurrentLocks: false, + forceSegmentSortByTime: true, }; export interface QueryWithContext { diff --git a/web-console/src/helpers/spec-conversion.ts b/web-console/src/helpers/spec-conversion.ts index 2c621f2466c2..00bc3d5f832c 100644 --- a/web-console/src/helpers/spec-conversion.ts +++ b/web-console/src/helpers/spec-conversion.ts @@ -86,6 +86,14 @@ export function convertSpecToSql(spec: any): QueryWithContext { context.arrayIngestMode = 'array'; } + const forceSegmentSortByTime = deepGet( + spec, + 'spec.dataSchema.dimensionsSpec.forceSegmentSortByTime', + ); + if (typeof forceSegmentSortByTime !== 'undefined') { + context.forceSegmentSortByTime = forceSegmentSortByTime; + } + const indexSpec = deepGet(spec, 'spec.tuningConfig.indexSpec'); if (indexSpec) { context.indexSpec = indexSpec; diff --git a/web-console/src/views/workbench-view/run-panel/run-panel.tsx b/web-console/src/views/workbench-view/run-panel/run-panel.tsx index e329f9d383bf..c2c047f36ee8 100644 --- a/web-console/src/views/workbench-view/run-panel/run-panel.tsx +++ b/web-console/src/views/workbench-view/run-panel/run-panel.tsx @@ -98,6 +98,8 @@ const DEFAULT_ENGINES_LABEL_FN = (engine: DruidEngine | undefined) => { }; }; +const EXPERIMENTAL_ICON = ; + export interface RunPanelProps extends Pick { query: WorkbenchQuery; @@ -169,6 +171,11 @@ export const RunPanel = React.memo(function RunPanel(props: RunPanelProps) { queryContext, defaultQueryContext, ); + const forceSegmentSortByTime = getQueryContextKey( + 'forceSegmentSortByTime', + queryContext, + defaultQueryContext, + ); const finalizeAggregations = queryContext.finalizeAggregations; const waitUntilSegmentsLoad = queryContext.waitUntilSegmentsLoad; const groupByEnableMultiValueUnnesting = queryContext.groupByEnableMultiValueUnnesting; @@ -365,9 +372,21 @@ export const RunPanel = React.memo(function RunPanel(props: RunPanelProps) { {effectiveEngine === 'sql-msq-task' ? ( <> + + changeQueryContext({ + ...queryContext, + forceSegmentSortByTime: !forceSegmentSortByTime, + }) + } + /> changeQueryContext({ ...queryContext, From 2e50534a8e34e02e7228bc817fefaa89c6318cf2 Mon Sep 17 00:00:00 2001 From: Vadim Ogievetsky Date: Mon, 26 Aug 2024 20:34:57 -0700 Subject: [PATCH 2/9] time UI --- .../dimension-spec/dimension-spec.ts | 11 +- .../ingestion-spec/ingestion-spec.spec.ts | 84 ++++++++++ .../ingestion-spec/ingestion-spec.tsx | 18 +++ web-console/src/utils/sampler.ts | 12 +- .../views/load-data-view/load-data-view.tsx | 143 +++++++++++++++--- .../schema-table/schema-table.tsx | 4 +- .../max-tasks-button/max-tasks-button.tsx | 31 +++- 7 files changed, 270 insertions(+), 33 deletions(-) diff --git a/web-console/src/druid-models/dimension-spec/dimension-spec.ts b/web-console/src/druid-models/dimension-spec/dimension-spec.ts index 5c818d0c2829..c0dcad2fa4d7 100644 --- a/web-console/src/druid-models/dimension-spec/dimension-spec.ts +++ b/web-console/src/druid-models/dimension-spec/dimension-spec.ts @@ -18,9 +18,10 @@ import type { Field } from '../../components'; import { filterMap, typeIsKnown } from '../../utils'; -import type { SampleResponse } from '../../utils/sampler'; +import type { SampleResponse, TimeColumnAction } from '../../utils/sampler'; import { getHeaderNamesFromSampleResponse } from '../../utils/sampler'; import { guessColumnTypeFromSampleResponse } from '../ingestion-spec/ingestion-spec'; +import { TIME_COLUMN } from '../timestamp-spec/timestamp-spec'; export interface DimensionsSpec { readonly dimensions?: (string | DimensionSpec)[]; @@ -62,6 +63,7 @@ export const DIMENSION_SPEC_FIELDS: Field[] = [ type: 'string', required: true, suggestions: KNOWN_TYPES, + disabled: d => d.name === TIME_COLUMN, }, { name: 'createBitmapIndex', @@ -164,8 +166,13 @@ export function getDimensionSpecs( guessNumericStringsAsNumbers: boolean, forceMvdInsteadOfArray: boolean, hasRollup: boolean, + timeColumnAction: TimeColumnAction, ): (string | DimensionSpec)[] { - return filterMap(getHeaderNamesFromSampleResponse(sampleResponse, 'ignore'), h => { + return filterMap(getHeaderNamesFromSampleResponse(sampleResponse, timeColumnAction), h => { + if (h === TIME_COLUMN) { + return { type: 'long', name: h }; + } + const columnTypeHint = columnTypeHints[h]; const guessedColumnType = guessColumnTypeFromSampleResponse( sampleResponse, diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts index d58e8c077924..3eb1afc804a6 100644 --- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts +++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts @@ -22,6 +22,7 @@ import type { IngestionSpec } from './ingestion-spec'; import { adjustId, cleanSpec, + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME, guessColumnTypeFromInput, guessColumnTypeFromSampleResponse, guessKafkaInputFormat, @@ -857,10 +858,90 @@ describe('spec utils', () => { }); describe('updateSchemaWithSample', () => { + it('works with when not forcing time, arrays', () => { + const updateSpec = updateSchemaWithSample( + ingestionSpec, + JSON_SAMPLE, + false, + 'fixed', + 'arrays', + true, + ); + expect(updateSpec.spec).toMatchInlineSnapshot(` + { + "dataSchema": { + "dataSource": "wikipedia", + "dimensionsSpec": { + "dimensions": [ + "user", + "id", + { + "castToType": "ARRAY", + "name": "tags", + "type": "auto", + }, + { + "castToType": "ARRAY", + "name": "nums", + "type": "auto", + }, + ], + "forceSegmentSortByTime": false, + }, + "granularitySpec": { + "queryGranularity": "hour", + "rollup": true, + "segmentGranularity": "day", + }, + "metricsSpec": [ + { + "name": "count", + "type": "count", + }, + { + "fieldName": "followers", + "name": "sum_followers", + "type": "longSum", + }, + { + "fieldName": "spend", + "name": "sum_spend", + "type": "doubleSum", + }, + ], + "timestampSpec": { + "column": "timestamp", + "format": "iso", + }, + }, + "ioConfig": { + "inputFormat": { + "type": "json", + }, + "inputSource": { + "type": "http", + "uris": [ + "https://website.com/wikipedia.json.gz", + ], + }, + "type": "index_parallel", + }, + "tuningConfig": { + "forceGuaranteedRollup": true, + "partitionsSpec": { + "type": "hashed", + }, + "type": "index_parallel", + }, + } + `); + }); + it('works with rollup, arrays', () => { const updateSpec = updateSchemaWithSample( ingestionSpec, JSON_SAMPLE, + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME, 'fixed', 'arrays', true, @@ -938,6 +1019,7 @@ describe('spec utils', () => { const updateSpec = updateSchemaWithSample( ingestionSpec, JSON_SAMPLE, + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME, 'fixed', 'multi-values', true, @@ -1015,6 +1097,7 @@ describe('spec utils', () => { const updatedSpec = updateSchemaWithSample( ingestionSpec, JSON_SAMPLE, + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME, 'fixed', 'arrays', false, @@ -1083,6 +1166,7 @@ describe('spec utils', () => { const updatedSpec = updateSchemaWithSample( ingestionSpec, JSON_SAMPLE, + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME, 'fixed', 'multi-values', false, diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx index 3a7f0ae56748..03898732bb00 100644 --- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx +++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx @@ -295,6 +295,13 @@ export type SchemaMode = 'fixed' | 'string-only-discovery' | 'type-aware-discove export type ArrayMode = 'arrays' | 'multi-values'; +export const DEFAULT_FORCE_SEGMENT_SORT_BY_TIME = true; +export function getForceSegmentSortByTime(spec: Partial): boolean { + return ( + deepGet(spec, 'spec.dataSchema.dimensionsSpec.forceSegmentSortByTime') ?? + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME + ); +} export function getSchemaMode(spec: Partial): SchemaMode { if (deepGet(spec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery') === true) { return 'type-aware-discovery'; @@ -2744,6 +2751,7 @@ function getColumnTypeHintsFromSpec(spec: Partial): Record, sampleResponse: SampleResponse, + forceSegmentSortByTime: boolean, schemaMode: SchemaMode, arrayMode: ArrayMode, rollup: boolean, @@ -2756,6 +2764,15 @@ export function updateSchemaWithSample( let newSpec = spec; + newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.forceSegmentSortByTime'); + if (forceSegmentSortByTime !== DEFAULT_FORCE_SEGMENT_SORT_BY_TIME) { + newSpec = deepSet( + newSpec, + 'spec.dataSchema.dimensionsSpec.forceSegmentSortByTime', + forceSegmentSortByTime, + ); + } + switch (schemaMode) { case 'type-aware-discovery': newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery', true); @@ -2784,6 +2801,7 @@ export function updateSchemaWithSample( guessNumericStringsAsNumbers, arrayMode === 'multi-values', rollup, + forceSegmentSortByTime ?? DEFAULT_FORCE_SEGMENT_SORT_BY_TIME ? 'ignore' : 'preserve', ), ); break; diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts index cc9ae32b8aef..f74acf024c9e 100644 --- a/web-console/src/utils/sampler.ts +++ b/web-console/src/utils/sampler.ts @@ -73,16 +73,18 @@ export interface SampleResponse { numRowsRead: number; } +export type TimeColumnAction = 'preserve' | 'ignore' | 'ignoreIfZero'; + export function getHeaderNamesFromSampleResponse( sampleResponse: SampleResponse, - timeColumnAction: 'preserve' | 'ignore' | 'ignoreIfZero' = 'preserve', + timeColumnAction: TimeColumnAction = 'preserve', ): string[] { return getHeaderFromSampleResponse(sampleResponse, timeColumnAction).map(s => s.name); } export function getHeaderFromSampleResponse( sampleResponse: SampleResponse, - timeColumnAction: 'preserve' | 'ignore' | 'ignoreIfZero' = 'preserve', + timeColumnAction: TimeColumnAction = 'preserve', ): { name: string; type: string }[] { const ignoreTimeColumn = timeColumnAction === 'ignore' || @@ -462,13 +464,17 @@ export async function sampleForTimestamp( export async function sampleForTransform( spec: Partial, cacheRows: CacheRows, + forceSegmentSortByTime: boolean, ): Promise { const samplerType = getSpecType(spec); const timestampSpec: TimestampSpec = deepGet(spec, 'spec.dataSchema.timestampSpec'); const transforms: Transform[] = deepGet(spec, 'spec.dataSchema.transformSpec.transforms') || []; // Extra step to simulate auto-detecting dimension with transforms - let specialDimensionSpec: DimensionsSpec = { useSchemaDiscovery: true }; + let specialDimensionSpec: DimensionsSpec = { + useSchemaDiscovery: true, + forceSegmentSortByTime, + }; if (transforms && transforms.length) { const sampleSpecHack: SampleSpec = { type: samplerType, diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx index 968b53888c62..42cfa25a7b70 100644 --- a/web-console/src/views/load-data-view/load-data-view.tsx +++ b/web-console/src/views/load-data-view/load-data-view.tsx @@ -83,6 +83,7 @@ import { computeFlattenPathsForData, CONSTANT_TIMESTAMP_SPEC, CONSTANT_TIMESTAMP_SPEC_FIELDS, + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME, DIMENSION_SPEC_FIELDS, fillDataSourceNameIfNeeded, fillInputFormatIfNeeded, @@ -92,6 +93,7 @@ import { getArrayMode, getDimensionSpecName, getFlattenSpec, + getForceSegmentSortByTime, getIngestionComboType, getIngestionImage, getIngestionTitle, @@ -311,7 +313,14 @@ function initializeSchemaWithSampleIfNeeded( sample: SampleResponse, ): Partial { if (deepGet(spec, 'spec.dataSchema.dimensionsSpec')) return spec; - return updateSchemaWithSample(spec, sample, 'fixed', 'multi-values', getRollup(spec, false)); + return updateSchemaWithSample( + spec, + sample, + DEFAULT_FORCE_SEGMENT_SORT_BY_TIME, + 'fixed', + 'multi-values', + getRollup(spec, false), + ); } type Step = @@ -394,6 +403,7 @@ export interface LoadDataViewState { continueToSpec: boolean; showResetConfirm: boolean; newRollup?: boolean; + newForceSegmentSortByTime?: boolean; newSchemaMode?: SchemaMode; newArrayMode?: ArrayMode; @@ -1965,7 +1975,11 @@ export class LoadDataView extends React.PureComponent ({ transformQueryState: new QueryState({ @@ -2359,6 +2373,7 @@ export class LoadDataView extends React.PureComponent {!somethingSelected && ( <> + +

+ When set to true (the default), segments created by the ingestion job are + sorted by {'{__time, dimensions[0], dimensions[1], ...}'}. When + set to false, segments created by the ingestion job are sorted by{' '} + {'{dimensions[0], dimensions[1], ...}'}. To include{' '} + __time in the sort order when this parameter is set to{' '} + false, you must include a dimension named __time{' '} + with type long explicitly in the `dimensions` list. +

+

+ Setting this to `false` is an experimental feature; see + + Sorting + {' '} + for details. +

+ + } + > + + this.setState({ + newForceSegmentSortByTime: !forceSegmentSortByTime, + }) + } + label="Force segment sort by time" + /> +
-