From eeb69883a2bc972e4a7a8a1d3835bc58554b3113 Mon Sep 17 00:00:00 2001 From: Vadim Ogievetsky Date: Tue, 31 Jan 2023 14:53:14 -0800 Subject: [PATCH 01/13] use new sampler features --- .../dimension-spec/dimension-spec.spec.ts | 85 - .../dimension-spec/dimension-spec.ts | 14 +- .../ingestion-spec/ingestion-spec.spec.ts | 300 +- .../ingestion-spec/ingestion-spec.tsx | 43 +- .../metric-spec/metric-spec.spec.ts | 6 +- .../druid-models/metric-spec/metric-spec.tsx | 11 +- web-console/src/helpers/spec-conversion.ts | 6 +- web-console/src/utils/sampler.mock.ts | 135 +- web-console/src/utils/sampler.spec.ts | 133 + web-console/src/utils/sampler.ts | 129 +- web-console/src/utils/utils.spec.ts | 16 +- .../__snapshots__/filter-table.spec.tsx.snap | 3753 ++++++++++++++-- .../filter-table/filter-table.spec.tsx | 14 +- .../filter-table/filter-table.tsx | 18 +- .../views/load-data-view/load-data-view.tsx | 120 +- .../parse-data-table.spec.tsx.snap | 3155 ++++++++++++-- .../parse-data-table.spec.tsx | 14 +- .../parse-data-table/parse-data-table.tsx | 80 +- .../parse-time-table.spec.tsx.snap | 3764 ++++++++++++++-- .../parse-time-table.spec.tsx | 13 +- .../parse-time-table/parse-time-table.tsx | 129 +- .../__snapshots__/schema-table.spec.tsx.snap | 3769 +++++++++++++++-- .../schema-table/schema-table.spec.tsx | 15 +- .../schema-table/schema-table.tsx | 18 +- .../transform-table.spec.tsx.snap | 3753 ++++++++++++++-- .../transform-table/transform-table.spec.tsx | 14 +- .../transform-table/transform-table.tsx | 22 +- .../input-format-step/input-format-step.tsx | 67 +- 28 files changed, 16875 insertions(+), 2721 deletions(-) delete mode 100644 web-console/src/druid-models/dimension-spec/dimension-spec.spec.ts create mode 100644 web-console/src/utils/sampler.spec.ts diff --git a/web-console/src/druid-models/dimension-spec/dimension-spec.spec.ts b/web-console/src/druid-models/dimension-spec/dimension-spec.spec.ts deleted file mode 100644 index 8d68d41df0c0..000000000000 --- 
a/web-console/src/druid-models/dimension-spec/dimension-spec.spec.ts +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import { CSV_SAMPLE, JSON_SAMPLE } from '../../utils/sampler.mock'; - -import { getDimensionSpecs } from './dimension-spec'; - -describe('dimension-spec', () => { - describe('getDimensionSpecs', () => { - it('works for empty', () => { - expect(getDimensionSpecs({ header: ['header'], rows: [] }, {}, false, true)).toEqual([ - 'header', - ]); - }); - - it('works with json', () => { - expect(getDimensionSpecs(JSON_SAMPLE, {}, false, false)).toEqual([ - 'timestamp', - 'user', - { - name: 'followers', - type: 'long', - }, - { - name: 'spend', - type: 'double', - }, - 'id', - 'tags', - 'nums', - ]); - - expect(getDimensionSpecs(JSON_SAMPLE, {}, false, true)).toEqual([ - 'timestamp', - 'user', - 'id', - 'tags', - 'nums', - ]); - }); - - it('works with csv', () => { - expect(getDimensionSpecs(CSV_SAMPLE, {}, true, false)).toEqual([ - 'timestamp', - 'user', - { - name: 'followers', - type: 'long', - }, - { - name: 'spend', - type: 'double', - }, - { - name: 'id', - type: 'long', - }, - 'tags', - 'nums', - ]); - - expect(getDimensionSpecs(CSV_SAMPLE, {}, true, 
true)).toEqual([ - 'timestamp', - 'user', - 'tags', - 'nums', - ]); - }); - }); -}); diff --git a/web-console/src/druid-models/dimension-spec/dimension-spec.ts b/web-console/src/druid-models/dimension-spec/dimension-spec.ts index d679dc91d3ea..9cb40c8e0c66 100644 --- a/web-console/src/druid-models/dimension-spec/dimension-spec.ts +++ b/web-console/src/druid-models/dimension-spec/dimension-spec.ts @@ -18,13 +18,16 @@ import type { Field } from '../../components'; import { filterMap, typeIs } from '../../utils'; -import type { SampleHeaderAndRows } from '../../utils/sampler'; -import { guessColumnTypeFromHeaderAndRows } from '../ingestion-spec/ingestion-spec'; +import type { SampleResponse } from '../../utils/sampler'; +import { getHeaderNamesFromSampleResponse } from '../../utils/sampler'; +import { guessColumnTypeFromSampleResponse } from '../ingestion-spec/ingestion-spec'; export interface DimensionsSpec { readonly dimensions?: (string | DimensionSpec)[]; readonly dimensionExclusions?: string[]; readonly spatialDimensions?: any[]; + readonly includeAllDimensions?: boolean; + readonly useSchemaDiscovery?: boolean; } export interface DimensionSpec { @@ -77,16 +80,15 @@ export function inflateDimensionSpec(dimensionSpec: string | DimensionSpec): Dim } export function getDimensionSpecs( - headerAndRows: SampleHeaderAndRows, + sampleResponse: SampleResponse, typeHints: Record, guessNumericStringsAsNumbers: boolean, hasRollup: boolean, ): (string | DimensionSpec)[] { - return filterMap(headerAndRows.header, h => { - if (h === '__time') return; + return filterMap(getHeaderNamesFromSampleResponse(sampleResponse, true), h => { const type = typeHints[h] || - guessColumnTypeFromHeaderAndRows(headerAndRows, h, guessNumericStringsAsNumbers); + guessColumnTypeFromSampleResponse(sampleResponse, h, guessNumericStringsAsNumbers); if (type === 'string') return h; if (hasRollup) return; return { diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts 
b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts index 91bf75f35b57..69353125e083 100644 --- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts +++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts @@ -18,14 +18,12 @@ import { CSV_SAMPLE } from '../../utils/sampler.mock'; -import type { IngestionSpec } from './ingestion-spec'; import { adjustId, cleanSpec, - guessColumnTypeFromHeaderAndRows, guessColumnTypeFromInput, + guessColumnTypeFromSampleResponse, guessInputFormat, - updateSchemaWithSample, upgradeSpec, } from './ingestion-spec'; @@ -672,36 +670,36 @@ describe('ingestion-spec', () => { }); describe('spec utils', () => { - const ingestionSpec: IngestionSpec = { - type: 'index_parallel', - spec: { - ioConfig: { - type: 'index_parallel', - inputSource: { - type: 'http', - uris: ['https://website.com/wikipedia.json.gz'], - }, - inputFormat: { - type: 'json', - }, - }, - tuningConfig: { - type: 'index_parallel', - }, - dataSchema: { - dataSource: 'wikipedia', - granularitySpec: { - segmentGranularity: 'day', - queryGranularity: 'hour', - }, - timestampSpec: { - column: 'timestamp', - format: 'iso', - }, - dimensionsSpec: {}, - }, - }, - }; + // const ingestionSpec: IngestionSpec = { + // type: 'index_parallel', + // spec: { + // ioConfig: { + // type: 'index_parallel', + // inputSource: { + // type: 'http', + // uris: ['https://website.com/wikipedia.json.gz'], + // }, + // inputFormat: { + // type: 'json', + // }, + // }, + // tuningConfig: { + // type: 'index_parallel', + // }, + // dataSchema: { + // dataSource: 'wikipedia', + // granularitySpec: { + // segmentGranularity: 'day', + // queryGranularity: 'hour', + // }, + // timestampSpec: { + // column: 'timestamp', + // format: 'iso', + // }, + // dimensionsSpec: {}, + // }, + // }, + // }; describe('guessColumnTypeFromInput', () => { it('works for empty', () => { @@ -745,131 +743,125 @@ describe('spec utils', () => { }); }); - 
describe('guessColumnTypeFromHeaderAndRows', () => { - it('works in empty dataset', () => { - expect(guessColumnTypeFromHeaderAndRows({ header: ['c0'], rows: [] }, 'c0', false)).toEqual( - 'string', - ); - }); - + describe('guessColumnTypeFromSampleResponse', () => { it('works for generic dataset', () => { - expect(guessColumnTypeFromHeaderAndRows(CSV_SAMPLE, 'user', false)).toEqual('string'); - expect(guessColumnTypeFromHeaderAndRows(CSV_SAMPLE, 'followers', false)).toEqual('string'); - expect(guessColumnTypeFromHeaderAndRows(CSV_SAMPLE, 'followers', true)).toEqual('long'); - expect(guessColumnTypeFromHeaderAndRows(CSV_SAMPLE, 'spend', true)).toEqual('double'); - expect(guessColumnTypeFromHeaderAndRows(CSV_SAMPLE, 'nums', false)).toEqual('string'); - expect(guessColumnTypeFromHeaderAndRows(CSV_SAMPLE, 'nums', true)).toEqual('string'); + expect(guessColumnTypeFromSampleResponse(CSV_SAMPLE, 'user', false)).toEqual('string'); + expect(guessColumnTypeFromSampleResponse(CSV_SAMPLE, 'followers', false)).toEqual('string'); + expect(guessColumnTypeFromSampleResponse(CSV_SAMPLE, 'followers', true)).toEqual('long'); + expect(guessColumnTypeFromSampleResponse(CSV_SAMPLE, 'spend', true)).toEqual('double'); + expect(guessColumnTypeFromSampleResponse(CSV_SAMPLE, 'nums', false)).toEqual('string'); + expect(guessColumnTypeFromSampleResponse(CSV_SAMPLE, 'nums', true)).toEqual('string'); }); }); - it('updateSchemaWithSample', () => { - const withRollup = updateSchemaWithSample( - ingestionSpec, - { header: ['header'], rows: [] }, - 'specific', - true, - ); - - expect(withRollup).toMatchInlineSnapshot(` - Object { - "spec": Object { - "dataSchema": Object { - "dataSource": "wikipedia", - "dimensionsSpec": Object { - "dimensions": Array [ - "header", - ], - }, - "granularitySpec": Object { - "queryGranularity": "hour", - "rollup": true, - "segmentGranularity": "day", - }, - "metricsSpec": Array [ - Object { - "name": "count", - "type": "count", - }, - ], - "timestampSpec": Object { - 
"column": "timestamp", - "format": "iso", - }, - }, - "ioConfig": Object { - "inputFormat": Object { - "type": "json", - }, - "inputSource": Object { - "type": "http", - "uris": Array [ - "https://website.com/wikipedia.json.gz", - ], - }, - "type": "index_parallel", - }, - "tuningConfig": Object { - "forceGuaranteedRollup": true, - "partitionsSpec": Object { - "type": "hashed", - }, - "type": "index_parallel", - }, - }, - "type": "index_parallel", - } - `); - - const noRollup = updateSchemaWithSample( - ingestionSpec, - { header: ['header'], rows: [] }, - 'specific', - false, - ); - - expect(noRollup).toMatchInlineSnapshot(` - Object { - "spec": Object { - "dataSchema": Object { - "dataSource": "wikipedia", - "dimensionsSpec": Object { - "dimensions": Array [ - "header", - ], - }, - "granularitySpec": Object { - "queryGranularity": "none", - "rollup": false, - "segmentGranularity": "day", - }, - "timestampSpec": Object { - "column": "timestamp", - "format": "iso", - }, - }, - "ioConfig": Object { - "inputFormat": Object { - "type": "json", - }, - "inputSource": Object { - "type": "http", - "uris": Array [ - "https://website.com/wikipedia.json.gz", - ], - }, - "type": "index_parallel", - }, - "tuningConfig": Object { - "partitionsSpec": Object { - "type": "dynamic", - }, - "type": "index_parallel", - }, - }, - "type": "index_parallel", - } - `); - }); + // it('updateSchemaWithSample', () => { + // const withRollup = updateSchemaWithSample( + // ingestionSpec, + // { header: ['header'], rows: [] }, + // 'specific', + // true, + // ); + // + // expect(withRollup).toMatchInlineSnapshot(` + // Object { + // "spec": Object { + // "dataSchema": Object { + // "dataSource": "wikipedia", + // "dimensionsSpec": Object { + // "dimensions": Array [ + // "header", + // ], + // }, + // "granularitySpec": Object { + // "queryGranularity": "hour", + // "rollup": true, + // "segmentGranularity": "day", + // }, + // "metricsSpec": Array [ + // Object { + // "name": "count", + // 
"type": "count", + // }, + // ], + // "timestampSpec": Object { + // "column": "timestamp", + // "format": "iso", + // }, + // }, + // "ioConfig": Object { + // "inputFormat": Object { + // "type": "json", + // }, + // "inputSource": Object { + // "type": "http", + // "uris": Array [ + // "https://website.com/wikipedia.json.gz", + // ], + // }, + // "type": "index_parallel", + // }, + // "tuningConfig": Object { + // "forceGuaranteedRollup": true, + // "partitionsSpec": Object { + // "type": "hashed", + // }, + // "type": "index_parallel", + // }, + // }, + // "type": "index_parallel", + // } + // `); + // + // const noRollup = updateSchemaWithSample( + // ingestionSpec, + // { header: ['header'], rows: [] }, + // 'specific', + // false, + // ); + // + // expect(noRollup).toMatchInlineSnapshot(` + // Object { + // "spec": Object { + // "dataSchema": Object { + // "dataSource": "wikipedia", + // "dimensionsSpec": Object { + // "dimensions": Array [ + // "header", + // ], + // }, + // "granularitySpec": Object { + // "queryGranularity": "none", + // "rollup": false, + // "segmentGranularity": "day", + // }, + // "timestampSpec": Object { + // "column": "timestamp", + // "format": "iso", + // }, + // }, + // "ioConfig": Object { + // "inputFormat": Object { + // "type": "json", + // }, + // "inputSource": Object { + // "type": "http", + // "uris": Array [ + // "https://website.com/wikipedia.json.gz", + // ], + // }, + // "type": "index_parallel", + // }, + // "tuningConfig": Object { + // "partitionsSpec": Object { + // "type": "dynamic", + // }, + // "type": "index_parallel", + // }, + // }, + // "type": "index_parallel", + // } + // `); + // }); it('adjustId', () => { expect(adjustId('')).toEqual(''); diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx index 9f3eb072b3cd..3bcbbfb2d0b0 100644 --- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx +++ 
b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx @@ -39,7 +39,7 @@ import { parseCsvLine, typeIs, } from '../../utils'; -import type { SampleHeaderAndRows } from '../../utils/sampler'; +import type { SampleResponse } from '../../utils/sampler'; import type { DimensionsSpec } from '../dimension-spec/dimension-spec'; import { getDimensionSpecName, @@ -269,6 +269,9 @@ export interface DataSchema { export type DimensionMode = 'specific' | 'auto-detect'; export function getDimensionMode(spec: Partial): DimensionMode { + if (deepGet(spec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery') === true) { + return 'auto-detect'; + } const dimensions = deepGet(spec, 'spec.dataSchema.dimensionsSpec.dimensions') || EMPTY_ARRAY; return Array.isArray(dimensions) && dimensions.length === 0 ? 'auto-detect' : 'specific'; } @@ -2182,8 +2185,8 @@ function noNumbers(xs: string[]): boolean { return xs.every(x => isNaN(Number(x))); } -export function guessInputFormat(sampleData: string[], canBeMultiLineJson = false): InputFormat { - let sampleDatum = sampleData[0]; +export function guessInputFormat(sampleRaw: string[], canBeMultiLineJson = false): InputFormat { + let sampleDatum = sampleRaw[0]; if (sampleDatum) { sampleDatum = String(sampleDatum); // Really ensure it is a string @@ -2319,11 +2322,11 @@ function inputFormatFromType(options: InputFormatFromTypeOptions): InputFormat { // ------------------------ -export function guessIsArrayFromHeaderAndRows( - headerAndRows: SampleHeaderAndRows, +export function guessIsArrayFromSampleResponse( + sampleResponse: SampleResponse, column: string, ): boolean { - return headerAndRows.rows.some(r => isSimpleArray(r.input?.[column])); + return sampleResponse.data.some(r => isSimpleArray(r.input?.[column])); } export function guessColumnTypeFromInput( @@ -2355,13 +2358,13 @@ export function guessColumnTypeFromInput( } } -export function guessColumnTypeFromHeaderAndRows( - headerAndRows: SampleHeaderAndRows, +export function 
guessColumnTypeFromSampleResponse( + sampleResponse: SampleResponse, column: string, guessNumericStringsAsNumbers: boolean, ): string { return guessColumnTypeFromInput( - filterMap(headerAndRows.rows, r => r.input?.[column]), + filterMap(sampleResponse.data, r => r.input?.[column]), guessNumericStringsAsNumbers, ); } @@ -2391,7 +2394,7 @@ function getTypeHintsFromSpec(spec: Partial): Record, - headerAndRows: SampleHeaderAndRows, + sampleResponse: SampleResponse, dimensionMode: DimensionMode, rollup: boolean, forcePartitionInitialization = false, @@ -2404,26 +2407,24 @@ export function updateSchemaWithSample( let newSpec = spec; if (dimensionMode === 'auto-detect') { - newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions'); + newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery', true); + newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions', true); newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions', []); } else { + newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery'); + newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions'); newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions'); - - const dimensions = getDimensionSpecs( - headerAndRows, - typeHints, - guessNumericStringsAsNumbers, - rollup, + newSpec = deepSet( + newSpec, + 'spec.dataSchema.dimensionsSpec.dimensions', + getDimensionSpecs(sampleResponse, typeHints, guessNumericStringsAsNumbers, rollup), ); - if (dimensions) { - newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions', dimensions); - } } if (rollup) { newSpec = deepSet(newSpec, 'spec.dataSchema.granularitySpec.queryGranularity', 'hour'); - const metrics = getMetricSpecs(headerAndRows, typeHints, guessNumericStringsAsNumbers); + const metrics = getMetricSpecs(sampleResponse, typeHints, guessNumericStringsAsNumbers); if (metrics) { newSpec = 
deepSet(newSpec, 'spec.dataSchema.metricsSpec', metrics); } diff --git a/web-console/src/druid-models/metric-spec/metric-spec.spec.ts b/web-console/src/druid-models/metric-spec/metric-spec.spec.ts index e30b806344b5..3f6d47272a29 100644 --- a/web-console/src/druid-models/metric-spec/metric-spec.spec.ts +++ b/web-console/src/druid-models/metric-spec/metric-spec.spec.ts @@ -16,16 +16,14 @@ * limitations under the License. */ -import { JSON_SAMPLE } from '../../utils/sampler.mock'; +import { EMPTY_SAMPLE, JSON_SAMPLE } from '../../utils/sampler.mock'; import { getMetricSpecs } from './metric-spec'; describe('metric-spec', () => { describe('getMetricSecs', () => { it('works for empty', () => { - expect(getMetricSpecs({ header: ['header'], rows: [] }, {}, false)).toEqual([ - { name: 'count', type: 'count' }, - ]); + expect(getMetricSpecs(EMPTY_SAMPLE, {}, false)).toEqual([{ name: 'count', type: 'count' }]); }); it('works with json', () => { diff --git a/web-console/src/druid-models/metric-spec/metric-spec.tsx b/web-console/src/druid-models/metric-spec/metric-spec.tsx index 3eb0a24d780c..61acf08e66a3 100644 --- a/web-console/src/druid-models/metric-spec/metric-spec.tsx +++ b/web-console/src/druid-models/metric-spec/metric-spec.tsx @@ -23,8 +23,8 @@ import type { Field } from '../../components'; import { ExternalLink } from '../../components'; import { getLink } from '../../links'; import { filterMap, typeIs } from '../../utils'; -import type { SampleHeaderAndRows } from '../../utils/sampler'; -import { guessColumnTypeFromHeaderAndRows } from '../ingestion-spec/ingestion-spec'; +import type { SampleResponse } from '../../utils/sampler'; +import { guessColumnTypeFromSampleResponse } from '../ingestion-spec/ingestion-spec'; export interface MetricSpec { readonly type: string; @@ -388,16 +388,17 @@ export function getMetricSpecOutputType(metricSpec: MetricSpec): string | undefi } export function getMetricSpecs( - headerAndRows: SampleHeaderAndRows, + sampleResponse: 
SampleResponse, typeHints: Record, guessNumericStringsAsNumbers: boolean, ): MetricSpec[] { return [{ name: 'count', type: 'count' }].concat( - filterMap(headerAndRows.header, h => { + filterMap(sampleResponse.logicalSegmentSchema, s => { + const h = s.name; if (h === '__time') return; const type = typeHints[h] || - guessColumnTypeFromHeaderAndRows(headerAndRows, h, guessNumericStringsAsNumbers); + guessColumnTypeFromSampleResponse(sampleResponse, h, guessNumericStringsAsNumbers); switch (type) { case 'double': return { name: `sum_${h}`, type: 'doubleSum', fieldName: h }; diff --git a/web-console/src/helpers/spec-conversion.ts b/web-console/src/helpers/spec-conversion.ts index 8562bd1f68f1..990147b57ad7 100644 --- a/web-console/src/helpers/spec-conversion.ts +++ b/web-console/src/helpers/spec-conversion.ts @@ -36,7 +36,7 @@ import type { Transform, } from '../druid-models'; import { inflateDimensionSpec, upgradeSpec } from '../druid-models'; -import { deepGet, filterMap, oneOf } from '../utils'; +import { deepGet, filterMap, nonEmptyArray, oneOf } from '../utils'; export function getSpecDatasourceName(spec: IngestionSpec): string { return deepGet(spec, 'spec.dataSchema.dataSource') || 'unknown_datasource'; @@ -86,6 +86,10 @@ export function convertSpecToSql(spec: any): QueryWithContext { const rollup = deepGet(spec, 'spec.dataSchema.granularitySpec.rollup') ?? 
true; + if (nonEmptyArray(deepGet(spec, 'spec.dataSchema.dimensionsSpec.spatialDimensions'))) { + throw new Error(`spatialDimensions are not currently supported in SQL-based ingestion`); + } + const timestampSpec: TimestampSpec = deepGet(spec, 'spec.dataSchema.timestampSpec'); if (!timestampSpec) throw new Error(`spec.dataSchema.timestampSpec is not defined`); diff --git a/web-console/src/utils/sampler.mock.ts b/web-console/src/utils/sampler.mock.ts index a95b73a9457c..3cae4370f969 100644 --- a/web-console/src/utils/sampler.mock.ts +++ b/web-console/src/utils/sampler.mock.ts @@ -17,9 +17,18 @@ */ // Just to make sure we are in a test context. This line will cause trouble if this file is ever compiled into the main build -import type { SampleHeaderAndRows } from './sampler'; +import type { SampleResponse } from './sampler'; -expect(1).toEqual(1); +expect(1).toEqual(1); // Just to make sure this file does not get included in the build by accident + +export const EMPTY_SAMPLE: SampleResponse = { + numRowsRead: 0, + numRowsIndexed: 0, + logicalDimensions: [], + physicalDimensions: [], + logicalSegmentSchema: [{ name: '__time', type: 'LONG' }], + data: [], +}; /* This data is the returned sample when ingested with: @@ -29,9 +38,45 @@ This data is the returned sample when ingested with: {"timestamp":"2016-04-11T09:22:00Z","user":"Alice","followers":3,"spend":5.1,"id":"73534533","tags":["a","b"],"nums":[7,8]} */ -export const JSON_SAMPLE: SampleHeaderAndRows = { - header: ['timestamp', 'user', 'followers', 'spend', 'id', 'tags', 'nums'], - rows: [ +export const JSON_SAMPLE: SampleResponse = { + numRowsRead: 3, + numRowsIndexed: 3, + logicalDimensions: [ + { type: 'string', name: 'user', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { + type: 'long', + name: 'followers', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: false, + }, + { type: 'json', name: 'spend', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'string', 
name: 'id', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + ], + physicalDimensions: [ + { type: 'json', name: 'user', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { + type: 'json', + name: 'followers', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { type: 'json', name: 'spend', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'id', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + ], + logicalSegmentSchema: [ + { name: '__time', type: 'LONG' }, + { name: 'user', type: 'STRING' }, + { name: 'followers', type: 'LONG' }, + { name: 'spend', type: 'COMPLEX' }, + { name: 'id', type: 'STRING' }, + { name: 'tags', type: 'ARRAY' }, + { name: 'nums', type: 'ARRAY' }, + ], + data: [ { input: { timestamp: '2016-04-11T09:20:00Z', @@ -43,14 +88,13 @@ export const JSON_SAMPLE: SampleHeaderAndRows = { nums: [4], }, parsed: { - __time: 0, - timestamp: '2016-04-11T09:20:00Z', + __time: 1460366400000, user: 'Alice', - followers: '10', - spend: '0', + followers: 10, + spend: 0, id: '12232323', tags: null, - nums: '4', + nums: [4], }, }, { @@ -64,14 +108,13 @@ export const JSON_SAMPLE: SampleHeaderAndRows = { nums: [5, 6], }, parsed: { - __time: 0, - timestamp: '2016-04-11T09:21:00Z', + __time: 1460366460000, user: 'Bob', - followers: '0', - spend: '3', + followers: 0, + spend: 3, id: '45345634', - tags: 'a', - nums: ['5', '6'], + tags: ['a'], + nums: [5, 6], }, }, { @@ -85,14 +128,13 @@ export const JSON_SAMPLE: SampleHeaderAndRows = { nums: [7, 8], }, parsed: { - __time: 0, - timestamp: 
'2016-04-11T09:22:00Z', + __time: 1460366520000, user: 'Alice', - followers: '3', - spend: '5.1', + followers: 3, + spend: 5.1, id: '73534533', tags: ['a', 'b'], - nums: ['7', '8'], + nums: [7, 8], }, }, ], @@ -119,9 +161,45 @@ SELECT FROM test_data */ -export const CSV_SAMPLE: SampleHeaderAndRows = { - header: ['timestamp', 'user', 'followers', 'spend', 'id', 'tags', 'nums'], - rows: [ +export const CSV_SAMPLE: SampleResponse = { + numRowsRead: 3, + numRowsIndexed: 3, + logicalDimensions: [ + { type: 'string', name: 'user', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { + type: 'string', + name: 'followers', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { type: 'string', name: 'spend', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'string', name: 'id', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + ], + physicalDimensions: [ + { type: 'json', name: 'user', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { + type: 'json', + name: 'followers', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { type: 'json', name: 'spend', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'id', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'tags', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + { type: 'json', name: 'nums', multiValueHandling: 'SORTED_ARRAY', createBitmapIndex: true }, + ], + logicalSegmentSchema: [ + { name: '__time', type: 'LONG' }, + { name: 'user', type: 'STRING' }, + { name: 'followers', type: 'STRING' }, + { name: 'spend', type: 'STRING' }, + { name: 'id', type: 'STRING' }, + { name: 'tags', type: 'COMPLEX' }, + { name: 'nums', type: 'COMPLEX' }, + ], + 
data: [ { input: { timestamp: '2016-04-11T09:20:00.000Z', @@ -133,8 +211,7 @@ export const CSV_SAMPLE: SampleHeaderAndRows = { nums: '4', }, parsed: { - __time: 0, - timestamp: '2016-04-11T09:20:00.000Z', + __time: 1460366400000, user: 'Alice', followers: '10', spend: '0', @@ -154,8 +231,7 @@ export const CSV_SAMPLE: SampleHeaderAndRows = { nums: ['5', '6'], }, parsed: { - __time: 0, - timestamp: '2016-04-11T09:21:00.000Z', + __time: 1460366460000, user: 'Bob', followers: '0', spend: '3', @@ -175,8 +251,7 @@ export const CSV_SAMPLE: SampleHeaderAndRows = { nums: ['7', '8'], }, parsed: { - __time: 0, - timestamp: '2016-04-11T09:22:00.000Z', + __time: 1460366520000, user: 'Alice', followers: '3', spend: '5.1', diff --git a/web-console/src/utils/sampler.spec.ts b/web-console/src/utils/sampler.spec.ts new file mode 100644 index 000000000000..8fdc505e46ec --- /dev/null +++ b/web-console/src/utils/sampler.spec.ts @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import type { SampleResponse } from './sampler'; +import { guessDimensionsFromSampleResponse } from './sampler'; + +describe('sampler', () => { + describe('getInferredDimensionsFromSampleResponse', () => { + const sampleResponse: SampleResponse = { + numRowsRead: 20, + numRowsIndexed: 20, + logicalDimensions: [ + { + type: 'long', + name: 'isRobot', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: false, + }, + { + type: 'string', + name: 'channel', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { + type: 'string', + name: 'flags', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { + type: 'long', + name: 'isUnpatrolled', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: false, + }, + ], + physicalDimensions: [ + { + type: 'json', + name: 'isRobot', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { + type: 'json', + name: 'channel', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { + type: 'json', + name: 'flags', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + { + type: 'json', + name: 'isUnpatrolled', + multiValueHandling: 'SORTED_ARRAY', + createBitmapIndex: true, + }, + ], + logicalSegmentSchema: [ + { name: '__time', type: 'LONG' }, + { name: 'isRobot', type: 'LONG' }, + { name: 'channel', type: 'STRING' }, + { name: 'flags', type: 'STRING' }, + { name: 'isUnpatrolled', type: 'LONG' }, + ], + data: [ + { + input: { + isRobot: true, + channel: '#sv.wikipedia', + timestamp: '2016-06-27T00:00:11.080Z', + flags: 'NB', + isUnpatrolled: false, + }, + parsed: { + __time: 1466985611080, + isRobot: true, + channel: '#sv.wikipedia', + flags: 'NB', + isUnpatrolled: false, + }, + }, + ], + }; + + it('works', () => { + expect(guessDimensionsFromSampleResponse(sampleResponse)).toMatchInlineSnapshot(` + Array [ + Object { + "name": "isRobot", + "type": "string", + }, + Object { + "createBitmapIndex": true, + "multiValueHandling": 
"SORTED_ARRAY", + "name": "channel", + "type": "string", + }, + Object { + "createBitmapIndex": true, + "multiValueHandling": "SORTED_ARRAY", + "name": "flags", + "type": "string", + }, + Object { + "name": "isUnpatrolled", + "type": "string", + }, + ] + `); + }); + }); +}); diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts index 1860d4fa2023..88bf7cca0535 100644 --- a/web-console/src/utils/sampler.ts +++ b/web-console/src/utils/sampler.ts @@ -20,6 +20,7 @@ import { dedupe } from 'druid-query-toolkit'; import * as JSONBig from 'json-bigint-native'; import type { + DimensionSpec, DimensionsSpec, IngestionSpec, IngestionType, @@ -32,6 +33,7 @@ import type { } from '../druid-models'; import { getDimensionNamesFromTransforms, + getDimensionSpecName, getSpecType, getTimestampSchema, isDruidSource, @@ -43,10 +45,9 @@ import { import { Api } from '../singletons'; import { getDruidErrorMessage, queryDruidRune } from './druid-query'; -import { arrangeWithPrefixSuffix, EMPTY_ARRAY, filterMap } from './general'; +import { EMPTY_ARRAY, filterMap } from './general'; import { deepGet, deepSet } from './object-change'; -const SAMPLER_URL = `/druid/indexer/v1/sampler`; const BASE_SAMPLER_CONFIG: SamplerConfig = { numRows: 500, timeoutMs: 15000, @@ -63,6 +64,38 @@ export interface SamplerConfig { export interface SampleResponse { data: SampleEntry[]; + logicalSegmentSchema: { name: string; type: string }[]; + logicalDimensions: DimensionSpec[]; + physicalDimensions: DimensionSpec[]; + numRowsIndexed: number; + numRowsRead: number; +} + +export function getHeaderNamesFromSampleResponse( + sampleResponse: SampleResponse, + ignoreTimeColumn = false, +) { + return filterMap(sampleResponse.logicalSegmentSchema, s => + ignoreTimeColumn && s.name === '__time' ? 
undefined : s.name, + ); +} + +export function guessDimensionsFromSampleResponse(sampleResponse: SampleResponse): DimensionSpec[] { + const { logicalDimensions, physicalDimensions, data } = sampleResponse; + return logicalDimensions.map(d => { + // Boolean column are currently reported as "long" so let's turn them into "string" + if ( + d.type === 'long' && + physicalDimensions.find(_ => _.name === d.name)?.type === 'json' && + typeof data[0]?.input?.[d.name] === 'boolean' + ) { + return { + name: d.name, + type: 'string', + }; + } + return d; + }); } export type CacheRows = Record[]; @@ -81,11 +114,6 @@ export interface SampleEntry { error?: string; } -export interface SampleHeaderAndRows { - header: string[]; - rows: SampleEntry[]; -} - export interface ExampleManifest { name: string; description: string; @@ -126,46 +154,6 @@ export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows) { return sampleSpec; } -export interface HeaderFromSampleResponseOptions { - sampleResponse: SampleResponse; - ignoreTimeColumn?: boolean; - columnOrder?: string[]; - suffixColumnOrder?: string[]; - useInput?: boolean; -} - -export function headerFromSampleResponse(options: HeaderFromSampleResponseOptions): string[] { - const { sampleResponse, ignoreTimeColumn, columnOrder, suffixColumnOrder, useInput } = options; - - const key = useInput ? 'input' : 'parsed'; - let columns = arrangeWithPrefixSuffix( - dedupe(sampleResponse.data.flatMap(s => (s[key] ? Object.keys(s[key]!) 
: []))), - columnOrder || [TIME_COLUMN], - suffixColumnOrder || [], - ); - - if (ignoreTimeColumn) { - columns = columns.filter(c => c !== TIME_COLUMN); - } - - return columns; -} - -export interface HeaderAndRowsFromSampleResponseOptions extends HeaderFromSampleResponseOptions { - parsedOnly?: boolean; -} - -export function headerAndRowsFromSampleResponse( - options: HeaderAndRowsFromSampleResponseOptions, -): SampleHeaderAndRows { - const { sampleResponse, parsedOnly } = options; - - return { - header: headerFromSampleResponse(options), - rows: parsedOnly ? sampleResponse.data.filter(d => d.parsed) : sampleResponse.data, - }; -} - export async function getProxyOverlordModules(): Promise { let statusResp: any; try { @@ -185,7 +173,7 @@ export async function postToSampler( let sampleResp: any; try { - sampleResp = await Api.instance.post(`${SAMPLER_URL}?for=${forStr}`, sampleSpec); + sampleResp = await Api.instance.post(`/druid/indexer/v1/sampler?for=${forStr}`, sampleSpec); } catch (e) { throw new Error(getDruidErrorMessage(e)); } @@ -332,7 +320,9 @@ export async function sampleForParser( dataSchema: { dataSource: 'sample', timestampSpec: reingestMode ? REINDEX_TIMESTAMP_SPEC : PLACEHOLDER_TIMESTAMP_SPEC, - dimensionsSpec: {}, + dimensionsSpec: { + useSchemaDiscovery: true, + }, granularitySpec: { rollup: false, }, @@ -359,7 +349,9 @@ export async function sampleForTimestamp( ioConfig: deepGet(spec, 'spec.ioConfig'), dataSchema: { dataSource: 'sample', - dimensionsSpec: {}, + dimensionsSpec: { + useSchemaDiscovery: true, + }, timestampSpec: timestampSchema === 'column' ? 
PLACEHOLDER_TIMESTAMP_SPEC : timestampSpec, granularitySpec: { rollup: false, @@ -380,7 +372,7 @@ export async function sampleForTimestamp( const transforms: Transform[] = deepGet(spec, 'spec.dataSchema.transformSpec.transforms') || EMPTY_ARRAY; - // If we are trying to parts a column then get a bit fancy: + // If we are trying to parse a column then get a bit fancy: // Query the same sample again (same cache key) const sampleSpec: SampleSpec = { type: samplerType, @@ -388,7 +380,9 @@ export async function sampleForTimestamp( ioConfig: deepGet(spec, 'spec.ioConfig'), dataSchema: { dataSource: 'sample', - dimensionsSpec: {}, + dimensionsSpec: { + useSchemaDiscovery: true, + }, timestampSpec, transformSpec: { transforms: transforms.filter(transform => transform.name === TIME_COLUMN), @@ -430,8 +424,8 @@ export async function sampleForTransform( const timestampSpec: TimestampSpec = deepGet(spec, 'spec.dataSchema.timestampSpec'); const transforms: Transform[] = deepGet(spec, 'spec.dataSchema.transformSpec.transforms') || []; - // Extra step to simulate auto detecting dimension with transforms - let specialDimensionSpec: DimensionsSpec = {}; + // Extra step to simulate auto-detecting dimension with transforms + let specialDimensionSpec: DimensionsSpec = { useSchemaDiscovery: true }; if (transforms && transforms.length) { const sampleSpecHack: SampleSpec = { type: samplerType, @@ -440,7 +434,9 @@ export async function sampleForTransform( dataSchema: { dataSource: 'sample', timestampSpec, - dimensionsSpec: {}, + dimensionsSpec: { + useSchemaDiscovery: true, + }, granularitySpec: { rollup: false, }, @@ -458,10 +454,10 @@ export async function sampleForTransform( specialDimensionSpec, 'dimensions', dedupe( - headerFromSampleResponse({ - sampleResponse: sampleResponseHack, - ignoreTimeColumn: true, - }).concat(getDimensionNamesFromTransforms(transforms)), + ( + guessDimensionsFromSampleResponse(sampleResponseHack) as (DimensionSpec | string)[] + 
).concat(getDimensionNamesFromTransforms(transforms)), + getDimensionSpecName, ), ); } @@ -497,8 +493,8 @@ export async function sampleForFilter( const transforms: Transform[] = deepGet(spec, 'spec.dataSchema.transformSpec.transforms') || []; const filter: any = deepGet(spec, 'spec.dataSchema.transformSpec.filter'); - // Extra step to simulate auto detecting dimension with transforms - let specialDimensionSpec: DimensionsSpec = {}; + // Extra step to simulate auto-detecting dimension with transforms + let specialDimensionSpec: DimensionsSpec = { useSchemaDiscovery: true }; if (transforms && transforms.length) { const sampleSpecHack: SampleSpec = { type: samplerType, @@ -507,7 +503,9 @@ export async function sampleForFilter( dataSchema: { dataSource: 'sample', timestampSpec, - dimensionsSpec: {}, + dimensionsSpec: { + useSchemaDiscovery: true, + }, granularitySpec: { rollup: false, }, @@ -525,10 +523,9 @@ export async function sampleForFilter( specialDimensionSpec, 'dimensions', dedupe( - headerFromSampleResponse({ - sampleResponse: sampleResponseHack, - ignoreTimeColumn: true, - }).concat(getDimensionNamesFromTransforms(transforms)), + getHeaderNamesFromSampleResponse(sampleResponseHack, true).concat( + getDimensionNamesFromTransforms(transforms), + ), ), ); } diff --git a/web-console/src/utils/utils.spec.ts b/web-console/src/utils/utils.spec.ts index 2d5c1909dc3f..87f8b121a66d 100644 --- a/web-console/src/utils/utils.spec.ts +++ b/web-console/src/utils/utils.spec.ts @@ -18,7 +18,7 @@ import type { IngestionSpec } from '../druid-models'; -import { applyCache, headerFromSampleResponse } from './sampler'; +import { applyCache } from './sampler'; describe('utils', () => { const ingestionSpec: IngestionSpec = { @@ -52,20 +52,6 @@ describe('utils', () => { }, }; - // const cacheRows: CacheRows = [{ make: 'Honda', model: 'Civic' }, { make: 'BMW', model: 'M3' }]; - - it('spec-utils headerFromSampleResponse', () => { - expect( - headerFromSampleResponse({ - sampleResponse: 
{ data: [{ input: { a: 1 }, parsed: { a: 1 } }] }, - }), - ).toMatchInlineSnapshot(` - Array [ - "a", - ] - `); - }); - it('spec-utils applyCache', () => { expect( applyCache( diff --git a/web-console/src/views/load-data-view/filter-table/__snapshots__/filter-table.spec.tsx.snap b/web-console/src/views/load-data-view/filter-table/__snapshots__/filter-table.spec.tsx.snap index 29a8d96f8406..804196839664 100644 --- a/web-console/src/views/load-data-view/filter-table/__snapshots__/filter-table.spec.tsx.snap +++ b/web-console/src/views/load-data-view/filter-table/__snapshots__/filter-table.spec.tsx.snap @@ -10,7 +10,7 @@ exports[`FilterTable matches snapshot 1`] = ` >
- c1 + __time +
+
+ +   +
+
+ +
+
+
+
+
+
+ user +
+
+ +   +
+
+
+
+
+
+
+
+
+ followers +
+
+ +   +
+
+
+
+
+
+
+
+
+ spend +
+
+ +   +
+
+
+
+
+
+
+
+
+ id +
+
+ +   +
+
+
+
+
+
+
+
+
+ tags +
+
+ +   +
+
+
+
+
+
+
+
+
+ nums
-
- hello -
+
+ 2016-04-11T09:20:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 10 +
+
+
+
+ 0 +
+
+
+
+ 12232323 +
+
+
+
+ null +
+
+
+
+ [4] +
+
+
+
+
+
+
+
+ 2016-04-11T09:21:00.000Z +
+
+
+
+ Bob +
+
+
+
+ 0 +
+
+
+
+ 3 +
+
+
+
+ 45345634 +
+
+
+
+ [a] +
+
+
+
+ [5, 6] +
+
+
+
+
+
+
+
+ 2016-04-11T09:22:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 3 +
+
+
+
+ 5.1 +
+
+
+
+ 73534533 +
+
+
+
+ [a, b] +
+
+
+
+ [7, 8] +
+
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
{ it('matches snapshot', () => { - const sampleData = { - header: ['c1'], - rows: [ - { - input: { c1: 'hello' }, - parsed: { c1: 'hello' }, - }, - ], - }; - const filterTable = ( | undefined, ): string | undefined { if (!selectedFilter) return; const selectedFilterName = selectedFilter.dimension; - if (!sampleData.header.includes(selectedFilterName)) return; + if (!getHeaderNamesFromSampleResponse(sampleResponse).includes(selectedFilterName)) return; return selectedFilterName; } export interface FilterTableProps { - sampleData: SampleHeaderAndRows; + sampleResponse: SampleResponse; columnFilter: string; dimensionFilters: DruidFilter[]; selectedFilterName: string | undefined; @@ -53,17 +54,18 @@ export interface FilterTableProps { } export const FilterTable = React.memo(function FilterTable(props: FilterTableProps) { - const { sampleData, columnFilter, dimensionFilters, selectedFilterName, onFilterSelect } = props; + const { sampleResponse, columnFilter, dimensionFilters, selectedFilterName, onFilterSelect } = + props; return ( STANDARD_TABLE_PAGE_SIZE} - columns={filterMap(sampleData.header, (columnName, i) => { + showPagination={sampleResponse.data.length > STANDARD_TABLE_PAGE_SIZE} + columns={filterMap(getHeaderNamesFromSampleResponse(sampleResponse), (columnName, i) => { if (!caseInsensitiveContains(columnName, columnFilter)) return; const timestamp = columnName === '__time'; const filterIndex = dimensionFilters.findIndex(f => getFilterDimension(f) === columnName); diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx index 5c08442b689f..499b4f4dd382 100644 --- a/web-console/src/views/load-data-view/load-data-view.tsx +++ b/web-console/src/views/load-data-view/load-data-view.tsx @@ -149,15 +149,15 @@ import type { CacheRows, ExampleManifest, SampleEntry, - SampleHeaderAndRows, SampleResponse, SampleResponseWithExtraInfo, SampleStrategy, } from '../../utils/sampler'; import { 
getCacheRowsFromSampleResponse, + getHeaderNamesFromSampleResponse, getProxyOverlordModules, - headerAndRowsFromSampleResponse, + guessDimensionsFromSampleResponse, sampleForConnect, sampleForExampleManifests, sampleForFilter, @@ -237,19 +237,22 @@ function formatSampleEntries(sampleEntries: SampleEntry[], isDruidSource: boolea } } -function getTimestampSpec(headerAndRows: SampleHeaderAndRows | null): TimestampSpec { - if (!headerAndRows) return CONSTANT_TIMESTAMP_SPEC; +function getTimestampSpec(sampleResponse: SampleResponse | null): TimestampSpec { + if (!sampleResponse) return CONSTANT_TIMESTAMP_SPEC; - const timestampSpecs = filterMap(headerAndRows.header, sampleHeader => { - const possibleFormat = possibleDruidFormatForValues( - filterMap(headerAndRows.rows, d => (d.parsed ? d.parsed[sampleHeader] : undefined)), - ); - if (!possibleFormat) return; - return { - column: sampleHeader, - format: possibleFormat, - }; - }); + const timestampSpecs = filterMap( + getHeaderNamesFromSampleResponse(sampleResponse), + sampleHeader => { + const possibleFormat = possibleDruidFormatForValues( + filterMap(sampleResponse.data, d => (d.parsed ? 
d.parsed[sampleHeader] : undefined)), + ); + if (!possibleFormat) return; + return { + column: sampleHeader, + format: possibleFormat, + }; + }, + ); return ( timestampSpecs.find(ts => /time/i.test(ts.column)) || // Use a suggestion that has time in the name if possible @@ -354,30 +357,31 @@ export interface LoadDataViewState { inputQueryState: QueryState; // for parser - parserQueryState: QueryState; + parserQueryState: QueryState; // for flatten selectedFlattenField?: SelectedIndex; // for timestamp timestampQueryState: QueryState<{ - headerAndRows: SampleHeaderAndRows; + sampleResponse: SampleResponse; spec: Partial; }>; // for transform - transformQueryState: QueryState; + transformQueryState: QueryState; selectedTransform?: SelectedIndex; // for filter - filterQueryState: QueryState; + filterQueryState: QueryState; selectedFilter?: SelectedIndex; // for schema schemaQueryState: QueryState<{ - headerAndRows: SampleHeaderAndRows; + sampleResponse: SampleResponse; dimensions: (string | DimensionSpec)[] | undefined; metricsSpec: MetricSpec[] | undefined; + definedDimensions: boolean; }>; selectedAutoDimension?: string; selectedDimensionSpec?: SelectedIndex; @@ -1427,10 +1431,7 @@ export class LoadDataView extends React.PureComponent ({ cacheRows: getCacheRowsFromSampleResponse(sampleResponse), parserQueryState: new QueryState({ - data: headerAndRowsFromSampleResponse({ - sampleResponse, - ignoreTimeColumn: true, - }), + data: sampleResponse, lastData: parserQueryState.getSomeData(), }), })); @@ -1473,7 +1474,7 @@ export class LoadDataView extends React.PureComponent {data && ( r.input), + filterMap(parserQueryState.data.data, r => r.input), 'ignore-arrays', ); } @@ -1668,9 +1669,7 @@ export class LoadDataView extends React.PureComponent ({ timestampQueryState: new QueryState({ data: { - headerAndRows: headerAndRowsFromSampleResponse({ - sampleResponse, - }), + sampleResponse, spec, }, lastData: timestampQueryState.getSomeData(), @@ -1717,7 +1716,7 @@ export class 
LoadDataView extends React.PureComponent ({ transformQueryState: new QueryState({ - data: headerAndRowsFromSampleResponse({ - sampleResponse, - }), + data: sampleResponse, lastData: transformQueryState.getSomeData(), }), })); @@ -1876,7 +1873,8 @@ export class LoadDataView extends React.PureComponentPlease fill in the previous steps; } else { - const data = transformQueryState.getSomeData(); + const sampleResponse = transformQueryState.getSomeData(); + mainFill = (
@@ -1892,13 +1890,16 @@ export class LoadDataView extends React.PureComponent
- {data && ( + {sampleResponse && ( )} @@ -2045,10 +2046,7 @@ export class LoadDataView extends React.PureComponent ({ filterQueryState: new QueryState({ - data: headerAndRowsFromSampleResponse({ - sampleResponse, - parsedOnly: true, - }), + data: sampleResponse, lastData: filterQueryState.getSomeData(), }), })); @@ -2067,15 +2065,10 @@ export class LoadDataView extends React.PureComponent ({ // cacheRows: sampleResponseNoFilter.cacheKey, filterQueryState: new QueryState({ - data: deepSet(headerAndRowsNoFilter, 'rows', []), + data: sampleResponseNoFilter, lastData: filterQueryState.getSomeData(), }), })); @@ -2095,7 +2088,8 @@ export class LoadDataView extends React.PureComponentPlease enter more details for the previous steps; } else { - const data = filterQueryState.getSomeData(); + const filterQuery = filterQueryState.getSomeData(); + mainFill = (
@@ -2105,12 +2099,12 @@ export class LoadDataView extends React.PureComponent
- {data && ( + {filterQuery && ( )} @@ -2243,15 +2237,10 @@ export class LoadDataView extends React.PureComponent ({ schemaQueryState: new QueryState({ data: { - headerAndRows: headerAndRowsFromSampleResponse({ - sampleResponse, - columnOrder: [TIME_COLUMN].concat( - dimensions ? dimensions.map(getDimensionSpecName) : [], - ), - suffixColumnOrder: metricsSpec ? metricsSpec.map(getMetricSpecName) : undefined, - }), - dimensions, + sampleResponse, + dimensions: dimensions || guessDimensionsFromSampleResponse(sampleResponse), metricsSpec, + definedDimensions: Boolean(dimensions), }, lastData: schemaQueryState.getSomeData(), }), @@ -2569,13 +2558,7 @@ export class LoadDataView extends React.PureComponent { const sampleResponse = await sampleForTransform(spec, cacheRows); this.updateSpec( - updateSchemaWithSample( - spec, - headerAndRowsFromSampleResponse({ sampleResponse }), - getDimensionMode(spec), - newRollup, - true, - ), + updateSchemaWithSample(spec, sampleResponse, getDimensionMode(spec), newRollup, true), ); }} confirmButtonText={`Yes - ${newRollup ? 'enable' : 'disable'} rollup`} @@ -2600,12 +2583,7 @@ export class LoadDataView extends React.PureComponent { const sampleResponse = await sampleForTransform(spec, cacheRows); this.updateSpec( - updateSchemaWithSample( - spec, - headerAndRowsFromSampleResponse({ sampleResponse }), - newDimensionMode, - getRollup(spec), - ), + updateSchemaWithSample(spec, sampleResponse, newDimensionMode, getRollup(spec)), ); }} confirmButtonText={`Yes - ${autoDetect ? 
'auto detect' : 'explicitly set'} columns`} diff --git a/web-console/src/views/load-data-view/parse-data-table/__snapshots__/parse-data-table.spec.tsx.snap b/web-console/src/views/load-data-view/parse-data-table/__snapshots__/parse-data-table.spec.tsx.snap index ec7f03e32f68..ccc726a8562a 100644 --- a/web-console/src/views/load-data-view/parse-data-table/__snapshots__/parse-data-table.spec.tsx.snap +++ b/web-console/src/views/load-data-view/parse-data-table/__snapshots__/parse-data-table.spec.tsx.snap @@ -10,7 +10,7 @@ exports[`ParseDataTable matches snapshot 1`] = ` >
- c1 + user
+
+
+
+
+ followers +
+
+ +   +
+
+
+
+
+
+
+
+
+ spend +
+
+ +   +
+
+
+
+
+
+
+
+
+ id +
+
+ +   +
+
+
+
+
+
+
+
+
+ tags +
+
+ +   +
+
+
+
+
+
+
+
+
+ nums +
+
+ +   +
+
+
+
+
+
+
+
+
+
+
+
+ • +
+
+
+
+ Alice +
+
+
+
+ 10 +
+
+
+
+ 0 +
+
+
+
+ 12232323 +
+
+
+
+ null +
+
+
+
+ [4] +
+
+
+
+
+
+
+
+ • +
+
+
+
+ Bob +
+
+
+
+ 0 +
+
+
+
+ 3 +
+
+
+
+ 45345634 +
+
+
+
+ [a] +
+
+
+
+ [5, 6] +
+
+
+
+
+
+
+
+ • +
+
+
+
+ Alice +
+
+
+
+ 3 +
+
+
+
+ 5.1 +
+
+
+
+ 73534533 +
+
+
+
+ [a, b] +
+
+
+
+ [7, 8] +
+
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
-
-
-
- • -
+ +   +
-
- hello -
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   +
@@ -119,6 +2537,51 @@ exports[`ParseDataTable matches snapshot 1`] = `  
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
  @@ -175,20 +2783,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -231,20 +2829,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -259,20 +2847,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -315,20 +2902,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -343,20 +2920,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -399,20 +2975,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -427,20 +2993,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -483,20 +3048,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -511,20 +3066,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -567,20 +3121,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -595,20 +3139,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -651,20 +3194,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -679,20 +3212,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -735,20 +3267,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -763,20 +3285,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -819,20 +3340,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -847,20 +3367,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -903,20 +3413,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -931,20 +3431,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -987,20 +3486,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -1015,20 +3504,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -1071,20 +3559,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -1099,20 +3577,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -1155,20 +3632,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -1183,20 +3650,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -1239,20 +3705,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -1267,20 +3723,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -1323,20 +3778,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -1351,20 +3796,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  @@ -1407,20 +3851,10 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
  @@ -1435,20 +3869,19 @@ exports[`ParseDataTable matches snapshot 1`] = `  
-
-
-
-
+ +   + +
+
  diff --git a/web-console/src/views/load-data-view/parse-data-table/parse-data-table.spec.tsx b/web-console/src/views/load-data-view/parse-data-table/parse-data-table.spec.tsx index c1ee01bff8fe..448a23dffad6 100644 --- a/web-console/src/views/load-data-view/parse-data-table/parse-data-table.spec.tsx +++ b/web-console/src/views/load-data-view/parse-data-table/parse-data-table.spec.tsx @@ -19,23 +19,15 @@ import { render } from '@testing-library/react'; import React from 'react'; +import { JSON_SAMPLE } from '../../../utils/sampler.mock'; + import { ParseDataTable } from './parse-data-table'; describe('ParseDataTable', () => { it('matches snapshot', () => { - const sampleData = { - header: ['c1'], - rows: [ - { - input: { c1: 'hello' }, - parsed: { c1: 'hello' }, - }, - ], - }; - const parseDataTable = ( STANDARD_TABLE_PAGE_SIZE} - columns={filterMap(sampleData.header, (columnName, i) => { - if (!caseInsensitiveContains(columnName, columnFilter)) return; - const flattenFieldIndex = flattenFields.findIndex(f => f.name === columnName); - if (flattenFieldIndex === -1 && flattenedColumnsOnly) return; - const flattenField = flattenFields[flattenFieldIndex]; - return { - Header: ( -
{ - if (!flattenField) return; - onFlattenFieldSelect(flattenField, flattenFieldIndex); - }} - > -
{columnName}
-
- {flattenField ? `${flattenField.type}: ${flattenField.expr}` : ''}  + showPagination={sampleResponse.data.length > STANDARD_TABLE_PAGE_SIZE} + columns={filterMap( + getHeaderNamesFromSampleResponse(sampleResponse, true), + (columnName, i) => { + if (!caseInsensitiveContains(columnName, columnFilter)) return; + const flattenFieldIndex = flattenFields.findIndex(f => f.name === columnName); + if (flattenFieldIndex === -1 && flattenedColumnsOnly) return; + const flattenField = flattenFields[flattenFieldIndex]; + return { + Header: ( +
{ + if (!flattenField) return; + onFlattenFieldSelect(flattenField, flattenFieldIndex); + }} + > +
{columnName}
+
+ {flattenField ? `${flattenField.type}: ${flattenField.expr}` : ''}  +
-
- ), - id: String(i), - accessor: (row: SampleEntry) => (row[key] ? row[key]![columnName] : null), - width: 140, - Cell: function ParseDataTableCell(row: RowRenderProps) { - if (row.original.unparseable) { - return ; - } - return ; - }, - headerClassName: classNames({ - flattened: flattenField, - }), - }; - })} + ), + id: String(i), + accessor: (row: SampleEntry) => (row[key] ? row[key]![columnName] : null), + width: 140, + Cell: function ParseDataTableCell(row: RowRenderProps) { + if (row.original.unparseable) { + return ; + } + return ; + }, + headerClassName: classNames({ + flattened: flattenField, + }), + }; + }, + )} SubComponent={rowInfo => { const { input, error } = rowInfo.original; const inputStr = JSONBig.stringify(input, undefined, 2); diff --git a/web-console/src/views/load-data-view/parse-time-table/__snapshots__/parse-time-table.spec.tsx.snap b/web-console/src/views/load-data-view/parse-time-table/__snapshots__/parse-time-table.spec.tsx.snap index 046e4a4a46e9..b6cab0e1c850 100644 --- a/web-console/src/views/load-data-view/parse-time-table/__snapshots__/parse-time-table.spec.tsx.snap +++ b/web-console/src/views/load-data-view/parse-time-table/__snapshots__/parse-time-table.spec.tsx.snap @@ -10,12 +10,183 @@ exports[`ParseTimeTable matches snapshot 1`] = ` >
+
+
+
+
+ __time +
+
+ Constant: 1970-01-01T00:00:00Z +   +
+
+
+
+
+
+
+
+ user +
+
+ +   +
+
+
+
+
+
+
+
+
+ followers +
+
+ +   +
+
+
+
+
+
+
+
+
+ spend +
+
+ +   +
+
+
+
+
+
+
+
+
+ id +
+
+ +   +
+
+
+
+
+
+
+
+
+ tags +
+
+ +   +
+
+
+
+
- c1 + nums
+
+
+ 2016-04-11T09:20:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 10 +
+
+
+
+ 0 +
+
+
+
+ 12232323 +
+
+
+
+ null +
+
+
+
+ [4] +
+
+
+
+
+
+
+
+ 2016-04-11T09:21:00.000Z +
+
+
+
+ Bob +
+
+
+
+ 0 +
+
+
+
+ 3 +
+
+
+
+ 45345634 +
+
+
+
+ [a] +
+
+
+
+ [5, 6] +
+
+
+
+
+
+
+
+ 2016-04-11T09:22:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 3 +
+
+
+
+ 5.1 +
+
+
+
+ 73534533 +
+
+
+
+ [a, b] +
+
+
+
+ [7, 8] +
+
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
- hello -
+ +   +
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
{ it('matches snapshot', () => { - const sampleData = { - header: ['c1'], - rows: [ - { - input: { c1: 'hello' }, - parsed: { c1: 'hello' }, - }, - ], - }; - const spec = deepSet( {} as IngestionSpec, 'spec.dataSchema.timestampSpec', @@ -46,7 +37,7 @@ describe('ParseTimeTable', () => { const parseTimeTable = ( ; }; columnFilter: string; @@ -67,76 +73,73 @@ export const ParseTimeTable = React.memo(function ParseTimeTable(props: ParseTim selectedColumnName, onTimestampColumnSelect, } = props; - const { headerAndRows, spec } = sampleBundle; + const { sampleResponse, spec } = sampleBundle; const timestampSpecColumn = getTimestampSpecColumnFromSpec(spec); const timestampDetail = getTimestampDetailFromSpec(spec); return ( STANDARD_TABLE_PAGE_SIZE} - columns={filterMap( - headerAndRows.header.length ? headerAndRows.header : ['__error__'], - (columnName, i) => { - const isTimestamp = columnName === '__time'; - if (!isTimestamp && !caseInsensitiveContains(columnName, columnFilter)) return; - const used = timestampSpecColumn === columnName; - const possibleFormat = isTimestamp - ? null - : possibleDruidFormatForValues( - filterMap(headerAndRows.rows, d => (d.parsed ? d.parsed[columnName] : undefined)), - ); - if (possibleTimestampColumnsOnly && !isTimestamp && !possibleFormat) return; + showPagination={sampleResponse.data.length > STANDARD_TABLE_PAGE_SIZE} + columns={filterMap(getHeaderNamesFromSampleResponse(sampleResponse), (columnName, i) => { + const isTimestamp = columnName === '__time'; + if (!isTimestamp && !caseInsensitiveContains(columnName, columnFilter)) return; + const used = timestampSpecColumn === columnName; + const possibleFormat = isTimestamp + ? null + : possibleDruidFormatForValues( + filterMap(sampleResponse.data, d => (d.parsed ? 
d.parsed[columnName] : undefined)), + ); + if (possibleTimestampColumnsOnly && !isTimestamp && !possibleFormat) return; - const columnClassName = classNames({ - timestamp: isTimestamp, - used, - selected: selectedColumnName === columnName, - }); - return { - Header: ( -
{ - onTimestampColumnSelect({ - column: columnName, - format: possibleFormat || '!!! Could not auto detect a format !!!', - }); - } - } - > -
{columnName}
-
- {isTimestamp ? timestampDetail : possibleFormat || ''} -   -
-
- ), - headerClassName: columnClassName, - className: columnClassName, - id: String(i), - accessor: (row: SampleEntry) => (row.parsed ? row.parsed[columnName] : null), - Cell: function ParseTimeTableCell(row: RowRenderProps) { - if (columnName === '__error__') { - return ; - } - if (row.original.unparseable) { - return ; + const columnClassName = classNames({ + timestamp: isTimestamp, + used, + selected: selectedColumnName === columnName, + }); + return { + Header: ( +
{ + onTimestampColumnSelect({ + column: columnName, + format: possibleFormat || '!!! Could not auto detect a format !!!', + }); + } } - return ; - }, - width: isTimestamp ? 200 : 140, - resizable: !isTimestamp, - }; - }, - )} + > +
{columnName}
+
+ {isTimestamp ? timestampDetail : possibleFormat || ''} +   +
+
+ ), + headerClassName: columnClassName, + className: columnClassName, + id: String(i), + accessor: (row: SampleEntry) => (row.parsed ? row.parsed[columnName] : null), + Cell: function ParseTimeTableCell(row: RowRenderProps) { + if (columnName === '__error__') { + return ; + } + if (row.original.unparseable) { + return ; + } + return ; + }, + width: isTimestamp ? 200 : 140, + resizable: !isTimestamp, + }; + })} /> ); }); diff --git a/web-console/src/views/load-data-view/schema-table/__snapshots__/schema-table.spec.tsx.snap b/web-console/src/views/load-data-view/schema-table/__snapshots__/schema-table.spec.tsx.snap index 5e0d3294c93f..a7c677ae0d01 100644 --- a/web-console/src/views/load-data-view/schema-table/__snapshots__/schema-table.spec.tsx.snap +++ b/web-console/src/views/load-data-view/schema-table/__snapshots__/schema-table.spec.tsx.snap @@ -10,12 +10,186 @@ exports[`SchemaTable matches snapshot 1`] = ` >
+
+
+
+
+ __time +
+
+ long (time column) +   +
+
+
+
+
+
+
+
+
+ user +
+
+ (auto) +   +
+
+
+
+
+
+
+
+
+ followers +
+
+ (auto) +   +
+
+
+
+
+
+
+
+
+ spend +
+
+ (auto) +   +
+
+
+
+
+
+
+
+
+ id +
+
+ (auto) +   +
+
+
+
+
+
+
+
+
+ tags +
+
+ (auto) +   +
+
+
+
+
- c1 + nums
- string (auto) + (auto)  
@@ -49,7 +223,7 @@ exports[`SchemaTable matches snapshot 1`] = `
+
+
+ 2016-04-11T09:20:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 10 +
+
+
+
+ 0 +
+
+
+
+ 12232323 +
+
+
+
+ null +
+
+
+
+ [4] +
+
+
+
+
+
+
+
+ 2016-04-11T09:21:00.000Z +
+
+
+
+ Bob +
+
+
+
+ 0 +
+
+
+
+ 3 +
+
+
+
+ 45345634 +
+
+
+
+ [a] +
+
+
+
+ [5, 6] +
+
+
+
+
+
+
+
+ 2016-04-11T09:22:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 3 +
+
+
+
+ 5.1 +
+
+
+
+ 73534533 +
+
+
+
+ [a, b] +
+
+
+
+ [7, 8] +
+
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
- hello -
+ +   +
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
{ it('matches snapshot', () => { - const sampleData = { - header: ['c1'], - rows: [ - { - input: { c1: 'hello' }, - parsed: { c1: 'hello' }, - }, - ], - }; - const schemaTable = ( STANDARD_TABLE_PAGE_SIZE} - columns={filterMap(headerAndRows.header, (columnName, i) => { + showPagination={sampleResponse.data.length > STANDARD_TABLE_PAGE_SIZE} + columns={filterMap(getHeaderNamesFromSampleResponse(sampleResponse), (columnName, i) => { if (!caseInsensitiveContains(columnName, columnFilter)) return; const metricSpecIndex = metricsSpec @@ -130,7 +132,7 @@ export const SchemaTable = React.memo(function SchemaTable(props: SchemaTablePro onClick={() => { if (isTimestamp) return; - if (dimensionSpec) { + if (definedDimensions && dimensionSpec) { onDimensionSelect(inflateDimensionSpec(dimensionSpec), dimensionSpecIndex); } else { onAutoDimensionSelect(columnName); @@ -139,7 +141,7 @@ export const SchemaTable = React.memo(function SchemaTable(props: SchemaTablePro >
{columnName}
- {isTimestamp ? 'long (time column)' : dimensionSpecType || 'string (auto)'}  + {isTimestamp ? 'long (time column)' : dimensionSpecType || '(auto)'} 
), diff --git a/web-console/src/views/load-data-view/transform-table/__snapshots__/transform-table.spec.tsx.snap b/web-console/src/views/load-data-view/transform-table/__snapshots__/transform-table.spec.tsx.snap index 3ed1b238f2ba..5a6165949ff3 100644 --- a/web-console/src/views/load-data-view/transform-table/__snapshots__/transform-table.spec.tsx.snap +++ b/web-console/src/views/load-data-view/transform-table/__snapshots__/transform-table.spec.tsx.snap @@ -10,7 +10,7 @@ exports[`TransformTable matches snapshot 1`] = ` >
- c1 + __time +
+
+ +   +
+
+
+
+
+
+
+
+
+ user +
+
+ +   +
+
+
+
+
+
+
+
+
+ followers +
+
+ +   +
+
+
+
+
+
+
+
+
+ spend +
+
+ +   +
+
+
+
+
+
+
+
+
+ id +
+
+ +   +
+
+
+
+
+
+
+
+
+ tags +
+
+ +   +
+
+
+
+
+
+
+
+
+ nums
-
- hello -
+
+ 2016-04-11T09:20:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 10 +
+
+
+
+ 0 +
+
+
+
+ 12232323 +
+
+
+
+ null +
+
+
+
+ [4] +
+
+
+
+
+
+
+
+ 2016-04-11T09:21:00.000Z +
+
+
+
+ Bob +
+
+
+
+ 0 +
+
+
+
+ 3 +
+
+
+
+ 45345634 +
+
+
+
+ [a] +
+
+
+
+ [5, 6] +
+
+
+
+
+
+
+
+ 2016-04-11T09:22:00.000Z +
+
+
+
+ Alice +
+
+
+
+ 3 +
+
+
+
+ 5.1 +
+
+
+
+ 73534533 +
+
+
+
+ [a, b] +
+
+
+
+ [7, 8] +
+
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+
+
+
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
+
+ +   + +
+
+ +   + +
+
+ +   + +
+
+ +   + +
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
{ it('matches snapshot', () => { - const sampleData = { - header: ['c1'], - rows: [ - { - input: { c1: 'hello' }, - parsed: { c1: 'hello' }, - }, - ], - }; - const transformTable = ( | undefined, ): string | undefined { if (!selectedTransform) return; const selectedTransformName = selectedTransform.name; - if (selectedTransformName && !sampleData.header.includes(selectedTransformName)) return; + if ( + selectedTransformName && + !getHeaderNamesFromSampleResponse(sampleResponse).includes(selectedTransformName) + ) { + return; + } return selectedTransformName; } export interface TransformTableProps { - sampleData: SampleHeaderAndRows; + sampleResponse: SampleResponse; columnFilter: string; transformedColumnsOnly: boolean; transforms: Transform[]; @@ -55,7 +61,7 @@ export interface TransformTableProps { export const TransformTable = React.memo(function TransformTable(props: TransformTableProps) { const { - sampleData, + sampleResponse, columnFilter, transformedColumnsOnly, transforms, @@ -66,12 +72,12 @@ export const TransformTable = React.memo(function TransformTable(props: Transfor return ( STANDARD_TABLE_PAGE_SIZE} - columns={filterMap(sampleData.header, (columnName, i) => { + showPagination={sampleResponse.data.length > STANDARD_TABLE_PAGE_SIZE} + columns={filterMap(getHeaderNamesFromSampleResponse(sampleResponse), (columnName, i) => { if (!caseInsensitiveContains(columnName, columnFilter)) return; const timestamp = columnName === '__time'; const transformIndex = transforms.findIndex(f => f.name === columnName); diff --git a/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx b/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx index 0faf4a3dba57..aee24b937048 100644 --- a/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx +++ b/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx @@ -25,8 +25,8 @@ import React, { useState } from 'react'; import { AutoForm, 
CenterMessage, LearnMore, Loader } from '../../../components'; import type { InputFormat, InputSource } from '../../../druid-models'; import { - guessColumnTypeFromHeaderAndRows, - guessIsArrayFromHeaderAndRows, + guessColumnTypeFromSampleResponse, + guessIsArrayFromSampleResponse, INPUT_FORMAT_FIELDS, inputFormatOutputsNumericStrings, PLACEHOLDER_TIMESTAMP_SPEC, @@ -41,8 +41,8 @@ import { filterMap, timeFormatToSql, } from '../../../utils'; -import type { SampleHeaderAndRows, SampleSpec } from '../../../utils/sampler'; -import { headerAndRowsFromSampleResponse, postToSampler } from '../../../utils/sampler'; +import type { SampleResponse, SampleSpec } from '../../../utils/sampler'; +import { getHeaderNamesFromSampleResponse, postToSampler } from '../../../utils/sampler'; import { ParseDataTable } from '../../load-data-view/parse-data-table/parse-data-table'; import './input-format-step.scss'; @@ -80,7 +80,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF ); const [selectTimestamp, setSelectTimestamp] = useState(true); - const [previewState] = useQueryManager({ + const [previewState] = useQueryManager({ query: inputFormatToSample, processQuery: async (inputFormat: InputFormat) => { const sampleSpec: SampleSpec = { @@ -106,53 +106,50 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF }, }; - const sampleResponse = await postToSampler(sampleSpec, 'input-format-step'); - - return headerAndRowsFromSampleResponse({ - sampleResponse, - ignoreTimeColumn: true, - useInput: true, - }); + return await postToSampler(sampleSpec, 'input-format-step'); }, }); - const previewData = previewState.data; + const previewSampleResponse = previewState.data; let possibleTimeExpression: PossibleTimeExpression | undefined; - if (previewData) { - possibleTimeExpression = filterMap(previewData.header, column => { - const values = filterMap(previewData.rows, row => row.input?.[column]); - const possibleDruidFormat = 
possibleDruidFormatForValues(values); - if (!possibleDruidFormat) return; - - const formatSql = timeFormatToSql(possibleDruidFormat); - if (!formatSql) return; - - return { - column, - timeExpression: formatSql.fillPlaceholders([C(column)]), - }; - })[0]; + if (previewSampleResponse) { + possibleTimeExpression = filterMap( + getHeaderNamesFromSampleResponse(previewSampleResponse), + column => { + const values = filterMap(previewSampleResponse.data, d => d.input?.[column]); + const possibleDruidFormat = possibleDruidFormatForValues(values); + if (!possibleDruidFormat) return; + + const formatSql = timeFormatToSql(possibleDruidFormat); + if (!formatSql) return; + + return { + column, + timeExpression: formatSql.fillPlaceholders([C(column)]), + }; + }, + )[0]; } const inputFormatAndMore = - previewData && AutoForm.isValidModel(inputFormat, INPUT_FORMAT_FIELDS) + previewSampleResponse && AutoForm.isValidModel(inputFormat, INPUT_FORMAT_FIELDS) ? { inputFormat, - signature: previewData.header.map(name => + signature: getHeaderNamesFromSampleResponse(previewSampleResponse).map(name => SqlColumnDeclaration.create( name, SqlType.fromNativeType( - guessColumnTypeFromHeaderAndRows( - previewData, + guessColumnTypeFromSampleResponse( + previewSampleResponse, name, inputFormatOutputsNumericStrings(inputFormat), ), ), ), ), - isArrays: previewData.header.map(name => - guessIsArrayFromHeaderAndRows(previewData, name), + isArrays: getHeaderNamesFromSampleResponse(previewSampleResponse).map(name => + guessIsArrayFromSampleResponse(previewSampleResponse, name), ), timeExpression: selectTimestamp ? 
possibleTimeExpression?.timeExpression : undefined, } @@ -171,9 +168,9 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF {previewState.error && ( {`Error: ${previewState.getErrorMessage()}`} )} - {previewData && ( + {previewSampleResponse && ( Date: Mon, 3 Apr 2023 00:16:22 -0700 Subject: [PATCH 02/13] support kafka format --- .../extensions-core/kafka-ingestion.md | 9 +- docs/ingestion/data-formats.md | 4 +- .../ingestion-spec/ingestion-spec.spec.ts | 30 +- .../ingestion-spec/ingestion-spec.tsx | 69 ++- .../input-format/input-format.tsx | 509 +++++++++++++++++- web-console/src/utils/general.tsx | 7 + web-console/src/utils/sampler.ts | 28 +- .../views/load-data-view/load-data-view.tsx | 79 ++- .../input-format-step/input-format-step.tsx | 16 +- .../input-source-step/input-source-step.tsx | 6 +- 10 files changed, 658 insertions(+), 99 deletions(-) diff --git a/docs/development/extensions-core/kafka-ingestion.md b/docs/development/extensions-core/kafka-ingestion.md index 58636e7f16f1..855ed2a44eb1 100644 --- a/docs/development/extensions-core/kafka-ingestion.md +++ b/docs/development/extensions-core/kafka-ingestion.md @@ -38,6 +38,7 @@ This topic covers how to submit a supervisor spec to ingest event data, also kno - For a walk-through, see the [Loading from Apache Kafka](../../tutorials/tutorial-kafka.md) tutorial. ## Kafka support + The Kafka indexing service supports transactional topics introduced in Kafka 0.11.x by default. The consumer for Kafka indexing service is incompatible with older Kafka brokers. If you are using an older version, refer to the [Kafka upgrade guide](https://kafka.apache.org/documentation/#upgrade). 
Additionally, you can set `isolation.level` to `read_uncommitted` in `consumerProperties` if either: @@ -51,6 +52,7 @@ If your Kafka cluster enables consumer-group based ACLs, you can set `group.id` To use the Kafka indexing service, load the `druid-kafka-indexing-service` extension on both the Overlord and the MiddleManagers. See [Loading extensions](../extensions.md#loading-extensions) for instructions on how to configure extensions. ## Define a supervisor spec + Similar to the ingestion spec for batch ingestion, the supervisor spec configures the data ingestion for Kafka streaming ingestion. A supervisor spec has the following sections: - `dataSchema` to specify the Druid datasource name, primary timestamp, dimensions, metrics, transforms, and any necessary filters. - `ioConfig` to configure Kafka connection settings and configure how Druid parses the data. Kafka-specific connection details go in the `consumerProperties`. The `ioConfig` is also where you define the input format (`inputFormat`) of your Kafka data. For supported formats for Kafka and information on how to configure the input format, see [Data formats](../../ingestion/data-formats.md). @@ -128,6 +130,7 @@ The following example demonstrates a supervisor spec for Kafka that uses the `JS ``` ### Kafka input format supervisor spec example + If you want to ingest data from other fields in addition to the Kafka message contents, you can use the `kafka` input format. The `kafka` input format lets you ingest: - the event key field - event headers @@ -141,7 +144,7 @@ For example, consider the following structure for a message that represents a fi - **Event timestamp**: "Nov. 10, 2021 at 14:06" When you use the `kafka` input format, you configure the way that Druid names the dimensions created from the Kafka message: -- `headerLabelPrefix`: Supply a prefix to the Kafka headers to avoid any conflicts with named dimensions. The default is `kafka.header`. 
Considering the header from the example, Druid maps the header to the following column: `kafka.header.environment`. +- `headerColumnPrefix`: Supply a prefix to the Kafka headers to avoid any conflicts with named dimensions. The default is `kafka.header`. Considering the header from the example, Druid maps the header to the following column: `kafka.header.environment`. - `timestampColumnName`: Supply a custom name for the Kafka timestamp in the Druid schema to avoid conflicts with other time columns. The default is `kafka.timestamp`. - `keyColumnName`: Supply the name for the Kafka key column in Druid. The default is `kafka.key`. Additionally, you must provide information about how Druid should parse the data in the Kafka message: @@ -159,7 +162,7 @@ Additionally, you must provide information about how Druid should parse the data For more information on data formats, see [Data formats](../../ingestion/data-formats.md). -Finally, add the Kafka message columns to the `dimensionsSpec`. For the key and timestamp, you can use the dimension names you defined for `keyColumnName` and `timestampColumnName`. For header dimensions, append the header key to the `headerLabelPrefix`. For example `kafka.header.environment`. +Finally, add the Kafka message columns to the `dimensionsSpec`. For the key and timestamp, you can use the dimension names you defined for `keyColumnName` and `timestampColumnName`. For header dimensions, append the header key to the `headerColumnPrefix`. For example `kafka.header.environment`. 
The following supervisor spec demonstrates how to ingest the Kafka header, key, and timestamp into Druid dimensions: ``` @@ -174,7 +177,7 @@ The following supervisor spec demonstrates how to ingest the Kafka header, key, "topic": "wiki-edits", "inputFormat": { "type": "kafka", - "headerLabelPrefix": "kafka.header.", + "headerColumnPrefix": "kafka.header.", "timestampColumnName": "kafka.timestamp", "keyColumnName": "kafka.key", "headerFormat": { diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index 7bf50956a55a..c975f885509f 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -170,7 +170,7 @@ Configure the Kafka `inputFormat` to load complete kafka records including heade | Field | Type | Description | Required | |-------|------|-------------|----------| | `type` | String | Set value to `kafka`. | yes | -| `headerLabelPrefix` | String | Custom label prefix for all the header columns. | no (default = "kafka.header.") | +| `headerColumnPrefix` | String | Custom prefix for all the header columns. | no (default = "kafka.header.") | | `timestampColumnName` | String | Name of the column for the kafka record's timestamp.| no (default = "kafka.timestamp") | | `keyColumnName` | String | Name of the column for the kafka record's key.| no (default = "kafka.key") | | `headerFormat` | Object | `headerFormat` specifies how to parse the Kafka headers. Supports String types. Because Kafka header values are bytes, the parser decodes them as UTF-8 encoded strings. To change this behavior, implement your own parser based on the encoding style. Change the 'encoding' type in `KafkaStringHeaderFormat` to match your custom implementation. 
| no | @@ -183,7 +183,7 @@ For example: "ioConfig": { "inputFormat": { "type": "kafka", - "headerLabelPrefix": "kafka.header.", + "headerColumnPrefix": "kafka.header.", "timestampColumnName": "kafka.timestamp", "keyColumnName": "kafka.key", "headerFormat": diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts index 69353125e083..3336c1199d35 100644 --- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts +++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts @@ -23,7 +23,7 @@ import { cleanSpec, guessColumnTypeFromInput, guessColumnTypeFromSampleResponse, - guessInputFormat, + guessSimpleInputFormat, upgradeSpec, } from './ingestion-spec'; @@ -563,26 +563,26 @@ describe('ingestion-spec', () => { }); }); - describe('guessInputFormat', () => { + describe('guessSimpleInputFormat', () => { it('works for parquet', () => { - expect(guessInputFormat(['PAR1lol']).type).toEqual('parquet'); + expect(guessSimpleInputFormat(['PAR1lol']).type).toEqual('parquet'); }); it('works for orc', () => { - expect(guessInputFormat(['ORClol']).type).toEqual('orc'); + expect(guessSimpleInputFormat(['ORClol']).type).toEqual('orc'); }); it('works for AVRO', () => { - expect(guessInputFormat(['Obj\x01lol']).type).toEqual('avro_ocf'); - expect(guessInputFormat(['Obj1lol']).type).toEqual('regex'); + expect(guessSimpleInputFormat(['Obj\x01lol']).type).toEqual('avro_ocf'); + expect(guessSimpleInputFormat(['Obj1lol']).type).toEqual('regex'); }); it('works for JSON (strict)', () => { - expect(guessInputFormat(['{"a":1}'])).toEqual({ type: 'json' }); + expect(guessSimpleInputFormat(['{"a":1}'])).toEqual({ type: 'json' }); }); it('works for JSON (lax)', () => { - expect(guessInputFormat([`{hello:'world'}`])).toEqual({ + expect(guessSimpleInputFormat([`{hello:'world'}`])).toEqual({ type: 'json', featureSpec: { ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER: true, @@ -600,14 +600,14 
@@ describe('ingestion-spec', () => { }); it('works for CSV (with header)', () => { - expect(guessInputFormat(['A,B,"X,1",Y'])).toEqual({ + expect(guessSimpleInputFormat(['A,B,"X,1",Y'])).toEqual({ type: 'csv', findColumnsFromHeader: true, }); }); it('works for CSV (no header)', () => { - expect(guessInputFormat(['"A,1","B,2",1,2'])).toEqual({ + expect(guessSimpleInputFormat(['"A,1","B,2",1,2'])).toEqual({ type: 'csv', findColumnsFromHeader: false, columns: ['column1', 'column2', 'column3', 'column4'], @@ -615,14 +615,14 @@ describe('ingestion-spec', () => { }); it('works for TSV (with header)', () => { - expect(guessInputFormat(['A\tB\tX\tY'])).toEqual({ + expect(guessSimpleInputFormat(['A\tB\tX\tY'])).toEqual({ type: 'tsv', findColumnsFromHeader: true, }); }); it('works for TSV (no header)', () => { - expect(guessInputFormat(['A\tB\t1\t2\t3\t4\t5\t6\t7\t8\t9'])).toEqual({ + expect(guessSimpleInputFormat(['A\tB\t1\t2\t3\t4\t5\t6\t7\t8\t9'])).toEqual({ type: 'tsv', findColumnsFromHeader: false, columns: [ @@ -642,7 +642,7 @@ describe('ingestion-spec', () => { }); it('works for TSV with ;', () => { - const inputFormat = guessInputFormat(['A;B;X;Y']); + const inputFormat = guessSimpleInputFormat(['A;B;X;Y']); expect(inputFormat).toEqual({ type: 'tsv', delimiter: ';', @@ -651,7 +651,7 @@ describe('ingestion-spec', () => { }); it('works for TSV with |', () => { - const inputFormat = guessInputFormat(['A|B|X|Y']); + const inputFormat = guessSimpleInputFormat(['A|B|X|Y']); expect(inputFormat).toEqual({ type: 'tsv', delimiter: '|', @@ -660,7 +660,7 @@ describe('ingestion-spec', () => { }); it('works for regex', () => { - expect(guessInputFormat(['A/B/X/Y'])).toEqual({ + expect(guessSimpleInputFormat(['A/B/X/Y'])).toEqual({ type: 'regex', pattern: '([\\s\\S]*)', columns: ['line'], diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx index 3bcbbfb2d0b0..a13cbfcd49f7 100644 --- 
a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx +++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx @@ -34,6 +34,7 @@ import { EMPTY_ARRAY, EMPTY_OBJECT, filterMap, + findMap, isSimpleArray, oneOf, parseCsvLine, @@ -2137,33 +2138,32 @@ export function updateIngestionType( } export function issueWithSampleData( - sampleData: string[], + sampleData: SampleResponse, spec: Partial, ): JSX.Element | undefined { if (isStreamingSpec(spec)) return; - if (sampleData.length) { - const firstData = sampleData[0]; + const firstData: string = findMap(sampleData.data, l => l.input?.raw); + if (!firstData) return; - if (firstData === '{') { - return ( - <> - This data looks like regular JSON object. For Druid to parse a text file it must have one - row per event. Maybe look at{' '} - newline delimited JSON instead. - - ); - } + if (firstData === '{') { + return ( + <> + This data looks like regular JSON object. For Druid to parse a text file it must have one + row per event. Maybe look at{' '} + newline delimited JSON instead. + + ); + } - if (oneOf(firstData, '[', '[]')) { - return ( - <> - This data looks like a multi-line JSON array. For Druid to parse a text file it must have - one row per event. Maybe look at{' '} - newline delimited JSON instead. - - ); - } + if (oneOf(firstData, '[', '[]')) { + return ( + <> + This data looks like a multi-line JSON array. For Druid to parse a text file it must have + one row per event. Maybe look at{' '} + newline delimited JSON instead. + + ); } return; @@ -2171,13 +2171,19 @@ export function issueWithSampleData( export function fillInputFormatIfNeeded( spec: Partial, - sampleData: string[], + sampleResponse: SampleResponse, ): Partial { if (deepGet(spec, 'spec.ioConfig.inputFormat.type')) return spec; + return deepSet( spec, 'spec.ioConfig.inputFormat', - guessInputFormat(sampleData, isStreamingSpec(spec)), + getSpecType(spec) === 'kafka' + ?
guessKafkaInputFormat(filterMap(sampleResponse.data, l => l.input)) + : guessSimpleInputFormat( + filterMap(sampleResponse.data, l => l.input?.raw), + isStreamingSpec(spec), + ), ); } @@ -2185,7 +2191,22 @@ function noNumbers(xs: string[]): boolean { return xs.every(x => isNaN(Number(x))); } -export function guessInputFormat(sampleRaw: string[], canBeMultiLineJson = false): InputFormat { +export function guessKafkaInputFormat(sampleRaw: Record[]): InputFormat { + const hasHeader = sampleRaw.some(x => Object.keys(x).some(k => k.startsWith('kafka.header.'))); + const keys = filterMap(sampleRaw, x => x['kafka.key']); + const payloads = filterMap(sampleRaw, x => x.raw); + return { + type: 'kafka', + headerFormat: hasHeader ? { type: 'string' } : undefined, + keyFormat: keys.length ? guessSimpleInputFormat(keys, true) : undefined, + valueFormat: guessSimpleInputFormat(payloads, true), + }; +} + +export function guessSimpleInputFormat( + sampleRaw: string[], + canBeMultiLineJson = false, +): InputFormat { let sampleDatum = sampleRaw[0]; if (sampleDatum) { sampleDatum = String(sampleDatum); // Really ensure it is a string diff --git a/web-console/src/druid-models/input-format/input-format.tsx b/web-console/src/druid-models/input-format/input-format.tsx index 679f3bafb0e4..317ed33c804b 100644 --- a/web-console/src/druid-models/input-format/input-format.tsx +++ b/web-console/src/druid-models/input-format/input-format.tsx @@ -22,7 +22,7 @@ import React from 'react'; import type { Field } from '../../components'; import { AutoForm, ExternalLink } from '../../components'; import { getLink } from '../../links'; -import { compact, oneOf, typeIs } from '../../utils'; +import { compact, deepGet, deepSet, oneOf, typeIs } from '../../utils'; import type { FlattenSpec } from '../flatten-spec/flatten-spec'; export interface InputFormat { @@ -39,15 +39,33 @@ export interface InputFormat { readonly keepNullColumns?: boolean; readonly assumeNewlineDelimited?: boolean; readonly 
useJsonNodeReader?: boolean; + + // type: kafka + readonly timestampColumnName?: string; + readonly headerFormat?: { type: 'string'; encoding?: string }; + readonly headerColumnPrefix?: string; + readonly keyFormat?: InputFormat; + readonly keyColumnName?: string; + readonly valueFormat?: InputFormat; } -function generateInputFormatFields(streaming: boolean) { - return compact([ +function generateInputFormatFields(streamingType: 'kafka' | 'kinesis' | undefined) { + const fields = compact([ { name: 'type', label: 'Input format', type: 'string', - suggestions: ['json', 'csv', 'tsv', 'parquet', 'orc', 'avro_ocf', 'avro_stream', 'regex'], + suggestions: compact([ + streamingType === 'kafka' ? 'kafka' : undefined, + 'json', + 'csv', + 'tsv', + 'parquet', + 'orc', + 'avro_ocf', + 'avro_stream', + 'regex', + ]), required: true, info: ( <> @@ -83,12 +101,12 @@ function generateInputFormatFields(streaming: boolean) { ), }, - streaming + streamingType ? { name: 'assumeNewlineDelimited', type: 'boolean', defined: typeIs('json'), - disabled: (inputFormat: InputFormat) => inputFormat.useJsonNodeReader, + disabled: inputFormat => Boolean(inputFormat.useJsonNodeReader), defaultValue: false, info: ( <> @@ -112,13 +130,13 @@ function generateInputFormatFields(streaming: boolean) { ), } : undefined, - streaming + streamingType ? 
{ name: 'useJsonNodeReader', - title: 'Use JSON node reader', + label: 'Use JSON node reader', type: 'boolean', defined: typeIs('json'), - disabled: (inputFormat: InputFormat) => inputFormat.assumeNewlineDelimited, + disabled: inputFormat => Boolean(inputFormat.assumeNewlineDelimited), defaultValue: false, info: ( <> @@ -222,13 +240,480 @@ function generateInputFormatFields(streaming: boolean) { ), }, ] as (Field | undefined)[]); + + if (streamingType === 'kafka') { + fields.push( + { + name: 'timestampColumnName', + label: 'Kafka timestamp column name', + type: 'string', + defaultValue: 'kafka.timestamp', + defined: typeIs('kafka'), + info: `Name of the column for the kafka record's timestamp.`, + }, + + // ----------------------------------------------------- + // valueFormat fields + + { + name: 'valueFormat.type', + label: 'Kafka payload input format', + type: 'string', + suggestions: ['json', 'csv', 'tsv', 'parquet', 'orc', 'avro_ocf', 'avro_stream', 'regex'], + required: true, + defined: typeIs('kafka'), + info: ( + <> +

The parser used to parse the payload of the Kafka message.

+

+ For more information see{' '} + + the documentation + + . +

+ + ), + }, + { + name: 'valueFormat.featureSpec', + label: 'Kafka payload JSON parser features', + type: 'json', + defined: inputFormat => deepGet(inputFormat, 'valueFormat.type') === 'json', + info: ( + <> +

+ + JSON parser features + {' '} + supported by Jackson library. Those features will be applied when parsing the input + JSON data. +

+

+ Example:{' '} + {`{ "ALLOW_SINGLE_QUOTES": true, "ALLOW_UNQUOTED_FIELD_NAMES": true }`} +

+ + ), + }, + { + name: 'valueFormat.assumeNewlineDelimited', + label: 'Kafka payload assume newline delimited', + type: 'boolean', + defined: inputFormat => deepGet(inputFormat, 'valueFormat.type') === 'json', + disabled: inputFormat => Boolean(deepGet(inputFormat, 'valueFormat.useJsonNodeReader')), + defaultValue: false, + info: ( + <> +

+ In streaming ingestion, multi-line JSON events can be ingested (i.e. where a single + JSON event spans multiple lines). However, if a parsing exception occurs, all JSON + events that are present in the same streaming record will be discarded. +

+

+ assumeNewlineDelimited and useJsonNodeReader (at most one + can be true) affect only how parsing exceptions are handled. +

+

+ If the input is known to be newline delimited JSON (each individual JSON event is + contained in a single line, separated by newlines), setting this option to true allows + for more flexible parsing exception handling. Only the lines with invalid JSON syntax + will be discarded, while lines containing valid JSON events will still be ingested. +

+ + ), + }, + { + name: 'valueFormat.useJsonNodeReader', + label: 'Kafka payload use JSON node reader', + type: 'boolean', + defined: inputFormat => deepGet(inputFormat, 'valueFormat.type') === 'json', + disabled: inputFormat => Boolean(deepGet(inputFormat, 'valueFormat.assumeNewlineDelimited')), + defaultValue: false, + info: ( + <> + {' '} +

+ In streaming ingestion, multi-line JSON events can be ingested (i.e. where a single + JSON event spans multiple lines). However, if a parsing exception occurs, all JSON + events that are present in the same streaming record will be discarded. +

+

+ assumeNewlineDelimited and useJsonNodeReader (at most one + can be true) affect only how parsing exceptions are handled. +

+

+ When ingesting multi-line JSON events, enabling this option will enable the use of a + JSON parser which will retain any valid JSON events encountered within a streaming + record prior to when a parsing exception occurred. +

+ + ), + }, + { + name: 'valueFormat.delimiter', + label: 'Kafka payload delimiter', + type: 'string', + defaultValue: '\t', + suggestions: ['\t', ';', '|', '#'], + defined: inputFormat => deepGet(inputFormat, 'valueFormat.type') === 'tsv', + info: <>A custom delimiter for data values., + }, + { + name: 'valueFormat.pattern', + label: 'Kafka payload pattern', + type: 'string', + defined: inputFormat => deepGet(inputFormat, 'valueFormat.type') === 'regex', + required: true, + }, + { + name: 'valueFormat.skipHeaderRows', + label: 'Kafka payload skip header rows', + type: 'number', + defaultValue: 0, + defined: inputFormat => oneOf(deepGet(inputFormat, 'valueFormat.type'), 'csv', 'tsv'), + min: 0, + info: ( + <> + If this is set, skip the first skipHeaderRows rows from each file. + + ), + }, + { + name: 'valueFormat.findColumnsFromHeader', + label: 'Kafka payload find columns from header', + type: 'boolean', + defined: inputFormat => oneOf(deepGet(inputFormat, 'valueFormat.type'), 'csv', 'tsv'), + required: true, + info: ( + <> + If this is set, find the column names from the header row. Note that + skipHeaderRows will be applied before finding column names from the header. + For example, if you set skipHeaderRows to 2 and{' '} + findColumnsFromHeader to true, the task will skip the first two lines and + then extract column information from the third line. + + ), + }, + { + name: 'valueFormat.columns', + label: 'Kafka payload columns', + type: 'string-array', + required: true, + defined: inputFormat => + (oneOf(deepGet(inputFormat, 'valueFormat.type'), 'csv', 'tsv') && + deepGet(inputFormat, 'valueFormat.findColumnsFromHeader') === false) || + deepGet(inputFormat, 'valueFormat.type') === 'regex', + info: ( + <> + Specifies the columns of the data. The columns should be in the same order with the + columns of your data. 
+ + ), + }, + { + name: 'valueFormat.listDelimiter', + label: 'Kafka payload list delimiter', + type: 'string', + defaultValue: '\x01', + suggestions: ['\x01', '\x00'], + defined: inputFormat => + oneOf(deepGet(inputFormat, 'valueFormat.type'), 'csv', 'tsv', 'regex'), + info: <>A custom delimiter for multi-value dimensions., + }, + { + name: 'valueFormat.binaryAsString', + label: 'Kafka payload list binary as string', + type: 'boolean', + defaultValue: false, + defined: inputFormat => + oneOf( + deepGet(inputFormat, 'valueFormat.type'), + 'parquet', + 'orc', + 'avro_ocf', + 'avro_stream', + ), + info: ( + <> + Specifies if the binary column which is not logically marked as a string should be + treated as a UTF-8 encoded string. + + ), + }, + + // ----------------------------------------------------- + // keyFormat fields + + { + name: 'keyFormat.type', + label: 'Kafka key input format', + type: 'string', + suggestions: [ + undefined, + 'json', + 'csv', + 'tsv', + 'parquet', + 'orc', + 'avro_ocf', + 'avro_stream', + 'regex', + ], + placeholder: `(don't parse Kafka key)`, + defined: typeIs('kafka'), + info: ( + <> +

The parser used to parse the key of the Kafka message.

+

+ For more information see{' '} + + the documentation + + . +

+ + ), + adjustment: inputFormat => { + const keyFormatType = deepGet(inputFormat, 'keyFormat.type'); + // If the user selects one of these formats then populate the columns (that are in any case meaningless in this context) + // with an initial value. + switch (keyFormatType) { + case 'regex': + inputFormat = deepSet(inputFormat, 'keyFormat.pattern', '([\\s\\S]*)'); + inputFormat = deepSet(inputFormat, 'keyFormat.columns', ['x']); + break; + + case 'csv': + case 'tsv': + inputFormat = deepSet(inputFormat, 'keyFormat.findColumnsFromHeader', false); + inputFormat = deepSet(inputFormat, 'keyFormat.columns', ['x']); + break; + } + return inputFormat; + }, + }, + { + name: 'keyFormat.featureSpec', + label: 'Kafka key JSON parser features', + type: 'json', + defined: inputFormat => deepGet(inputFormat, 'keyFormat.type') === 'json', + hideInMore: true, + info: ( + <> +

+ + JSON parser features + {' '} + supported by Jackson library. Those features will be applied when parsing the input + JSON data. +

+

+ Example:{' '} + {`{ "ALLOW_SINGLE_QUOTES": true, "ALLOW_UNQUOTED_FIELD_NAMES": true }`} +

+ + ), + }, + { + name: 'keyFormat.assumeNewlineDelimited', + label: 'Kafka key assume newline delimited', + type: 'boolean', + defined: inputFormat => deepGet(inputFormat, 'keyFormat.type') === 'json', + disabled: inputFormat => Boolean(deepGet(inputFormat, 'keyFormat.useJsonNodeReader')), + defaultValue: false, + hideInMore: true, + info: ( + <> +

+ In streaming ingestion, multi-line JSON events can be ingested (i.e. where a single + JSON event spans multiple lines). However, if a parsing exception occurs, all JSON + events that are present in the same streaming record will be discarded. +

+

+ assumeNewlineDelimited and useJsonNodeReader (at most one + can be true) affect only how parsing exceptions are handled. +

+

+ If the input is known to be newline delimited JSON (each individual JSON event is + contained in a single line, separated by newlines), setting this option to true allows + for more flexible parsing exception handling. Only the lines with invalid JSON syntax + will be discarded, while lines containing valid JSON events will still be ingested. +

+ + ), + }, + { + name: 'keyFormat.useJsonNodeReader', + label: 'Kafka key use JSON node reader', + type: 'boolean', + defined: inputFormat => deepGet(inputFormat, 'keyFormat.type') === 'json', + disabled: inputFormat => Boolean(deepGet(inputFormat, 'keyFormat.assumeNewlineDelimited')), + defaultValue: false, + hideInMore: true, + info: ( + <> + {' '} +

+ In streaming ingestion, multi-line JSON events can be ingested (i.e. where a single + JSON event spans multiple lines). However, if a parsing exception occurs, all JSON + events that are present in the same streaming record will be discarded. +

+

+ assumeNewlineDelimited and useJsonNodeReader (at most one + can be true) affect only how parsing exceptions are handled. +

+

+ When ingesting multi-line JSON events, enabling this option will enable the use of a + JSON parser which will retain any valid JSON events encountered within a streaming + record prior to when a parsing exception occurred. +

+ + ), + }, + { + name: 'keyFormat.delimiter', + label: 'Kafka key delimiter', + type: 'string', + defaultValue: '\t', + suggestions: ['\t', ';', '|', '#'], + defined: inputFormat => deepGet(inputFormat, 'keyFormat.type') === 'tsv', + info: <>A custom delimiter for data values., + }, + { + name: 'keyFormat.pattern', + label: 'Kafka key pattern', + type: 'string', + defined: inputFormat => deepGet(inputFormat, 'keyFormat.type') === 'regex', + required: true, + }, + { + name: 'keyFormat.skipHeaderRows', + label: 'Kafka key skip header rows', + type: 'number', + defaultValue: 0, + defined: inputFormat => oneOf(deepGet(inputFormat, 'keyFormat.type'), 'csv', 'tsv'), + min: 0, + info: ( + <> + If this is set, skip the first skipHeaderRows rows from each file. + + ), + }, + { + name: 'keyFormat.findColumnsFromHeader', + label: 'Kafka key find columns from header', + type: 'boolean', + defined: inputFormat => oneOf(deepGet(inputFormat, 'keyFormat.type'), 'csv', 'tsv'), + required: true, + hideInMore: true, + info: ( + <> + If this is set, find the column names from the header row. Note that + skipHeaderRows will be applied before finding column names from the header. + For example, if you set skipHeaderRows to 2 and{' '} + findColumnsFromHeader to true, the task will skip the first two lines and + then extract column information from the third line. + + ), + }, + { + name: 'keyFormat.columns', + label: 'Kafka key columns', + type: 'string-array', + required: true, + defined: inputFormat => + (oneOf(deepGet(inputFormat, 'keyFormat.type'), 'csv', 'tsv') && + deepGet(inputFormat, 'keyFormat.findColumnsFromHeader') === false) || + deepGet(inputFormat, 'keyFormat.type') === 'regex', + hideInMore: true, + info: ( + <> + Only the value of the first column will be read, the name of the column will be ignored + so enter anything here. 
+ ), + }, + { + name: 'keyFormat.listDelimiter', + label: 'Kafka key list delimiter', + type: 'string', + defaultValue: '\x01', + suggestions: ['\x01', '\x00'], + defined: inputFormat => + oneOf(deepGet(inputFormat, 'keyFormat.type'), 'csv', 'tsv', 'regex'), + info: <>A custom delimiter for multi-value dimensions., + }, + { + name: 'keyFormat.binaryAsString', + label: 'Kafka key list binary as string', + type: 'boolean', + defaultValue: false, + defined: inputFormat => + oneOf( + deepGet(inputFormat, 'keyFormat.type'), + 'parquet', + 'orc', + 'avro_ocf', + 'avro_stream', + ), + info: ( + <> + Specifies if the binary column which is not logically marked as a string should be + treated as a UTF-8 encoded string. + + ), + }, + + // keyColumnName + { + name: 'keyColumnName', + label: 'Kafka key column name', + type: 'string', + defaultValue: 'kafka.key', + defined: inputFormat => Boolean(deepGet(inputFormat, 'keyFormat.type')), + info: `Name of the column for the kafka record's key.`, + }, + + // ----------------------------------------------------- + + { + name: 'headerFormat.type', + label: 'Kafka header format type', + type: 'string', + defined: typeIs('kafka'), + placeholder: `(don't parse Kafka headers)`, + suggestions: [undefined, 'string'], + }, + { + name: 'headerFormat.encoding', + label: 'Kafka header format encoding', + type: 'string', + defaultValue: 'UTF-8', + defined: inputFormat => deepGet(inputFormat, 'headerFormat.type') === 'string', + suggestions: ['UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', 'US-ASCII', 'ISO-8859-1'], + }, + { + name: 'headerColumnPrefix', + label: 'Kafka header column prefix', + type: 'string', + defaultValue: 'kafka.header.', + defined: typeIs('kafka'), + info: `Custom prefix for all the header columns.`, + }, + ); + } + + return fields; } -export const INPUT_FORMAT_FIELDS: Field[] = generateInputFormatFields(false); -export const STREAMING_INPUT_FORMAT_FIELDS: Field[] = generateInputFormatFields(true); +export const
BATCH_INPUT_FORMAT_FIELDS: Field[] = generateInputFormatFields(undefined); +export const KINESIS_INPUT_FORMAT_FIELDS: Field[] = + generateInputFormatFields('kinesis'); +export const KAFKA_INPUT_FORMAT_FIELDS: Field[] = generateInputFormatFields('kafka'); export function issueWithInputFormat(inputFormat: InputFormat | undefined): string | undefined { - return AutoForm.issueWithModel(inputFormat, INPUT_FORMAT_FIELDS); + return AutoForm.issueWithModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS); } export const inputFormatCanProduceNestedData: (inputFormat: InputFormat) => boolean = typeIs( diff --git a/web-console/src/utils/general.tsx b/web-console/src/utils/general.tsx index 9140452947c9..3b29dceafdcd 100644 --- a/web-console/src/utils/general.tsx +++ b/web-console/src/utils/general.tsx @@ -278,6 +278,13 @@ export function filterMap(xs: readonly T[], f: (x: T, i: number) => Q | un return xs.map(f).filter((x: Q | undefined) => typeof x !== 'undefined') as Q[]; } +export function findMap( + xs: readonly T[], + f: (x: T, i: number) => Q | undefined, +): Q | undefined { + return filterMap(xs, f)[0]; +} + export function compact(xs: (T | undefined | false | null | '')[]): T[] { return xs.filter(Boolean) as T[]; } diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts index 88bf7cca0535..00988f2af5c4 100644 --- a/web-console/src/utils/sampler.ts +++ b/web-console/src/utils/sampler.ts @@ -217,6 +217,23 @@ function fixSamplerTypes(sampleSpec: SampleSpec): SampleSpec { return sampleSpec; } +const WHOLE_ROW_INPUT_FORMAT: InputFormat = { + type: 'regex', + pattern: '([\\s\\S]*)', // Match the entire line, every single character + listDelimiter: '56616469-6de2-9da4-efb8-8f416e6e6965', // Just a UUID to disable the list delimiter, let's hope we do not see this UUID in the data + columns: ['raw'], +}; + +const KAFKA_SAMPLE_INPUT_FORMAT: InputFormat = { + type: 'kafka', + headerFormat: { + type: 'string', + encoding: 'UTF-8', + }, + keyFormat: 
WHOLE_ROW_INPUT_FORMAT, + valueFormat: WHOLE_ROW_INPUT_FORMAT, +}; + export async function sampleForConnect( spec: Partial, sampleStrategy: SampleStrategy, @@ -230,12 +247,11 @@ export async function sampleForConnect( const reingestMode = isDruidSource(spec); if (!reingestMode) { - ioConfig = deepSet(ioConfig, 'inputFormat', { - type: 'regex', - pattern: '([\\s\\S]*)', // Match the entire line, every single character - listDelimiter: '56616469-6de2-9da4-efb8-8f416e6e6965', // Just a UUID to disable the list delimiter, let's hope we do not see this UUID in the data - columns: ['raw'], - }); + ioConfig = deepSet( + ioConfig, + 'inputFormat', + samplerType === 'kafka' ? KAFKA_SAMPLE_INPUT_FORMAT : WHOLE_ROW_INPUT_FORMAT, + ); } const sampleSpec: SampleSpec = { diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx index 499b4f4dd382..35acba45a781 100644 --- a/web-console/src/views/load-data-view/load-data-view.tsx +++ b/web-console/src/views/load-data-view/load-data-view.tsx @@ -73,6 +73,7 @@ import { addTimestampTransform, adjustForceGuaranteedRollup, adjustId, + BATCH_INPUT_FORMAT_FIELDS, cleanSpec, computeFlattenPathsForData, CONSTANT_TIMESTAMP_SPEC, @@ -95,10 +96,10 @@ import { getRequiredModule, getRollup, getSecondaryPartitionRelatedFormFields, + getSpecType, getTimestampExpressionFields, getTimestampSchema, getTuningFormFields, - INPUT_FORMAT_FIELDS, inputFormatCanProduceNestedData, invalidIoConfig, invalidPartitionConfig, @@ -108,6 +109,8 @@ import { issueWithIoConfig, issueWithSampleData, joinFilter, + KAFKA_INPUT_FORMAT_FIELDS, + KINESIS_INPUT_FORMAT_FIELDS, KNOWN_FILTER_TYPES, MAX_INLINE_DATA_LENGTH, METRIC_SPEC_FIELDS, @@ -117,7 +120,6 @@ import { PRIMARY_PARTITION_RELATED_FORM_FIELDS, removeTimestampTransform, splitFilter, - STREAMING_INPUT_FORMAT_FIELDS, TIME_COLUMN, TIMESTAMP_SPEC_FIELDS, TRANSFORM_FIELDS, @@ -129,6 +131,7 @@ import { getLink } from '../../links'; import { Api, 
AppToaster, UrlBaser } from '../../singletons'; import { alphanumericCompare, + compact, deepDelete, deepGet, deepSet, @@ -213,28 +216,49 @@ function showRawLine(line: SampleEntry): string { } function showDruidLine(line: SampleEntry): string { - if (!line.input) return 'Invalid row'; - return `Druid row: ${JSONBig.stringify(line.input)}`; + if (!line.input) return 'Invalid druid row'; + return `[Druid row: ${JSONBig.stringify(line.input)}]`; +} + +function showKafkaLine(line: SampleEntry): string { + const { input } = line; + if (!input) return 'Invalid kafka row'; + return compact([ + `[ Kafka timestamp: ${input['kafka.timestamp']}`, + ...filterMap(Object.entries(input), ([k, v]) => { + if (!k.startsWith('kafka.header.')) return; + return ` Header: ${k.slice(13)}=${v}`; + }), + input['kafka.key'] ? ` Key: ${input['kafka.key']}` : undefined, + ` Payload: ${input.raw}`, + ']', + ]).join('\n'); } function showBlankLine(line: SampleEntry): string { return line.parsed ? `[Row: ${JSONBig.stringify(line.parsed)}]` : '[Binary data]'; } -function formatSampleEntries(sampleEntries: SampleEntry[], isDruidSource: boolean): string { - if (sampleEntries.length) { - if (isDruidSource) { - return sampleEntries.map(showDruidLine).join('\n'); - } +function formatSampleEntries( + sampleEntries: SampleEntry[], + druidSource: boolean, + kafkaSource: boolean, +): string { + if (!sampleEntries.length) return 'No data returned from sampler'; - return ( - sampleEntries.every(l => !l.parsed) - ? sampleEntries.map(showBlankLine) - : sampleEntries.map(showRawLine) - ).join('\n'); - } else { - return 'No data returned from sampler'; + if (druidSource) { + return sampleEntries.map(showDruidLine).join('\n'); + } + + if (kafkaSource) { + return sampleEntries.map(showKafkaLine).join('\n'); } + + return ( + sampleEntries.every(l => !l.parsed) + ? 
sampleEntries.map(showBlankLine) + : sampleEntries.map(showRawLine) + ).join('\n'); } function getTimestampSpec(sampleResponse: SampleResponse | null): TimestampSpec { @@ -1215,6 +1239,7 @@ export class LoadDataView extends React.PureComponent )} {inputQueryState.isLoading() && } @@ -1368,11 +1393,7 @@ export class LoadDataView extends React.PureComponent - l.input ? l.input.raw : undefined, - ); - - const issue = issueWithSampleData(sampleLines, spec); + const issue = issueWithSampleData(inputData, spec); if (issue) { AppToaster.show({ icon: IconNames.WARNING_SIGN, @@ -1383,9 +1404,7 @@ export class LoadDataView extends React.PureComponent
{mainFill}
diff --git a/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx b/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx index aee24b937048..7da27679e281 100644 --- a/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx +++ b/web-console/src/views/workbench-view/input-format-step/input-format-step.tsx @@ -25,9 +25,9 @@ import React, { useState } from 'react'; import { AutoForm, CenterMessage, LearnMore, Loader } from '../../../components'; import type { InputFormat, InputSource } from '../../../druid-models'; import { + BATCH_INPUT_FORMAT_FIELDS, guessColumnTypeFromSampleResponse, guessIsArrayFromSampleResponse, - INPUT_FORMAT_FIELDS, inputFormatOutputsNumericStrings, PLACEHOLDER_TIMESTAMP_SPEC, possibleDruidFormatForValues, @@ -76,7 +76,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF const [inputFormat, setInputFormat] = useState>(initInputFormat); const [inputFormatToSample, setInputFormatToSample] = useState( - AutoForm.isValidModel(initInputFormat, INPUT_FORMAT_FIELDS) ? initInputFormat : undefined, + AutoForm.isValidModel(initInputFormat, BATCH_INPUT_FORMAT_FIELDS) ? initInputFormat : undefined, ); const [selectTimestamp, setSelectTimestamp] = useState(true); @@ -133,7 +133,7 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF } const inputFormatAndMore = - previewSampleResponse && AutoForm.isValidModel(inputFormat, INPUT_FORMAT_FIELDS) + previewSampleResponse && AutoForm.isValidModel(inputFormat, BATCH_INPUT_FORMAT_FIELDS) ? { inputFormat, signature: getHeaderNamesFromSampleResponse(previewSampleResponse).map(name => @@ -188,15 +188,19 @@ export const InputFormatStep = React.memo(function InputFormatStep(props: InputF - + {inputFormatToSample !== inputFormat && (