= {
name: 'inputSource.type',
- label: 'Firehose type',
+ label: 'Source type',
type: 'string',
- suggestions: ['local', 'http', 'inline', 'static-s3', 'static-google-blobstore', 'hdfs'],
+ suggestions: ['local', 'http', 'inline', 's3', 'static-google-blobstore', 'hdfs'],
info: (
Druid connects to raw data through{' '}
@@ -1139,7 +1139,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
// do not add 'data' here as it has special handling in the load-data view
];
- case 'index:static-s3':
+ case 'index:s3':
return [
inputSourceType,
{
@@ -1353,7 +1353,7 @@ function issueWithInputSource(inputSource: InputSource | undefined): string | un
if (!inputSource.data) return `must have 'data'`;
break;
- case 'static-s3':
+ case 's3':
if (!nonEmptyArray(inputSource.uris) && !nonEmptyArray(inputSource.prefixes)) {
return 'must have at least one uri or prefix';
}
@@ -1409,7 +1409,7 @@ export function getIoConfigTuningFormFields(
): Field[] {
switch (ingestionComboType) {
case 'index:http':
- case 'index:static-s3':
+ case 'index:s3':
case 'index:static-google-blobstore':
case 'index:hdfs':
return [
@@ -1746,7 +1746,7 @@ export function guessDataSourceName(spec: IngestionSpec): string | undefined {
return;
}
- case 'static-s3':
+ case 's3':
const s3Path =
(inputSource.uris || EMPTY_ARRAY)[0] || (inputSource.prefixes || EMPTY_ARRAY)[0];
return s3Path ? filenameFromPath(s3Path) : undefined;
@@ -2434,6 +2434,9 @@ export function getFilterFormFields() {
export function upgradeSpec(spec: any): any {
if (deepGet(spec, 'ioConfig.firehose')) {
+ // Reassign: deepSet's result is the updated spec (as at its other call sites), so dropping it would lose the rename
+ if (deepGet(spec, 'ioConfig.firehose.type') === 'static-s3') {
+   spec = deepSet(spec, 'ioConfig.firehose.type', 's3');
+ }
spec = deepMove(spec, 'ioConfig.firehose', 'ioConfig.inputSource');
spec = deepMove(spec, 'dataSchema.parser.parseSpec.timestampSpec', 'dataSchema.timestampSpec');
spec = deepMove(
@@ -2460,6 +2463,9 @@ export function downgradeSpec(spec: any): any {
);
spec = deepMove(spec, 'dataSchema.timestampSpec', 'dataSchema.parser.parseSpec.timestampSpec');
spec = deepMove(spec, 'ioConfig.inputSource', 'ioConfig.firehose');
+ // Reassign: deepSet's result is the updated spec (as at its other call sites), so dropping it would lose the rename
+ if (deepGet(spec, 'ioConfig.firehose.type') === 's3') {
+   spec = deepSet(spec, 'ioConfig.firehose.type', 'static-s3');
+ }
}
return spec;
}
diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts
index ca7c12646783..4e4a938981cf 100644
--- a/web-console/src/utils/sampler.ts
+++ b/web-console/src/utils/sampler.ts
@@ -55,15 +55,14 @@ export interface SampleSpec {
export interface SamplerConfig {
numRows?: number;
timeoutMs?: number;
- cacheKey?: string;
- skipCache?: boolean;
}
export interface SampleResponse {
- cacheKey?: string;
data: SampleEntry[];
}
+export type CacheRows = Record[];
+
export interface SampleResponseWithExtraInfo extends SampleResponse {
queryGranularity?: any;
timestampSpec?: any;
@@ -110,6 +109,10 @@ export function getSamplerType(spec: IngestionSpec): SamplerType {
return 'index';
}
+export function getCacheRowsFromSampleResponse(sampleResponse: SampleResponse): CacheRows { // maps each sampled entry to its `input` via filterMap and keeps at most the first 20 results as the cache rows
+ return filterMap(sampleResponse.data, d => d.input).slice(0, 20);
+}
+
export function headerFromSampleResponse(
sampleResponse: SampleResponse,
ignoreColumn?: string,
@@ -308,7 +311,7 @@ export async function sampleForConnect(
export async function sampleForParser(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheKey: string | undefined,
+ cacheRows: CacheRows | undefined,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -326,18 +329,21 @@ export async function sampleForParser(
dimensionsSpec: {},
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
- return postToSampler(sampleSpec, 'parser');
+ return postToSampler(applyCache(sampleSpec, cacheRows), 'parser');
+}
+
+function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows | undefined) {
+ if (!cacheRows) return sampleSpec;
+ return sampleSpec; // ToDo;
}
export async function sampleForTimestamp(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheKey: string | undefined,
+ cacheRows: CacheRows | undefined,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -359,12 +365,13 @@ export async function sampleForTimestamp(
timestampSpec: columnTimestampSpec ? getEmptyTimestampSpec() : timestampSpec,
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
- const sampleColumns = await postToSampler(sampleSpecColumns, 'timestamp-columns');
+ const sampleColumns = await postToSampler(
+ applyCache(sampleSpecColumns, cacheRows),
+ 'timestamp-columns',
+ );
// If we are not parsing a column then there is nothing left to do
if (!columnTimestampSpec) return sampleColumns;
@@ -382,17 +389,12 @@ export async function sampleForTimestamp(
timestampSpec,
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey: sampleColumns.cacheKey || cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
const sampleTime = await postToSampler(sampleSpec, 'timestamp-time');
- if (
- sampleTime.cacheKey !== sampleColumns.cacheKey ||
- sampleTime.data.length !== sampleColumns.data.length
- ) {
+ if (sampleTime.data.length !== sampleColumns.data.length) {
// If the two responses did not come from the same cache (or for some reason have different lengths) then
// just return the one with the parsed time column.
return sampleTime;
@@ -413,7 +415,7 @@ export async function sampleForTimestamp(
export async function sampleForTransform(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheKey: string | undefined,
+ cacheRows: CacheRows | undefined,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -437,12 +439,13 @@ export async function sampleForTransform(
dimensionsSpec: {},
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
- const sampleResponseHack = await postToSampler(sampleSpecHack, 'transform-pre');
+ const sampleResponseHack = await postToSampler(
+ applyCache(sampleSpecHack, cacheRows),
+ 'transform-pre',
+ );
specialDimensionSpec.dimensions = dedupe(
headerFromSampleResponse(
@@ -467,9 +470,7 @@ export async function sampleForTransform(
},
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
return postToSampler(sampleSpec, 'transform');
@@ -478,7 +479,7 @@ export async function sampleForTransform(
export async function sampleForFilter(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheKey: string | undefined,
+ cacheRows: CacheRows | undefined,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -503,12 +504,13 @@ export async function sampleForFilter(
dimensionsSpec: {},
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
- const sampleResponseHack = await postToSampler(sampleSpecHack, 'filter-pre');
+ const sampleResponseHack = await postToSampler(
+ applyCache(sampleSpecHack, cacheRows),
+ 'filter-pre',
+ );
specialDimensionSpec.dimensions = dedupe(
headerFromSampleResponse(
@@ -534,18 +536,16 @@ export async function sampleForFilter(
},
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
- return postToSampler(sampleSpec, 'filter');
+ return postToSampler(applyCache(sampleSpec, cacheRows), 'filter');
}
export async function sampleForSchema(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheKey: string | undefined,
+ cacheRows: CacheRows | undefined,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -576,18 +576,16 @@ export async function sampleForSchema(
metricsSpec,
},
},
- samplerConfig: Object.assign({}, BASE_SAMPLER_CONFIG, {
- cacheKey,
- }),
+ samplerConfig: BASE_SAMPLER_CONFIG,
};
- return postToSampler(sampleSpec, 'schema');
+ return postToSampler(applyCache(sampleSpec, cacheRows), 'schema');
}
export async function sampleForExampleManifests(
exampleManifestUrl: string,
): Promise {
- const sampleSpec: SampleSpec = {
+ const exampleSpec: SampleSpec = {
type: 'index',
spec: {
type: 'index',
@@ -605,10 +603,10 @@ export async function sampleForExampleManifests(
dimensionsSpec: {},
},
},
- samplerConfig: { numRows: 50, timeoutMs: 10000, skipCache: true },
+ samplerConfig: { numRows: 50, timeoutMs: 10000 },
};
- const exampleData = await postToSampler(sampleSpec, 'example-manifest');
+ const exampleData = await postToSampler(exampleSpec, 'example-manifest');
return filterMap(exampleData.data, datum => {
const parsed = datum.parsed;
diff --git a/web-console/src/utils/utils.spec.ts b/web-console/src/utils/utils.spec.ts
index 5402b7d9bd31..efdf4fb02a36 100644
--- a/web-console/src/utils/utils.spec.ts
+++ b/web-console/src/utils/utils.spec.ts
@@ -25,17 +25,7 @@ import {
updateSchemaWithSample,
} from './druid-type';
import { IngestionSpec } from './ingestion-spec';
-import {
- getSamplerType,
- headerFromSampleResponse,
- sampleForConnect,
- sampleForExampleManifests,
- sampleForFilter,
- sampleForParser,
- sampleForSchema,
- sampleForTimestamp,
- sampleForTransform,
-} from './sampler';
+import { getSamplerType, headerFromSampleResponse } from './sampler';
describe('test-utils', () => {
const ingestionSpec: IngestionSpec = {
@@ -67,39 +57,48 @@ describe('test-utils', () => {
dimensionsSpec: {},
},
};
+
+ // const cacheRows: CacheRows = [{ make: 'Honda', model: 'Civic' }, { make: 'BMW', model: 'M3' }];
+
it('spec-utils getSamplerType', () => {
expect(getSamplerType(ingestionSpec)).toMatchInlineSnapshot(`"index"`);
});
+
it('spec-utils headerFromSampleResponse', () => {
- expect(headerFromSampleResponse({ cacheKey: 'abc123', data: [] })).toMatchInlineSnapshot(
- `Array []`,
- );
- });
- it('spec-utils sampleForParser', () => {
- expect(sampleForParser(ingestionSpec, 'start', 'abc123')).toMatchInlineSnapshot(`Promise {}`);
- });
- it('spec-utils SampleSpec', () => {
- expect(sampleForConnect(ingestionSpec, 'start')).toMatchInlineSnapshot(`Promise {}`);
- });
- it('spec-utils sampleForTimestamp', () => {
- expect(sampleForTimestamp(ingestionSpec, 'start', 'abc123')).toMatchInlineSnapshot(
- `Promise {}`,
- );
- });
- it('spec-utils sampleForTransform', () => {
- expect(sampleForTransform(ingestionSpec, 'start', 'abc123')).toMatchInlineSnapshot(
- `Promise {}`,
- );
- });
- it('spec-utils sampleForFilter', () => {
- expect(sampleForFilter(ingestionSpec, 'start', 'abc123')).toMatchInlineSnapshot(`Promise {}`);
- });
- it('spec-utils sampleForSchema', () => {
- expect(sampleForSchema(ingestionSpec, 'start', 'abc123')).toMatchInlineSnapshot(`Promise {}`);
- });
- it('spec-utils sampleForExampleManifests', () => {
- expect(sampleForExampleManifests('abc123')).toMatchInlineSnapshot(`Promise {}`);
+ expect(
+ headerFromSampleResponse({ data: [{ input: { a: 1 }, parsed: { a: 1 } }] }),
+ ).toMatchInlineSnapshot();
});
+
+ // it('spec-utils sampleForParser', async () => {
+ // expect(await sampleForParser(ingestionSpec, 'start', 'abc123')).toMatchInlineSnapshot(
+ // `Promise {}`,
+ // );
+ // });
+ //
+ // it('spec-utils SampleSpec', async () => {
+ // expect(await sampleForConnect(ingestionSpec, 'start')).toMatchInlineSnapshot(`Promise {}`);
+ // });
+ //
+ // it('spec-utils sampleForTimestamp', async () => {
+ // expect(await sampleForTimestamp(ingestionSpec, 'start', cacheRows)).toMatchInlineSnapshot();
+ // });
+ //
+ // it('spec-utils sampleForTransform', async () => {
+ // expect(await sampleForTransform(ingestionSpec, 'start', cacheRows)).toMatchInlineSnapshot();
+ // });
+ //
+ // it('spec-utils sampleForFilter', async () => {
+ // expect(await sampleForFilter(ingestionSpec, 'start', cacheRows)).toMatchInlineSnapshot();
+ // });
+ //
+ // it('spec-utils sampleForSchema', async () => {
+ // expect(await sampleForSchema(ingestionSpec, 'start', cacheRows)).toMatchInlineSnapshot();
+ // });
+ //
+ // it('spec-utils sampleForExampleManifests', async () => {
+ // expect(await sampleForExampleManifests('some url')).toMatchInlineSnapshot();
+ // });
});
describe('druid-type.ts', () => {
@@ -132,14 +131,17 @@ describe('druid-type.ts', () => {
dimensionsSpec: {},
},
};
+
it('spec-utils getSamplerType', () => {
expect(guessTypeFromSample([])).toMatchInlineSnapshot(`"string"`);
});
+
it('spec-utils getColumnTypeFromHeaderAndRows', () => {
expect(
getColumnTypeFromHeaderAndRows({ header: ['header'], rows: [] }, 'header'),
).toMatchInlineSnapshot(`"string"`);
});
+
it('spec-utils getDimensionSpecs', () => {
expect(getDimensionSpecs({ header: ['header'], rows: [] }, true)).toMatchInlineSnapshot(`
Array [
@@ -147,6 +149,7 @@ describe('druid-type.ts', () => {
]
`);
});
+
it('spec-utils getMetricSecs', () => {
expect(getMetricSecs({ header: ['header'], rows: [] })).toMatchInlineSnapshot(`
Array [
@@ -157,6 +160,7 @@ describe('druid-type.ts', () => {
]
`);
});
+
it('spec-utils updateSchemaWithSample', () => {
expect(
updateSchemaWithSample(ingestionSpec, { header: ['header'], rows: [] }, 'specific', true),
@@ -210,9 +214,11 @@ describe('druid-query.ts', () => {
it('spec-utils parseHtmlError', () => {
expect(parseHtmlError('')).toMatchInlineSnapshot(`undefined`);
});
+
it('spec-utils parseHtmlError', () => {
expect(getDruidErrorMessage({})).toMatchInlineSnapshot(`undefined`);
});
+
it('spec-utils parseQueryPlan', () => {
expect(parseQueryPlan('start')).toMatchInlineSnapshot(`"start"`);
});
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index a1c853572cff..d198ad3c6d25 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -127,7 +127,9 @@ import {
} from '../../utils/ingestion-spec';
import { deepDelete, deepGet, deepSet } from '../../utils/object-change';
import {
+ CacheRows,
ExampleManifest,
+ getCacheRowsFromSampleResponse,
getProxyOverlordModules,
HeaderAndRows,
headerAndRowsFromSampleResponse,
@@ -165,6 +167,7 @@ import './load-data-view.scss';
function showRawLine(line: SampleEntry): string {
if (!line.parsed) return 'No parse';
const raw = line.parsed.raw;
+ if (typeof raw !== 'string') return 'Bad raw';
if (raw.includes('\n')) {
return `[Multi-line row, length: ${raw.length}]`;
}
@@ -262,7 +265,7 @@ export interface LoadDataViewState {
step: Step;
spec: IngestionSpec;
specPreview: IngestionSpec;
- cacheKey?: string;
+ cacheRows?: CacheRows;
// dialogs / modals
continueToSpec: boolean;
showResetConfirm: boolean;
@@ -699,7 +702,7 @@ export class LoadDataView extends React.PureComponent
{this.renderIngestionCard('kafka')}
{this.renderIngestionCard('kinesis')}
- {this.renderIngestionCard('index:static-s3')}
+ {this.renderIngestionCard('index:s3')}
{this.renderIngestionCard('index:static-google-blobstore')}
{this.renderIngestionCard('index:hdfs')}
{this.renderIngestionCard('index:ingestSegment')}
@@ -774,7 +777,7 @@ export class LoadDataView extends React.PureComponent
);
- case 'index:static-s3':
+ case 'index:s3':
return Load text based data from Amazon S3.
;
case 'index:static-google-blobstore':
@@ -826,7 +829,7 @@ export class LoadDataView extends React.PureComponent {
- const sampleResponse = await sampleForTransform(spec, sampleStrategy, cacheKey);
+ const sampleResponse = await sampleForTransform(spec, sampleStrategy, cacheRows);
this.updateSpec(
updateSchemaWithSample(
spec,
@@ -2374,14 +2377,14 @@ export class LoadDataView extends React.PureComponent {
- const sampleResponse = await sampleForTransform(spec, sampleStrategy, cacheKey);
+ const sampleResponse = await sampleForTransform(spec, sampleStrategy, cacheRows);
this.updateSpec(
updateSchemaWithSample(
spec,
From 225e58c16b76c08d902fa6ab0b27385968fb0725 Mon Sep 17 00:00:00 2001
From: Vadim Ogievetsky
Date: Sat, 23 Nov 2019 05:15:01 -0800
Subject: [PATCH 09/27] step
---
...static-google-blobstore.png => google.png} | Bin
web-console/src/utils/ingestion-spec.tsx | 42 ++++++++----
web-console/src/utils/sampler.ts | 30 +++++++--
web-console/src/utils/utils.spec.ts | 63 +++++++++++++++++-
.../views/load-data-view/load-data-view.tsx | 6 +-
5 files changed, 115 insertions(+), 26 deletions(-)
rename web-console/assets/{static-google-blobstore.png => google.png} (100%)
diff --git a/web-console/assets/static-google-blobstore.png b/web-console/assets/google.png
similarity index 100%
rename from web-console/assets/static-google-blobstore.png
rename to web-console/assets/google.png
diff --git a/web-console/src/utils/ingestion-spec.tsx b/web-console/src/utils/ingestion-spec.tsx
index 7bb6b4a7bdb4..cd30814c8b74 100644
--- a/web-console/src/utils/ingestion-spec.tsx
+++ b/web-console/src/utils/ingestion-spec.tsx
@@ -61,7 +61,7 @@ export type IngestionComboType =
| 'index:ingestSegment'
| 'index:inline'
| 'index:s3'
- | 'index:static-google-blobstore'
+ | 'index:google'
| 'index:hdfs';
// Some extra values that can be selected in the initial screen
@@ -100,7 +100,7 @@ export function getIngestionComboType(spec: IngestionSpec): IngestionComboType |
case 'ingestSegment':
case 'inline':
case 's3':
- case 'static-google-blobstore':
+ case 'google':
case 'hdfs':
return `index:${inputSource.type}` as IngestionComboType;
}
@@ -126,7 +126,7 @@ export function getIngestionTitle(ingestionType: IngestionComboTypeWithExtra): s
case 'index:s3':
return 'Amazon S3';
- case 'index:static-google-blobstore':
+ case 'index:google':
return 'Google Cloud Storage';
case 'index:hdfs':
@@ -178,7 +178,7 @@ export function getRequiredModule(ingestionType: IngestionComboTypeWithExtra): s
case 'index:s3':
return 'druid-s3-extensions';
- case 'index:static-google-blobstore':
+ case 'index:google':
return 'druid-google-extensions';
case 'index:hdfs':
@@ -292,7 +292,7 @@ const INPUT_FORMAT_FORM_FIELDS: Field[] = [
{
name: 'type',
type: 'string',
- suggestions: ['json', 'csv', 'tsv', 'regex'],
+ suggestions: ['json', 'csv', 'tsv', 'regex', 'parquet'],
info: (
<>
The parser used to parse the data.
@@ -974,7 +974,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
name: 'inputSource.type',
label: 'Source type',
type: 'string',
- suggestions: ['local', 'http', 'inline', 's3', 'static-google-blobstore', 'hdfs'],
+ suggestions: ['local', 'http', 'inline', 's3', 'google', 'hdfs'],
info: (
Druid connects to raw data through{' '}
@@ -1047,7 +1047,7 @@ export function getIoConfigFormFields(ingestionComboType: IngestionComboType): F
label: 'File filter',
type: 'string',
required: true,
- suggestions: ['*', '*.json', '*.json.gz', '*.csv', '*.tsv'],
+ suggestions: ['*', '*.json', '*.json.gz', '*.csv', '*.tsv', '*.parquet'],
info: (
<>
d.input).slice(0, 20);
}
+export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows | undefined) {
+ if (!cacheRows) return sampleSpec;
+
+ // If this is already an inline spec there is nothing to do
+ if (deepGet(sampleSpec, 'spec.ioConfig.inputSource.type') === 'inline') return sampleSpec;
+
+ // Make the spec into an inline json spec: set the sampler, spec and ioConfig types to 'index' and swap the input source for the cached rows
+ sampleSpec = deepSet(sampleSpec, 'type', 'index');
+ sampleSpec = deepSet(sampleSpec, 'spec.type', 'index');
+ sampleSpec = deepSet(sampleSpec, 'spec.ioConfig.type', 'index');
+ sampleSpec = deepSet(sampleSpec, 'spec.ioConfig.inputSource', {
+ type: 'inline',
+ data: cacheRows.map(r => JSON.stringify(r)).join('\n'), // one JSON-serialized row per line
+ });
+
+ const flattenSpec = deepGet(sampleSpec, 'spec.ioConfig.inputFormat.flattenSpec'); // read before the inputFormat is overwritten below
+ const inputFormat: InputFormat = { type: 'json' };
+ if (flattenSpec) inputFormat.flattenSpec = flattenSpec; // only an existing flattenSpec is carried over; the rest of the old inputFormat is replaced
+ sampleSpec = deepSet(sampleSpec, 'spec.ioConfig.inputFormat', inputFormat);
+
+ return sampleSpec; // the rewritten spec; the early returns above hand back the input unchanged
+}
+
export function headerFromSampleResponse(
sampleResponse: SampleResponse,
ignoreColumn?: string,
@@ -335,11 +358,6 @@ export async function sampleForParser(
return postToSampler(applyCache(sampleSpec, cacheRows), 'parser');
}
-function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows | undefined) {
- if (!cacheRows) return sampleSpec;
- return sampleSpec; // ToDo;
-}
-
export async function sampleForTimestamp(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
diff --git a/web-console/src/utils/utils.spec.ts b/web-console/src/utils/utils.spec.ts
index efdf4fb02a36..5ca93498f3c9 100644
--- a/web-console/src/utils/utils.spec.ts
+++ b/web-console/src/utils/utils.spec.ts
@@ -25,7 +25,7 @@ import {
updateSchemaWithSample,
} from './druid-type';
import { IngestionSpec } from './ingestion-spec';
-import { getSamplerType, headerFromSampleResponse } from './sampler';
+import { applyCache, getSamplerType, headerFromSampleResponse } from './sampler';
describe('test-utils', () => {
const ingestionSpec: IngestionSpec = {
@@ -65,9 +65,66 @@ describe('test-utils', () => {
});
it('spec-utils headerFromSampleResponse', () => {
+ expect(headerFromSampleResponse({ data: [{ input: { a: 1 }, parsed: { a: 1 } }] }))
+ .toMatchInlineSnapshot(`
+ Array [
+ "a",
+ ]
+ `);
+ });
+
+ it('spec-utils applyCache', () => {
expect(
- headerFromSampleResponse({ data: [{ input: { a: 1 }, parsed: { a: 1 } }] }),
- ).toMatchInlineSnapshot();
+ applyCache(
+ {
+ type: 'index_parallel',
+ spec: ingestionSpec,
+ samplerConfig: {
+ numRows: 500,
+ timeoutMs: 15000,
+ },
+ },
+ [{ make: 'Honda', model: 'Accord' }, { make: 'Toyota', model: 'Prius' }],
+ ),
+ ).toMatchInlineSnapshot(`
+ Object {
+ "samplerConfig": Object {
+ "numRows": 500,
+ "timeoutMs": 15000,
+ },
+ "spec": Object {
+ "dataSchema": Object {
+ "dataSource": "wikipedia",
+ "dimensionsSpec": Object {},
+ "granularitySpec": Object {
+ "queryGranularity": "HOUR",
+ "segmentGranularity": "DAY",
+ "type": "uniform",
+ },
+ "timestampSpec": Object {
+ "column": "timestamp",
+ "format": "iso",
+ },
+ },
+ "ioConfig": Object {
+ "inputFormat": Object {
+ "type": "json",
+ },
+ "inputSource": Object {
+ "data": "{\\"make\\":\\"Honda\\",\\"model\\":\\"Accord\\"}
+ {\\"make\\":\\"Toyota\\",\\"model\\":\\"Prius\\"}",
+ "type": "inline",
+ },
+ "type": "index",
+ },
+ "tuningConfig": Object {
+ "type": "index_parallel",
+ },
+ "type": "index",
+ },
+ "type": "index",
+ }
+ `);
});
// it('spec-utils sampleForParser', async () => {
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index d198ad3c6d25..6ff428a21101 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -703,7 +703,7 @@ export class LoadDataView extends React.PureComponentLoad text based data from Amazon S3.
;
- case 'index:static-google-blobstore':
+ case 'index:google':
return Load text based data from the Google Blobstore.
;
case 'index:hdfs':
@@ -830,7 +830,7 @@ export class LoadDataView extends React.PureComponent
Date: Sat, 23 Nov 2019 08:30:13 -0800
Subject: [PATCH 10/27] neo cache
---
web-console/src/utils/ingestion-spec.tsx | 6 +-
web-console/src/utils/sampler.ts | 17 +++--
web-console/src/utils/spec-utils.ts | 4 +-
.../views/load-data-view/load-data-view.tsx | 62 +++++++------------
4 files changed, 36 insertions(+), 53 deletions(-)
diff --git a/web-console/src/utils/ingestion-spec.tsx b/web-console/src/utils/ingestion-spec.tsx
index cd30814c8b74..d51460e2a4ca 100644
--- a/web-console/src/utils/ingestion-spec.tsx
+++ b/web-console/src/utils/ingestion-spec.tsx
@@ -383,7 +383,7 @@ export function issueWithInputFormat(inputFormat: InputFormat | undefined): stri
}
export function inputFormatCanFlatten(inputFormat: InputFormat): boolean {
- return inputFormat.type === 'json';
+ return inputFormat.type === 'json' || inputFormat.type === 'parquet';
}
export interface TimestampSpec {
@@ -2270,6 +2270,10 @@ export function updateIngestionType(
newSpec = deepSet(newSpec, 'dataSchema.granularitySpec', granularitySpec);
}
+ if (!deepGet(spec, 'dataSchema.dimensionsSpec')) {
+ newSpec = deepSet(newSpec, 'dataSchema.dimensionsSpec', {});
+ }
+
return newSpec;
}
diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts
index afa5cae72731..0ed4c5fe5e41 100644
--- a/web-console/src/utils/sampler.ts
+++ b/web-console/src/utils/sampler.ts
@@ -113,7 +113,7 @@ export function getCacheRowsFromSampleResponse(sampleResponse: SampleResponse):
return filterMap(sampleResponse.data, d => d.input).slice(0, 20);
}
-export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows | undefined) {
+export function applyCache(sampleSpec: SampleSpec, cacheRows: CacheRows) {
if (!cacheRows) return sampleSpec;
// If this is already an inline spec there is nothing to do
@@ -334,7 +334,6 @@ export async function sampleForConnect(
export async function sampleForParser(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheRows: CacheRows | undefined,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -355,13 +354,13 @@ export async function sampleForParser(
samplerConfig: BASE_SAMPLER_CONFIG,
};
- return postToSampler(applyCache(sampleSpec, cacheRows), 'parser');
+ return postToSampler(sampleSpec, 'parser');
}
export async function sampleForTimestamp(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheRows: CacheRows | undefined,
+ cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -410,7 +409,7 @@ export async function sampleForTimestamp(
samplerConfig: BASE_SAMPLER_CONFIG,
};
- const sampleTime = await postToSampler(sampleSpec, 'timestamp-time');
+ const sampleTime = await postToSampler(applyCache(sampleSpec, cacheRows), 'timestamp-time');
if (sampleTime.data.length !== sampleColumns.data.length) {
// If the two responses did not come from the same cache (or for some reason have different lengths) then
@@ -433,7 +432,7 @@ export async function sampleForTimestamp(
export async function sampleForTransform(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheRows: CacheRows | undefined,
+ cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -491,13 +490,13 @@ export async function sampleForTransform(
samplerConfig: BASE_SAMPLER_CONFIG,
};
- return postToSampler(sampleSpec, 'transform');
+ return postToSampler(applyCache(sampleSpec, cacheRows), 'transform');
}
export async function sampleForFilter(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheRows: CacheRows | undefined,
+ cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
@@ -563,7 +562,7 @@ export async function sampleForFilter(
export async function sampleForSchema(
spec: IngestionSpec,
sampleStrategy: SampleStrategy,
- cacheRows: CacheRows | undefined,
+ cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
diff --git a/web-console/src/utils/spec-utils.ts b/web-console/src/utils/spec-utils.ts
index 8e0b465dabdd..2842118ba995 100644
--- a/web-console/src/utils/spec-utils.ts
+++ b/web-console/src/utils/spec-utils.ts
@@ -26,10 +26,10 @@ export function computeFlattenPathsForData(
exprType: ExprType,
arrayHandling: ArrayHandling,
): FlattenField[] {
- return computeFlattenExprsForData(data, exprType, arrayHandling).map((expr, i) => {
+ return computeFlattenExprsForData(data, exprType, arrayHandling).map(expr => {
return {
+ name: expr.replace(/^\$?\./, ''),
type: exprType,
- name: `expr_${i}`,
expr,
};
});
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 6ff428a21101..05b55b0452bb 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -1166,7 +1166,7 @@ export class LoadDataView extends React.PureComponent r.input),
@@ -1437,19 +1437,15 @@ export class LoadDataView extends React.PureComponent
Date: Sat, 23 Nov 2019 10:02:02 -0800
Subject: [PATCH 11/27] fix time selection
---
web-console/src/utils/ingestion-spec.tsx | 19 ++++++--
web-console/src/utils/sampler.ts | 42 +++++-------------
.../views/load-data-view/load-data-view.tsx | 44 ++++++++++---------
.../parse-time-table.spec.tsx | 4 +-
4 files changed, 53 insertions(+), 56 deletions(-)
diff --git a/web-console/src/utils/ingestion-spec.tsx b/web-console/src/utils/ingestion-spec.tsx
index d51460e2a4ca..f83dab7da629 100644
--- a/web-console/src/utils/ingestion-spec.tsx
+++ b/web-console/src/utils/ingestion-spec.tsx
@@ -399,13 +399,22 @@ export function getTimestampSpecColumn(timestampSpec: TimestampSpec) {
const NO_SUCH_COLUMN = '!!!_no_such_column_!!!';
-const EMPTY_TIMESTAMP_SPEC: TimestampSpec = {
+const DUMMY_TIMESTAMP_SPEC: TimestampSpec = {
+ column: NO_SUCH_COLUMN, // sentinel column name ('!!!_no_such_column_!!!') declared just above
+ missingValue: '1970-01-01T00:00:00Z', // NOTE(review): presumably applied to every row because the sentinel column never exists — confirm
+};
+
+export function getDummyTimestampSpec() {
+ return DUMMY_TIMESTAMP_SPEC; // returns the shared module-level object itself, not a copy
+}
+
+const CONSTANT_TIMESTAMP_SPEC: TimestampSpec = {
column: NO_SUCH_COLUMN,
missingValue: '2010-01-01T00:00:00Z',
};
-export function getEmptyTimestampSpec() {
- return EMPTY_TIMESTAMP_SPEC;
+export function getConstantTimestampSpec() {
+ return CONSTANT_TIMESTAMP_SPEC;
}
export function isColumnTimestampSpec(timestampSpec: TimestampSpec) {
@@ -2270,6 +2279,10 @@ export function updateIngestionType(
newSpec = deepSet(newSpec, 'dataSchema.granularitySpec', granularitySpec);
}
+ if (!deepGet(spec, 'dataSchema.timestampSpec')) {
+ newSpec = deepSet(newSpec, 'dataSchema.timestampSpec', getDummyTimestampSpec());
+ }
+
if (!deepGet(spec, 'dataSchema.dimensionsSpec')) {
newSpec = deepSet(newSpec, 'dataSchema.dimensionsSpec', {});
}
diff --git a/web-console/src/utils/sampler.ts b/web-console/src/utils/sampler.ts
index 0ed4c5fe5e41..9a1d63d87cca 100644
--- a/web-console/src/utils/sampler.ts
+++ b/web-console/src/utils/sampler.ts
@@ -23,7 +23,7 @@ import { alphanumericCompare, filterMap, sortWithPrefixSuffix } from './general'
import {
DimensionsSpec,
downgradeSpec,
- getEmptyTimestampSpec,
+ getDummyTimestampSpec,
getSpecType,
IngestionSpec,
InputFormat,
@@ -295,7 +295,7 @@ export async function sampleForConnect(
}),
dataSchema: {
dataSource: 'sample',
- timestampSpec: getEmptyTimestampSpec(),
+ timestampSpec: getDummyTimestampSpec(),
dimensionsSpec: {},
},
} as any,
@@ -347,7 +347,7 @@ export async function sampleForParser(
ioConfig,
dataSchema: {
dataSource: 'sample',
- timestampSpec: getEmptyTimestampSpec(),
+ timestampSpec: getDummyTimestampSpec(),
dimensionsSpec: {},
},
},
@@ -359,15 +359,10 @@ export async function sampleForParser(
export async function sampleForTimestamp(
spec: IngestionSpec,
- sampleStrategy: SampleStrategy,
cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
- const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
- makeSamplerIoConfig(deepGet(spec, 'ioConfig'), samplerType, sampleStrategy),
- );
- const timestampSpec: TimestampSpec =
- deepGet(spec, 'dataSchema.timestampSpec') || getEmptyTimestampSpec();
+ const timestampSpec: TimestampSpec = deepGet(spec, 'dataSchema.timestampSpec');
const columnTimestampSpec = isColumnTimestampSpec(timestampSpec);
// First do a query with a static timestamp spec
@@ -375,11 +370,11 @@ export async function sampleForTimestamp(
type: samplerType,
spec: {
type: samplerType,
- ioConfig,
+ ioConfig: deepGet(spec, 'ioConfig'),
dataSchema: {
dataSource: 'sample',
dimensionsSpec: {},
- timestampSpec: columnTimestampSpec ? getEmptyTimestampSpec() : timestampSpec,
+ timestampSpec: columnTimestampSpec ? getDummyTimestampSpec() : timestampSpec,
},
},
samplerConfig: BASE_SAMPLER_CONFIG,
@@ -399,7 +394,7 @@ export async function sampleForTimestamp(
type: samplerType,
spec: {
type: samplerType,
- ioConfig,
+ ioConfig: deepGet(spec, 'ioConfig'),
dataSchema: {
dataSource: 'sample',
dimensionsSpec: {},
@@ -431,13 +426,9 @@ export async function sampleForTimestamp(
export async function sampleForTransform(
spec: IngestionSpec,
- sampleStrategy: SampleStrategy,
cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
- const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
- makeSamplerIoConfig(deepGet(spec, 'ioConfig'), samplerType, sampleStrategy),
- );
const inputFormatColumns: string[] = deepGet(spec, 'ioConfig.inputFormat.columns') || [];
const timestampSpec: TimestampSpec = deepGet(spec, 'dataSchema.timestampSpec');
const transforms: Transform[] = deepGet(spec, 'dataSchema.transformSpec.transforms') || [];
@@ -449,7 +440,7 @@ export async function sampleForTransform(
type: samplerType,
spec: {
type: samplerType,
- ioConfig,
+ ioConfig: deepGet(spec, 'ioConfig'),
dataSchema: {
dataSource: 'sample',
timestampSpec,
@@ -477,7 +468,7 @@ export async function sampleForTransform(
type: samplerType,
spec: {
type: samplerType,
- ioConfig,
+ ioConfig: deepGet(spec, 'ioConfig'),
dataSchema: {
dataSource: 'sample',
timestampSpec,
@@ -495,13 +486,9 @@ export async function sampleForTransform(
export async function sampleForFilter(
spec: IngestionSpec,
- sampleStrategy: SampleStrategy,
cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
- const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
- makeSamplerIoConfig(deepGet(spec, 'ioConfig'), samplerType, sampleStrategy),
- );
const inputFormatColumns: string[] = deepGet(spec, 'ioConfig.inputFormat.columns') || [];
const timestampSpec: TimestampSpec = deepGet(spec, 'dataSchema.timestampSpec');
const transforms: Transform[] = deepGet(spec, 'dataSchema.transformSpec.transforms') || [];
@@ -514,7 +501,7 @@ export async function sampleForFilter(
type: samplerType,
spec: {
type: samplerType,
- ioConfig,
+ ioConfig: deepGet(spec, 'ioConfig'),
dataSchema: {
dataSource: 'sample',
timestampSpec,
@@ -542,7 +529,7 @@ export async function sampleForFilter(
type: samplerType,
spec: {
type: samplerType,
- ioConfig,
+ ioConfig: deepGet(spec, 'ioConfig'),
dataSchema: {
dataSource: 'sample',
timestampSpec,
@@ -561,14 +548,9 @@ export async function sampleForFilter(
export async function sampleForSchema(
spec: IngestionSpec,
- sampleStrategy: SampleStrategy,
cacheRows: CacheRows,
): Promise {
const samplerType = getSamplerType(spec);
- const ioConfig: IoConfig = await scopeDownIngestSegmentInputSourceIntervalIfNeeded(
- makeSamplerIoConfig(deepGet(spec, 'ioConfig'), samplerType, sampleStrategy),
- );
-
const timestampSpec: TimestampSpec = deepGet(spec, 'dataSchema.timestampSpec');
const transformSpec: TransformSpec =
deepGet(spec, 'dataSchema.transformSpec') || ({} as TransformSpec);
@@ -581,7 +563,7 @@ export async function sampleForSchema(
type: samplerType,
spec: {
type: samplerType,
- ioConfig,
+ ioConfig: deepGet(spec, 'ioConfig'),
dataSchema: {
dataSource: 'sample',
timestampSpec,
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 05b55b0452bb..e4ec32b2da79 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -63,6 +63,7 @@ import {
LocalStorageKeys,
localStorageSet,
parseJson,
+ pluralIfNeeded,
QueryState,
} from '../../utils';
import { NUMERIC_TIME_FORMATS, possibleDruidFormatForValues } from '../../utils/druid-time';
@@ -78,9 +79,9 @@ import {
fillDataSourceNameIfNeeded,
fillInputFormat,
FlattenField,
+ getConstantTimestampSpec,
getDimensionMode,
getDimensionSpecFormFields,
- getEmptyTimestampSpec,
getFilterFormFields,
getFlattenFieldFormFields,
getIngestionComboType,
@@ -167,7 +168,7 @@ import './load-data-view.scss';
function showRawLine(line: SampleEntry): string {
if (!line.parsed) return 'No parse';
const raw = line.parsed.raw;
- if (typeof raw !== 'string') return 'Bad raw';
+ if (typeof raw !== 'string') return String(raw);
if (raw.includes('\n')) {
return `[Multi-line row, length: ${raw.length}]`;
}
@@ -182,7 +183,7 @@ function showBlankLine(line: SampleEntry): string {
}
function getTimestampSpec(headerAndRows: HeaderAndRows | null): TimestampSpec {
- if (!headerAndRows) return getEmptyTimestampSpec();
+ if (!headerAndRows) return getConstantTimestampSpec();
const timestampSpecs = filterMap(headerAndRows.header, sampleHeader => {
const possibleFormat = possibleDruidFormatForValues(
@@ -199,7 +200,7 @@ function getTimestampSpec(headerAndRows: HeaderAndRows | null): TimestampSpec {
timestampSpecs.find(ts => /time/i.test(ts.column)) || // Use a suggestion that has time in the name if possible
timestampSpecs.find(ts => !NUMERIC_TIME_FORMATS.includes(ts.format)) || // Use a suggestion that is not numeric
timestampSpecs[0] || // Fall back to the first one
- getEmptyTimestampSpec() // Ok, empty it is...
+ getConstantTimestampSpec() // Ok, no usable column was found, so fall back to the constant timestamp spec
);
}
@@ -984,7 +985,6 @@ export class LoadDataView extends React.PureComponent
)}
{this.renderFlattenControls()}
- {Boolean(sugestedFlattenFields && sugestedFlattenFields.length) && (
+ {sugestedFlattenFields && sugestedFlattenFields.length ? (
+ ) : (
+ undefined
)}