From 6a4ee82a8fcbe5cc79d973abe4857e32ff686490 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 17:35:32 -0700 Subject: [PATCH 01/15] WIP integration tests --- .../druid/tests/indexer/ITTransformTest.java | 77 +++++++++++++ ...ikipedia_index_queries_with_transform.json | 54 +++++++++ .../wikipedia_index_task_with_transform.json | 106 ++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java create mode 100644 integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json create mode 100644 integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java new file mode 100644 index 000000000000..89690abd2578 --- /dev/null +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.tests.indexer; + +import org.apache.druid.testing.guice.DruidTestModuleFactory; +import org.apache.druid.tests.TestNGGroup; +import org.testng.annotations.Guice; +import org.testng.annotations.Test; + +import java.io.Closeable; + +@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE}) +@Guice(moduleFactory = DruidTestModuleFactory.class) +public class ITTransformTest extends AbstractITBatchIndexTest +{ + private static final String INDEX_TASK = "/indexer/wikipedia_index_task_with_transform.json"; + private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries_with_transform.json"; + private static final String INDEX_DATASOURCE = "wikipedia_index_test"; + + private static final String REINDEX_TASK = "/indexer/wikipedia_reindex_task.json"; + private static final String REINDEX_TASK_WITH_DRUID_INPUT_SOURCE = "/indexer/wikipedia_reindex_druid_input_source_task.json"; + private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json"; + private static final String REINDEX_DATASOURCE = "wikipedia_reindex_test"; + + @Test + public void testIndexAndReIndexWithTransformSpec() throws Exception + { + final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData"; + //final String reindexDatasourceWithDruidInputSource = REINDEX_DATASOURCE + "-testIndexData-druidInputSource"; + + try ( + //final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()); + //final Closeable ignored3 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) + ) { + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + /* + doReindexTest( + INDEX_DATASOURCE, + reindexDatasource, + REINDEX_TASK, + REINDEX_QUERIES_RESOURCE + ); + doReindexTest( + INDEX_DATASOURCE, + reindexDatasourceWithDruidInputSource, + REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, + REINDEX_QUERIES_RESOURCE + ); + */ + } + } +} diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json new file mode 100644 index 000000000000..6654f0d84f0d --- /dev/null +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json @@ -0,0 +1,54 @@ +[ + { + "description":"having spec on post aggregation", + "query":{ + "queryType":"groupBy", + "dataSource":"%%DATASOURCE%%", + "granularity":"day", + "dimensions":[ + "page" + ], + "filter":{ + "type":"selector", + "dimension":"language", + "value":"language-zh" + }, + "aggregations":[ + { + "type":"count", + "name":"rows" + }, + { + "type":"longSum", + "fieldName":"triple-added", + "name":"added_count" + } + ], + "postAggregations": [ + { + "type":"arithmetic", + "name":"added_count_times_ten", + "fn":"*", + "fields":[ + {"type":"fieldAccess", "name":"added_count", "fieldName":"added_count"}, + {"type":"constant", "name":"const", "value":10} + ] + } + ], + "having":{"type":"greaterThan", "aggregation":"added_count_times_ten", "value":9000}, + "intervals":[ + "2013-08-31T00:00/2013-09-01T00:00" + ] + }, + "expectedResults":[ { + "version" : "v1", + "timestamp" : "2013-08-31T00:00:00.000Z", + "event" : { + "added_count_times_ten" : 9050.0, + "page" : "Crimson Typhoon", + "added_count" : 2715, + "rows" : 1 + } + } ] + } +] \ No newline at end of file diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json new file mode 100644 index 000000000000..89cb82c86a4a --- /dev/null +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json @@ -0,0 +1,106 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "%%DATASOURCE%%", + "metricsSpec": [ + { + "type": "count", + "name": "count" + }, + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + }, + { + "name": "thetaSketch", + "type": "thetaSketch", + "fieldName": "user" + }, + { + "name": "quantilesDoublesSketch", + "type": "quantilesDoublesSketch", + "fieldName": "delta" + }, + { + "name": "HLLSketchBuild", + "type": "HLLSketchBuild", + "fieldName": "user" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-02" ] + }, + "parser": { + "parseSpec": { + "format" : "json", + "timestampSpec": { + "column": "timestamp" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + } + } + }, + "transformSpec": { + "transforms": [ + { + "type": "expression", + "name": "language", + "expression": "concat('language-', language)" + }, + { + "type": "expression", + "name": "triple-added", + "expression": "added * 3" + } + ], + "filter": { + "type":"or", + "fields": [ + { "type": "selector", "dimension": "language", "value": "language-zh" }, + { "type": "selector", "dimension": "page", "value": "Crimson Typhoon" } + ] + } + } + }, + "ioConfig": { + "type": "index", + "firehose": { + "type": "local", + "baseDir": "/resources/data/batch_index", + "filter": "wikipedia_index_data*" + } + }, + "tuningConfig": { + "type": "index", + "maxRowsPerSegment": 3 + } + } +} \ No newline at end of file From 045584b0f6fbee9ca5c07117bf6f21d6e2815c8b Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 18:17:57 -0700 Subject: [PATCH 02/15] Add integration test for ingestion with transformSpec --- .../druid/tests/indexer/ITTransformTest.java | 26 +------------------ .../wikipedia_index_task_with_transform.json | 14 +++++----- 2 files changed, 7 insertions(+), 33 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 89690abd2578..89d3fee4e926 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -34,21 +34,11 @@ public class ITTransformTest extends AbstractITBatchIndexTest private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries_with_transform.json"; private static final String INDEX_DATASOURCE = "wikipedia_index_test"; - private static final String REINDEX_TASK = "/indexer/wikipedia_reindex_task.json"; - private static final String REINDEX_TASK_WITH_DRUID_INPUT_SOURCE = "/indexer/wikipedia_reindex_druid_input_source_task.json"; - private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries.json"; - private static final String REINDEX_DATASOURCE = "wikipedia_reindex_test"; - @Test public void testIndexAndReIndexWithTransformSpec() throws Exception { - final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData"; - //final String reindexDatasourceWithDruidInputSource = REINDEX_DATASOURCE + "-testIndexData-druidInputSource"; - try ( - //final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()); - //final Closeable ignored3 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()) ) { doIndexTest( INDEX_DATASOURCE, @@ -58,20 +48,6 @@ public void testIndexAndReIndexWithTransformSpec() throws Exception true, true ); - /* - doReindexTest( - INDEX_DATASOURCE, - reindexDatasource, - REINDEX_TASK, - REINDEX_QUERIES_RESOURCE - ); - doReindexTest( - INDEX_DATASOURCE, - reindexDatasourceWithDruidInputSource, - REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - REINDEX_QUERIES_RESOURCE - ); - */ } } } diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json index 89cb82c86a4a..b96510fa692b 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json @@ -13,6 +13,11 @@ "name": "added", "fieldName": "added" }, + { + "type": "doubleSum", + "name": "triple-added", + "fieldName": "triple-added" + }, { "type": "doubleSum", "name": "deleted", @@ -80,14 +85,7 @@ "name": "triple-added", "expression": "added * 3" } - ], - "filter": { - "type":"or", - "fields": [ - { "type": "selector", "dimension": "language", "value": "language-zh" }, - { "type": "selector", "dimension": "page", "value": "Crimson Typhoon" } - ] - } + ] } }, "ioConfig": { From 46ae13bb23811b8337025554ef4e0be2bff6e604 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 20:00:26 -0700 Subject: [PATCH 03/15] WIP almost working tests --- .../quickstart/tutorial/transform-index.json | 2 +- .../druid/tests/indexer/ITTransformTest.java | 74 ++++++++++++- ...ikipedia_index_queries_with_transform.json | 2 +- ...index_task_with_inputsource_transform.json | 103 ++++++++++++++++++ ...uid_input_source_task_with_transforms.json | 96 ++++++++++++++++ ...pedia_reindex_queries_with_transforms.json | 72 ++++++++++++ ...ikipedia_reindex_task_with_transforms.json | 103 ++++++++++++++++++ 7 files changed, 445 insertions(+), 7 deletions(-) create mode 100644 integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json create mode 100644 integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json create mode 100644 integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json create mode 100644 integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json diff --git a/examples/quickstart/tutorial/transform-index.json b/examples/quickstart/tutorial/transform-index.json index bf605fcfdbf0..caebb9ff9c5d 100644 --- a/examples/quickstart/tutorial/transform-index.json +++ b/examples/quickstart/tutorial/transform-index.json @@ -55,7 +55,7 @@ "baseDir" : "quickstart/tutorial", "filter" : "transform-data.json" }, - "inpuFormat" : { + "inputFormat" : { "type" : "json" }, "appendToExisting" : false diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 89d3fee4e926..9b7cb98f268f 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -24,25 +24,88 @@ import org.testng.annotations.Guice; import org.testng.annotations.Test; -import java.io.Closeable; +import java.io.IOException; @Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE}) @Guice(moduleFactory = DruidTestModuleFactory.class) public class ITTransformTest extends AbstractITBatchIndexTest { - private static final String INDEX_TASK = "/indexer/wikipedia_index_task_with_transform.json"; + private static final String INDEX_TASK_WITH_FIREHOSE = "/indexer/wikipedia_index_task_with_transform.json"; + private static final String INDEX_TASK_WITH_INPUT_SOURCE = "/indexer/wikipedia_index_task_with_inputsource_transform.json"; private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries_with_transform.json"; private static final String INDEX_DATASOURCE = "wikipedia_index_test"; + private static final String REINDEX_TASK = "/indexer/wikipedia_reindex_task_with_transforms.json"; + private static final String REINDEX_TASK_WITH_DRUID_INPUT_SOURCE = "/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json"; + private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries_with_transforms.json"; + private static final String REINDEX_DATASOURCE = "wikipedia_reindex_test"; + + @Test + public void testIndexAndReIndexWithTransformSpec() throws IOException + { + final String reindexDatasourceWithDruidInputSource = REINDEX_DATASOURCE + "-druidInputSource"; + + //try ( + // final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + // final Closeable ignored2 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) + //) { + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK_WITH_INPUT_SOURCE, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + doReindexTest( + INDEX_DATASOURCE, + reindexDatasourceWithDruidInputSource, + REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, + REINDEX_QUERIES_RESOURCE + ); + //} + } + + /* + // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed + // Move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests! + @Test + @Ignore + public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOException + { + final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData"; + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix()) + ) { + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK_WITH_INPUT_SOURCE, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + doReindexTest( + INDEX_DATASOURCE, + reindexDatasource, + REINDEX_TASK, + REINDEX_QUERIES_RESOURCE + ); + } + } + + // TODO: re-instate this test when https://github.com/apache/druid/issues/9589 is fixed @Test - public void testIndexAndReIndexWithTransformSpec() throws Exception + @Ignore + public void testIndexWithFirehoseAndTransforms() throws IOException { try ( - final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()) + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); ) { doIndexTest( INDEX_DATASOURCE, - INDEX_TASK, + INDEX_TASK_WITH_FIREHOSE, INDEX_QUERIES_RESOURCE, false, true, @@ -50,4 +113,5 @@ public void testIndexAndReIndexWithTransformSpec() throws Exception ); } } + */ } diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json index 6654f0d84f0d..482ec7a342bd 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json @@ -44,7 +44,7 @@ "version" : "v1", "timestamp" : "2013-08-31T00:00:00.000Z", "event" : { - "added_count_times_ten" : 9050.0, + "added_count_times_ten" : 27150.0, "page" : "Crimson Typhoon", "added_count" : 2715, "rows" : 1 diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json new file mode 100644 index 000000000000..bef8f0154425 --- /dev/null +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json @@ -0,0 +1,103 @@ +{ + "type" : "index", + "spec" : { + "dataSchema" : { + "dataSource" : "%%DATASOURCE%%", + "timestampSpec": { + "column": "timestamp" + }, + "metricsSpec": [ + { + "type": "count", + "name": "count" + }, + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "triple-added", + "fieldName": "triple-added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + }, + { + "name": "thetaSketch", + "type": "thetaSketch", + "fieldName": "user" + }, + { + "name": "quantilesDoublesSketch", + "type": "quantilesDoublesSketch", + "fieldName": "delta" + }, + { + "name": "HLLSketchBuild", + "type": "HLLSketchBuild", + "fieldName": "user" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-02" ] + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "robot", + "anonymous", + "namespace", + "continent", + "country", + "region", + "city" + ] + }, + "transformSpec": { + "transforms": [ + { + "type": "expression", + "name": "language", + "expression": "concat('language-', language)" + }, + { + "type": "expression", + "name": "triple-added", + "expression": "added * 3" + } + ] + } + }, + "ioConfig" : { + "type" : "index", + "inputSource" : { + "type" : "local", + "baseDir" : "/resources/data/batch_index", + "filter" : "wikipedia_index_data*" + }, + "inputFormat" : { + "type" : "json" + }, + "appendToExisting" : false + }, + "tuningConfig" : { + "type" : "index", + "maxRowsPerSegment" : 5000000, + "maxRowsInMemory" : 25000 + } + } +} diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json new file mode 100644 index 000000000000..f3165fd9bef7 --- /dev/null +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json @@ -0,0 +1,96 @@ +{ + "type": "index", + "spec": { + "ioConfig": { + "type": "index", + "inputSource": { + "type": "druid", + "dataSource": "%%DATASOURCE%%", + "interval": "2013-08-31/2013-09-01" + } + }, + "tuningConfig": { + "type": "index", + "partitionsSpec": { + "type": "dynamic" + } + }, + "dataSchema": { + "dataSource": "%%REINDEX_DATASOURCE%%", + "granularitySpec": { + "type": "uniform", + "queryGranularity": "SECOND", + "segmentGranularity": "DAY" + }, + "timestampSpec": { + "column": "__time", + "format": "iso" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "anonymous", + "namespace", + "country", + "region", + "city" + ] + }, + "transformSpec": { + "transforms": [ + { + "type": "expression", + "name": "newPage", + "expression": "page" + }, + { + "type": "expression", + "name": "one-plus-triple-added", + "expression": "\"triple-added\" + 1" + }, + { + "type": "expression", + "name": "double-deleted", + "expression": "deleted * 2" + } + ] + }, + "metricsSpec": [ + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "triple-added", + "fieldName": "triple-added" + }, + { + "type": "doubleSum", + "name": "one-plus-triple-added", + "fieldName": "one-plus-triple-added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "double-deleted", + "fieldName": "double-deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + } + ] + } + } +} \ No newline at end of file diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json new file mode 100644 index 000000000000..87d994fd185d --- /dev/null +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json @@ -0,0 +1,72 @@ +[ + { + "description": "timeseries, 1 agg, all", + "query":{ + "queryType" : "timeBoundary", + "dataSource": "%%DATASOURCE%%" + }, + "expectedResults":[ + { + "timestamp" : "2013-08-31T01:02:33.000Z", + "result" : { + "minTime" : "2013-08-31T01:02:33.000Z", + "maxTime" : "2013-08-31T12:41:27.000Z" + } + } + ] + }, + + { + "description":"having spec on post aggregation", + "query":{ + "queryType":"groupBy", + "dataSource":"%%DATASOURCE%%", + "granularity":"day", + "dimensions":[ + "newPage" + ], + "filter":{ + "type":"selector", + "dimension":"language", + "value":"language-zh" + }, + "aggregations":[ + { + "type":"longSum", + "fieldName":"one-plus-triple-added", + "name":"added_count" + }, + { + "type":"longSum", + "fieldName":"double-deleted", + "name":"double_deleted_count" + } + ], + "postAggregations": [ + { + "type":"arithmetic", + "name":"added_count_times_ten", + "fn":"*", + "fields":[ + {"type":"fieldAccess", "name":"added_count", "fieldName":"added_count"}, + {"type":"constant", "name":"const", "value":10} + ] + } + ], + "having":{"type":"greaterThan", "aggregation":"added_count_times_ten", "value":9000}, + "intervals":[ + "2013-08-31T00:00/2013-09-01T00:00" + ] + }, + "expectedResults":[ { + "version" : "v1", + "timestamp" : "2013-08-31T00:00:00.000Z", + "event" : { + "added_count_times_ten" : 27160.0, + "newPage" : "Crimson Typhoon", + "double_deleted_count" : 10, + "added_count" : 2716 + } + } ] + } +] diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json new file mode 100644 index 000000000000..6e9d86273f69 --- /dev/null +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json @@ -0,0 +1,103 @@ +{ + "type": "index", + "spec": { + "dataSchema": { + "dataSource": "%%REINDEX_DATASOURCE%%", + "metricsSpec": [ + { + "type": "doubleSum", + "name": "added", + "fieldName": "added" + }, + { + "type": "doubleSum", + "name": "triple-added", + "fieldName": "triple-added" + }, + { + "type": "doubleSum", + "name": "one-plus-triple-added", + "fieldName": "one-plus-triple-added" + }, + { + "type": "doubleSum", + "name": "deleted", + "fieldName": "deleted" + }, + { + "type": "doubleSum", + "name": "double-deleted", + "fieldName": "double-deleted" + }, + { + "type": "doubleSum", + "name": "delta", + "fieldName": "delta" + } + ], + "granularitySpec": { + "segmentGranularity": "DAY", + "queryGranularity": "second", + "intervals" : [ "2013-08-31/2013-09-01" ] + }, + "parser": { + "parseSpec": { + "format" : "json", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensions": [ + "page", + {"type": "string", "name": "language", "createBitmapIndex": false}, + "user", + "unpatrolled", + "newPage", + "anonymous", + "namespace", + "country", + "region", + "city" + ] + }, + "transformSpec": { + "transforms": [ + { + "type": "expression", + "name": "newPage", + "expression": "page" + }, + { + "type": "expression", + "name": "country", + "expression": "concat(\"country-\", country)" + }, + { + "type": "expression", + "name": "one-plus-triple-added", + "expression": "\"triple-added\" + 1" + }, + { + "type": "expression", + "name": "double-deleted", + "expression": "deleted * 2" + } + ] + } + } + } + }, + "ioConfig": { + "type": "index", + "firehose": { + "type": "ingestSegment", + "dataSource": "%%DATASOURCE%%", + "interval": "2013-08-31/2013-09-01" + } + }, + "tuningConfig": { + "type": "index" + } + } +} From 608e56846d5c419d6e0f38f92ab73b773239c644 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 20:03:53 -0700 Subject: [PATCH 04/15] Add ignored tests --- .../druid/tests/indexer/ITTransformTest.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 9b7cb98f268f..120b68ff2fb1 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -21,9 +21,11 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; +import org.junit.Ignore; import org.testng.annotations.Guice; import org.testng.annotations.Test; +import java.io.Closeable; import java.io.IOException; @Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE}) @@ -45,10 +47,10 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException { final String reindexDatasourceWithDruidInputSource = REINDEX_DATASOURCE + "-druidInputSource"; - //try ( - // final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); - // final Closeable ignored2 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) - //) { + try ( + final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored2 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) + ) { doIndexTest( INDEX_DATASOURCE, INDEX_TASK_WITH_INPUT_SOURCE, @@ -63,10 +65,9 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, REINDEX_QUERIES_RESOURCE ); - //} + } } - /* // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed // Move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests! @Test @@ -113,5 +114,4 @@ public void testIndexWithFirehoseAndTransforms() throws IOException ); } } - */ } From 0f7dd96d66daabb322afd8936a7fca101bd54952 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 20:55:48 -0700 Subject: [PATCH 05/15] checkstyle stuff --- .../druid/tests/indexer/ITTransformTest.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 120b68ff2fb1..4b408990f11d 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -51,20 +51,20 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); final Closeable ignored2 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) ) { - doIndexTest( - INDEX_DATASOURCE, - INDEX_TASK_WITH_INPUT_SOURCE, - INDEX_QUERIES_RESOURCE, - false, - true, - true - ); - doReindexTest( - INDEX_DATASOURCE, - reindexDatasourceWithDruidInputSource, - REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - REINDEX_QUERIES_RESOURCE - ); + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK_WITH_INPUT_SOURCE, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + doReindexTest( + INDEX_DATASOURCE, + reindexDatasourceWithDruidInputSource, + REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, + REINDEX_QUERIES_RESOURCE + ); } } From f6f10aaabcce7d2675b92b5ba92b2fe0cc290648 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 20:58:47 -0700 Subject: [PATCH 06/15] remove newPage from index task ingestion spec --- .../resources/indexer/wikipedia_index_task_with_transform.json | 1 - 1 file changed, 1 deletion(-) diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json index b96510fa692b..5f289dc5395a 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json @@ -61,7 +61,6 @@ {"type": "string", "name": "language", "createBitmapIndex": false}, "user", "unpatrolled", - "newPage", "robot", "anonymous", "namespace", From 013150d4e0f49798c48c8e19e6cad7909ed3f2c6 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 21:07:53 -0700 Subject: [PATCH 07/15] more test cleanup --- .../druid/tests/indexer/ITTransformTest.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 4b408990f11d..c974969105ce 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -21,6 +21,7 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; +import org.junit.Assert; import org.junit.Ignore; import org.testng.annotations.Guice; import org.testng.annotations.Test; @@ -68,12 +69,13 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException } } - // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed - // Move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests! @Test @Ignore public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOException { + // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed + // Move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests! + Assert.fail(); final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData"; try ( final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); @@ -96,16 +98,18 @@ public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOExcep } } - // TODO: re-instate this test when https://github.com/apache/druid/issues/9589 is fixed @Test @Ignore public void testIndexWithFirehoseAndTransforms() throws IOException { + // TODO: re-instate this test when https://github.com/apache/druid/issues/9589 is fixed + Assert.fail(); + final String indexDatasource = INDEX_DATASOURCE + "-firehose"; try ( - final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); + final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); ) { doIndexTest( - INDEX_DATASOURCE, + indexDatasource, INDEX_TASK_WITH_FIREHOSE, INDEX_QUERIES_RESOURCE, false, From db4dc280e35d35c4dc4035c61bc4bfdbeaa59283 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 21:15:06 -0700 Subject: [PATCH 08/15] still not quite working --- .../indexer/wikipedia_reindex_queries_with_transforms.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json index 87d994fd185d..8920e9d33173 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json @@ -23,7 +23,7 @@ "dataSource":"%%DATASOURCE%%", "granularity":"day", "dimensions":[ - "newPage" + "page" ], "filter":{ "type":"selector", @@ -63,7 +63,7 @@ "timestamp" : "2013-08-31T00:00:00.000Z", "event" : { "added_count_times_ten" : 27160.0, - "newPage" : "Crimson Typhoon", + "page" : "Crimson Typhoon", "double_deleted_count" : 10, "added_count" : 2716 } From 8d2c471ac91093f46ae292c42fdddc4a22a20f68 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 21:30:40 -0700 Subject: [PATCH 09/15] Actually disable the tests --- .../org/apache/druid/tests/indexer/ITTransformTest.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index c974969105ce..7e01cff3efb8 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -22,7 +22,6 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; import org.junit.Assert; -import org.junit.Ignore; import org.testng.annotations.Guice; import org.testng.annotations.Test; @@ -69,8 +68,7 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException } } - @Test - @Ignore + @Test(enabled = false) public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOException { // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed @@ -98,8 +96,7 @@ public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOExcep } } - @Test - @Ignore + @Test(enabled = false) public void testIndexWithFirehoseAndTransforms() throws IOException { // TODO: re-instate this test when https://github.com/apache/druid/issues/9589 is fixed From 9da1c534a5046e1d1af9189405d9b787a300f81d Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 23:29:13 -0700 Subject: [PATCH 10/15] working tests --- .../druid/tests/indexer/ITTransformTest.java | 28 +++++++++---------- ...uid_input_source_task_with_transforms.json | 10 +++---- ...pedia_reindex_queries_with_transforms.json | 2 +- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 7e01cff3efb8..461e23bcd97e 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -51,20 +51,20 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); final Closeable ignored2 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) ) { - doIndexTest( - INDEX_DATASOURCE, - INDEX_TASK_WITH_INPUT_SOURCE, - INDEX_QUERIES_RESOURCE, - false, - true, - true - ); - doReindexTest( - INDEX_DATASOURCE, - reindexDatasourceWithDruidInputSource, - REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - REINDEX_QUERIES_RESOURCE - ); + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK_WITH_INPUT_SOURCE, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + doReindexTest( + INDEX_DATASOURCE, + reindexDatasourceWithDruidInputSource, + REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, + REINDEX_QUERIES_RESOURCE + ); } } diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json index f3165fd9bef7..a3d902eae0a8 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json @@ -28,24 +28,24 @@ }, "dimensionsSpec": { "dimensions": [ - "page", {"type": "string", "name": "language", "createBitmapIndex": false}, "user", "unpatrolled", - "newPage", + "page", "anonymous", "namespace", "country", "region", "city" - ] + ], + "dimensionExclusions" : ["robot", "continent"] }, "transformSpec": { "transforms": [ { "type": "expression", - "name": "newPage", - "expression": "page" + "name": "page", + "expression": "concat('tf_', page)" }, { "type": "expression", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json index 8920e9d33173..dee5cd7a9a26 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json @@ -63,7 +63,7 @@ "timestamp" : "2013-08-31T00:00:00.000Z", "event" : { "added_count_times_ten" : 27160.0, - "page" : "Crimson Typhoon", + "page" : "tf_Crimson Typhoon", "double_deleted_count" : 10, "added_count" : 2716 } From 7afad45495820ca7e297a25316fc3de57d3a6f06 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Mon, 30 Mar 2020 23:30:34 -0700 Subject: [PATCH 11/15] fix codestyle --- .../druid/tests/indexer/ITTransformTest.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 461e23bcd97e..7e01cff3efb8 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -51,20 +51,20 @@ public void testIndexAndReIndexWithTransformSpec() throws IOException final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); final Closeable ignored2 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix()) ) { - doIndexTest( - INDEX_DATASOURCE, - INDEX_TASK_WITH_INPUT_SOURCE, - INDEX_QUERIES_RESOURCE, - false, - true, - true - ); - doReindexTest( - INDEX_DATASOURCE, - reindexDatasourceWithDruidInputSource, - REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, - REINDEX_QUERIES_RESOURCE - ); + doIndexTest( + INDEX_DATASOURCE, + INDEX_TASK_WITH_INPUT_SOURCE, + INDEX_QUERIES_RESOURCE, + false, + true, + true + ); + doReindexTest( + INDEX_DATASOURCE, + reindexDatasourceWithDruidInputSource, + REINDEX_TASK_WITH_DRUID_INPUT_SOURCE, + REINDEX_QUERIES_RESOURCE + ); } } From 65216e3a583e2e1a2f88aebd66ebb9be5fe40535 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Tue, 31 Mar 2020 00:12:06 -0700 Subject: [PATCH 12/15] dont use junit in integration tests --- .../java/org/apache/druid/tests/indexer/ITTransformTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java index 7e01cff3efb8..4a5116402873 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java @@ -21,7 +21,6 @@ import org.apache.druid.testing.guice.DruidTestModuleFactory; import org.apache.druid.tests.TestNGGroup; -import org.junit.Assert; import org.testng.annotations.Guice; import org.testng.annotations.Test; @@ -73,7 +72,6 @@ public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOExcep { // TODO: re-instate this test when https://github.com/apache/druid/issues/9591 is fixed // Move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests! - Assert.fail(); final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData"; try ( final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix()); @@ -100,7 +98,6 @@ public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOExcep public void testIndexWithFirehoseAndTransforms() throws IOException { // TODO: re-instate this test when https://github.com/apache/druid/issues/9589 is fixed - Assert.fail(); final String indexDatasource = INDEX_DATASOURCE + "-firehose"; try ( final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix()); From fc8feddc1616c9ecc3b251af918a02dcb1924882 Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Tue, 31 Mar 2020 14:18:12 -0700 Subject: [PATCH 13/15] actually fix the bug --- .../common/ReingestionTimelineUtils.java | 32 +++++++++++++++++++ .../IngestSegmentFirehoseFactory.java | 17 +++------- .../indexing/input/DruidInputSource.java | 16 +++------- ...uid_input_source_task_with_transforms.json | 18 ++++++++--- ...pedia_reindex_queries_with_transforms.json | 12 +++++-- ...ikipedia_reindex_task_with_transforms.json | 11 +++++-- 6 files changed, 74 insertions(+), 32 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java index 1f4820f39427..bd9d21457eb6 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java @@ -22,11 +22,13 @@ import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; import com.google.common.collect.Lists; +import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.TimelineObjectHolder; import org.apache.druid.timeline.partition.PartitionChunk; import javax.annotation.Nullable; +import javax.validation.constraints.NotNull; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -101,4 +103,34 @@ public static List getUniqueMetrics(List getDimensionsToReingest( + @Nullable List explicitDimensions, + @NotNull DimensionsSpec dimensionsSpec, + @NotNull List> timeLineSegments) + { + final List dims; + if (explicitDimensions != null) { + dims = explicitDimensions; + } else if (dimensionsSpec.hasCustomDimensions()) { + dims = dimensionsSpec.getDimensionNames(); + } else { + dims = ReingestionTimelineUtils.getUniqueDimensions( + timeLineSegments, + dimensionsSpec.getDimensionExclusions() + ); + } + return dims; + } } diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java index 41c1c8f258ef..6248828d32b4 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java @@ -214,18 +214,11 @@ public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) } } - final List dims; - if (dimensions != null) { - dims = dimensions; - } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) { - dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames(); - } else { - dims = ReingestionTimelineUtils.getUniqueDimensions( - timeLineSegments, - inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions() - ); - } - + final List dims = ReingestionTimelineUtils.getDimensionsToReingest( + dimensions, + inputRowParser.getParseSpec().getDimensionsSpec(), + timeLineSegments + ); final List metricsList = metrics == null ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments) : metrics; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java index 8fc214263b1b..f5f086a425bc 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java @@ -181,17 +181,11 @@ protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nu .from(partitionHolder) .transform(chunk -> new DruidSegmentInputEntity(segmentLoader, chunk.getObject(), holder.getInterval())); }).iterator(); - final List effectiveDimensions; - if (dimensions == null) { - effectiveDimensions = ReingestionTimelineUtils.getUniqueDimensions( - timeline, - inputRowSchema.getDimensionsSpec().getDimensionExclusions() - ); - } else if (inputRowSchema.getDimensionsSpec().hasCustomDimensions()) { - effectiveDimensions = inputRowSchema.getDimensionsSpec().getDimensionNames(); - } else { - effectiveDimensions = dimensions; - } + final List effectiveDimensions = ReingestionTimelineUtils.getDimensionsToReingest( + dimensions, + inputRowSchema.getDimensionsSpec(), + timeline + ); List effectiveMetrics; if (metrics == null) { diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json index a3d902eae0a8..3e8a44c5c592 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json @@ -32,26 +32,36 @@ "user", "unpatrolled", "page", + "newPage", "anonymous", "namespace", "country", "region", "city" - ], - "dimensionExclusions" : ["robot", "continent"] + ] }, "transformSpec": { "transforms": [ { "type": "expression", - "name": "page", - "expression": "concat('tf_', page)" + "name": "newPage", + "expression": "page" + }, + { + "type": "expression", + "name": "city", + "expression": "concat('city-', city)" }, { "type": "expression", "name": "one-plus-triple-added", "expression": "\"triple-added\" + 1" }, + { + "type": "expression", + "name": "delta", + "expression": "\"delta\" / 2" + }, { "type": "expression", "name": "double-deleted", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json index dee5cd7a9a26..40503121b926 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json @@ -23,7 +23,8 @@ "dataSource":"%%DATASOURCE%%", "granularity":"day", "dimensions":[ - "page" + "newPage", + "city" ], "filter":{ "type":"selector", @@ -40,6 +41,11 @@ "type":"longSum", "fieldName":"double-deleted", "name":"double_deleted_count" + }, + { + "type":"longSum", + "fieldName":"delta", + "name":"delta_overshadowed" } ], "postAggregations": [ @@ -63,8 +69,10 @@ "timestamp" : "2013-08-31T00:00:00.000Z", "event" : { "added_count_times_ten" : 27160.0, - "page" : "tf_Crimson Typhoon", + "newPage" : "Crimson Typhoon", + "city" : "city-Taiyuan", "double_deleted_count" : 10, + "delta_overshadowed" : 450, "added_count" : 2716 } } ] diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json index 6e9d86273f69..029b136d441b 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json @@ -49,10 +49,10 @@ }, "dimensionsSpec": { "dimensions": [ - "page", {"type": "string", "name": "language", "createBitmapIndex": false}, "user", "unpatrolled", + "page", "newPage", "anonymous", "namespace", @@ -70,14 +70,19 @@ }, { "type": "expression", - "name": "country", - "expression": "concat(\"country-\", country)" + "name": "city", + "expression": "concat('city-', city)" }, { "type": "expression", "name": "one-plus-triple-added", "expression": "\"triple-added\" + 1" }, + { + "type": "expression", + "name": "delta", + "expression": "\"delta\" / 2" + }, { "type": "expression", "name": "double-deleted", From 53433a2623d7669e6ca80bf91bbd4bb1572c391d Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Tue, 31 Mar 2020 14:22:44 -0700 Subject: [PATCH 14/15] fix checkstyle --- .../org/apache/druid/indexing/input/DruidInputSource.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java index f5f086a425bc..b9cc5759cd3b 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java @@ -182,10 +182,10 @@ protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nu .transform(chunk -> new DruidSegmentInputEntity(segmentLoader, chunk.getObject(), holder.getInterval())); }).iterator(); final List effectiveDimensions = ReingestionTimelineUtils.getDimensionsToReingest( - dimensions, - inputRowSchema.getDimensionsSpec(), - timeline - ); + dimensions, + inputRowSchema.getDimensionsSpec(), + timeline + ); List effectiveMetrics; if (metrics == null) { From d5ee5ef38e6f706a2e55f12420d0d0409e048bec Mon Sep 17 00:00:00 2001 From: Suneet Saldanha Date: Tue, 31 Mar 2020 14:32:25 -0700 Subject: [PATCH 15/15] bring index tests closer to reindex tests --- .../wikipedia_index_queries_with_transform.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json index 482ec7a342bd..f0cfba677354 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json @@ -6,7 +6,8 @@ "dataSource":"%%DATASOURCE%%", "granularity":"day", "dimensions":[ - "page" + "page", + "city" ], "filter":{ "type":"selector", @@ -22,6 +23,11 @@ "type":"longSum", "fieldName":"triple-added", "name":"added_count" + }, + { + "type":"longSum", + "fieldName":"delta", + "name":"delta_sum" } ], "postAggregations": [ @@ -46,7 +52,9 @@ "event" : { "added_count_times_ten" : 27150.0, "page" : "Crimson Typhoon", + "city" : "Taiyuan", "added_count" : 2715, + "delta_sum" : 900, "rows" : 1 } } ]