From 6c8b10576f474eeb1d523ad2cb945cd7046f272a Mon Sep 17 00:00:00 2001 From: frank chen Date: Mon, 7 Jun 2021 14:08:58 +0800 Subject: [PATCH 01/13] Add jsonPath functions support --- .../common/parsers/JSONFlattenerMaker.java | 12 ++++++- .../common/parsers/JSONPathParserTest.java | 33 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java index e799dc5b803f..6df37ae1d117 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java @@ -115,7 +115,17 @@ public JsonProvider getJsonProvider() } @Nullable - private Object valueConversionFunction(JsonNode val) + private Object valueConversionFunction(Object val) + { + if (val instanceof JsonNode) { + return convertJsonNode((JsonNode) val); + } else { + return val; + } + } + + @Nullable + private Object convertJsonNode(JsonNode val) { if (val == null || val.isNull()) { return null; diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java index 5d2d7de3f7fa..01b1017fc8c3 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java @@ -27,6 +27,7 @@ import org.junit.rules.ExpectedException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -239,4 +240,36 @@ public void testParseFail() final Parser jsonParser = new JSONPathParser(new JSONPathSpec(true, fields), null, false); jsonParser.parseToMap(NOT_JSON); } + + @Test + public void testJSONPathFunctions() + { + List fields = Arrays.asList( + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-length", "$.met.a.length()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-min", "$.met.a.min()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-max", "$.met.a.max()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-avg", "$.met.a.avg()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-sum", "$.met.a.sum()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-stddev", "$.met.a.stddev()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-append", "$.met.a.append(10)"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "concat", "$.concat($.foo.bar1, $.foo.bar2)") + ); + + final Parser jsonParser = new JSONPathParser(new JSONPathSpec(true, fields), null, false); + final Map jsonMap = jsonParser.parseToMap(NESTED_JSON); + + // values of met.a array are: 7,8,9 + Assert.assertEquals(3, jsonMap.get("met-array-length")); + Assert.assertEquals(7.0, jsonMap.get("met-array-min")); + Assert.assertEquals(9.0, jsonMap.get("met-array-max")); + Assert.assertEquals(8.0, jsonMap.get("met-array-avg")); + Assert.assertEquals(24.0, jsonMap.get("met-array-sum")); + + //deviation of [7,8,9] is 1/3, stddev is sqrt(1/3), approximately 0.8165 + Assert.assertEquals(0.8165, (double)jsonMap.get("met-array-stddev"), 0.00001); + + Assert.assertEquals(ImmutableList.of(7L, 8L, 9L, 10L), jsonMap.get("met-array-append")); + Assert.assertEquals("aaabbb", jsonMap.get("concat")); + } + } From caf8ae94593963fc2a9422b4c55dd1f67df7bade Mon Sep 17 00:00:00 2001 From: frank chen Date: Tue, 8 Jun 2021 15:07:17 +0800 Subject: [PATCH 02/13] Add jsonPath function test for Avro --- .../common/parsers/JSONPathParserTest.java | 2 +- .../input/avro/AvroFlattenerMakerTest.java | 40 ++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java index 01b1017fc8c3..0afbcb0d812f 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java @@ -266,7 +266,7 @@ public void testJSONPathFunctions() Assert.assertEquals(24.0, jsonMap.get("met-array-sum")); //deviation of [7,8,9] is 1/3, stddev is sqrt(1/3), approximately 0.8165 - Assert.assertEquals(0.8165, (double)jsonMap.get("met-array-stddev"), 0.00001); + Assert.assertEquals(0.8165, (double) jsonMap.get("met-array-stddev"), 0.00001); Assert.assertEquals(ImmutableList.of(7L, 8L, 9L, 10L), jsonMap.get("met-array-append")); Assert.assertEquals("aaabbb", jsonMap.get("concat")); diff --git a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java index 076167269a74..e27783573ab9 100644 --- a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java +++ b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java @@ -23,6 +23,7 @@ import org.apache.druid.data.input.SomeAvroDatum; import org.junit.Assert; import org.junit.Test; + import java.util.Collections; import java.util.List; @@ -146,6 +147,37 @@ public void makeJsonPathExtractor() record.getSomeIntArray(), flattener.makeJsonPathExtractor("$.someIntArray").apply(record) ); + Assert.assertEquals( + (double) record.getSomeIntArray().stream().mapToInt(Integer::intValue).min().getAsInt(), + + //return type of min is double + flattener.makeJsonPathExtractor("$.someIntArray.min()").apply(record) + ); + Assert.assertEquals( + (double) record.getSomeIntArray().stream().mapToInt(Integer::intValue).max().getAsInt(), + + //return type of max is double + flattener.makeJsonPathExtractor("$.someIntArray.max()").apply(record) + ); + Assert.assertEquals( + record.getSomeIntArray().stream().mapToInt(Integer::intValue).average().getAsDouble(), + flattener.makeJsonPathExtractor("$.someIntArray.avg()").apply(record) + ); + Assert.assertEquals( + record.getSomeIntArray().size(), + flattener.makeJsonPathExtractor("$.someIntArray.length()").apply(record) + ); + Assert.assertEquals( + (double) record.getSomeIntArray().stream().mapToInt(Integer::intValue).sum(), + + //return type of sum is double + flattener.makeJsonPathExtractor("$.someIntArray.sum()").apply(record) + ); + Assert.assertEquals( + 2.681, + (double) flattener.makeJsonPathExtractor("$.someIntArray.stddev()").apply(record), + 0.0001 + ); Assert.assertEquals( record.getSomeStringArray(), flattener.makeJsonPathExtractor("$.someStringArray").apply(record) @@ -212,10 +244,14 @@ public void makeJsonPathExtractor() flattener.makeJsonPathExtractor("$.someRecordArray[?(@.nestedString)]").apply(record) ); - List nestedStringArray = Collections.singletonList(record.getSomeRecordArray().get(0).getNestedString().toString()); + List nestedStringArray = Collections.singletonList(record.getSomeRecordArray() + .get(0) + .getNestedString() + .toString()); Assert.assertEquals( nestedStringArray, - flattener.makeJsonPathExtractor("$.someRecordArray[?(@.nestedString=='string in record')].nestedString").apply(record) + flattener.makeJsonPathExtractor("$.someRecordArray[?(@.nestedString=='string in record')].nestedString") + .apply(record) ); } From d4b1ea46681a5add551986b0c49a8d840b387111 Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 18 Jun 2021 11:16:11 +0800 Subject: [PATCH 03/13] Add jsonPath function length() to Orc --- .../apache/druid/data/input/orc/OrcStructJsonProvider.java | 2 +- .../java/org/apache/druid/data/input/orc/OrcReaderTest.java | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java index de84bfbd3ba2..780de1930ca3 100644 --- a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java +++ b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java @@ -185,6 +185,6 @@ public String toJson(final Object o) @Override public Object unwrap(final Object o) { - throw new UnsupportedOperationException("Unused"); + return o; } } diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java index 9726c0e14671..093ae7ab87e6 100644 --- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java +++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java @@ -112,6 +112,7 @@ public void testOrcFile11Format() throws IOException ImmutableList.of( new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_middleListLength", "$.middle.list.length()"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1") ) @@ -145,6 +146,9 @@ public void testOrcFile11Format() throws IOException Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int"))); Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist")); Assert.assertEquals("good", Iterables.getOnlyElement(row.getDimension("list_struct_string"))); + + Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_intlistLength"))); + Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_middleListLength"))); Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), row.getTimestamp()); while (iterator.hasNext()) { From 5fa7412c7a4950eb1ef58c71524c82ab3737ee51 Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 18 Jun 2021 11:31:12 +0800 Subject: [PATCH 04/13] Add jsonPath function length() to Parquet --- .../example/flattening/flat_1_flatten.json | 5 +++++ .../data/input/parquet/simple/ParquetGroupJsonProvider.java | 2 +- .../data/input/parquet/FlattenSpecParquetInputTest.java | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json b/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json index 75caf256f2ab..760dea7757bf 100644 --- a/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json +++ b/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json @@ -46,6 +46,11 @@ "type": "path", "name": "list", "expr": "$.listDim" + }, + { + "type": "path", + "name": "listLength", + "expr": "$.listDim.length()" } ] }, diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java index 3ba15e41a98e..3190c7e28ad4 100644 --- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java +++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java @@ -191,7 +191,7 @@ public String toJson(final Object o) @Override public Object unwrap(final Object o) { - throw new UnsupportedOperationException("Unused"); + return o; } } diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java index 6532ab222df8..38a916c93e08 100644 --- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java +++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java @@ -116,6 +116,7 @@ public void testFlat1Flatten() throws IOException, InterruptedException Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0)); Assert.assertEquals("listDim1v1", rows.get(0).getDimension("list").get(0)); Assert.assertEquals("listDim1v2", rows.get(0).getDimension("list").get(1)); + Assert.assertEquals("2", rows.get(0).getDimension("listLength").get(0)); Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue()); } From fc5f028c973f3983351c9914b676916983a371cf Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 18 Jun 2021 14:51:00 +0800 Subject: [PATCH 05/13] Add more tests to ORC format --- .../example/test_json_path_functions.orc | Bin 0 -> 473 bytes .../druid/data/input/orc/OrcReaderTest.java | 58 +++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 extensions-core/orc-extensions/example/test_json_path_functions.orc diff --git a/extensions-core/orc-extensions/example/test_json_path_functions.orc b/extensions-core/orc-extensions/example/test_json_path_functions.orc new file mode 100644 index 0000000000000000000000000000000000000000..e3916d6a23308afc61f3c1a4573022ea9aab116b GIT binary patch literal 473 zcmeYdau#G@;9?VE;b074Fk@hNJn3mtDg&dUAfKD6o!+!5XJVWc4Kxo59$;XQ1}fy{ zVrF1qfT-36@`WLM9u8(9HV!@kAqj&31{07ekCT!am_erK+jX|3nwP&d3vUQuZ-gR@EpIhRB>n#>juW@i>K zR!r86JO1d5%ijk*NA`T_IpV|60W|Z|3`0N-JIhhsaGcUQPOA>Pw@?{4L;V_4Ym=ntRp9~%iJ2}X`CMgc~Nh6Vv8CI%i2cLVmOFU-vT HLCz8YkPMkW literal 0 HcmV?d00001 diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java index 093ae7ab87e6..9b2b8e7e6b1d 100644 --- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java +++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java @@ -112,7 +112,11 @@ public void testOrcFile11Format() throws IOException ImmutableList.of( new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"), - new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_middleListLength", "$.middle.list.length()"), + new JSONPathFieldSpec( + JSONPathFieldType.PATH, + "struct_list_struct_middleListLength", + "$.middle.list.length()" + ), new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1") ) @@ -257,6 +261,58 @@ public void testDate2038() throws IOException } } + /** + * schema: struct, ts:timestamp> + * data: {"dim1","[7,8,9]","2000-03-12 15:00:00"} + */ + @Test + public void testJsonPathFunctions() throws IOException + { + final OrcInputFormat inputFormat = new OrcInputFormat( + new JSONPathSpec( + true, + ImmutableList.of( + new JSONPathFieldSpec(JSONPathFieldType.PATH, "min", "$.list.min()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "max", "$.list.max()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "avg", "$.list.avg()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "len", "$.list.length()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "sum", "$.list.sum()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "stddev", "$.list.stddev()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "append", "$.list.append(10)") + ) + ), + null, + new Configuration() + ); + final InputEntityReader reader = createReader( + new TimestampSpec("ts", "millis", null), + new DimensionsSpec(null), + inputFormat, + "example/test_json_path_functions.orc" + ); + try (CloseableIterator iterator = reader.read()) { + int actualRowCount = 0; + + while (iterator.hasNext()) { + final InputRow row = iterator.next(); + actualRowCount++; + + Assert.assertEquals("7.0", Iterables.getOnlyElement(row.getDimension("min"))); + Assert.assertEquals("8.0", Iterables.getOnlyElement(row.getDimension("avg"))); + Assert.assertEquals("9.0", Iterables.getOnlyElement(row.getDimension("max"))); + Assert.assertEquals("24.0", Iterables.getOnlyElement(row.getDimension("sum"))); + Assert.assertEquals("3", Iterables.getOnlyElement(row.getDimension("len"))); + + //deviation of [7,8,9] is 1/3, stddev is sqrt(1/3), approximately 0.8165 + Assert.assertEquals(0.8165, Double.parseDouble(Iterables.getOnlyElement(row.getDimension("stddev"))), 0.0001); + + //append is not supported + Assert.assertEquals(Collections.emptyList(), row.getDimension("append")); + } + Assert.assertEquals(1, actualRowCount); + } + } + private InputEntityReader createReader( TimestampSpec timestampSpec, DimensionsSpec dimensionsSpec, From fa8ffe96d35b0be4e0d4efc85a392f760ec3d183 Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 18 Jun 2021 15:13:20 +0800 Subject: [PATCH 06/13] update doc --- docs/ingestion/data-formats.md | 13 +++++++++++++ website/.spelling | 1 + 2 files changed, 14 insertions(+) diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index d431fead16e1..e9f884acd95c 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -553,6 +553,19 @@ Each entry in the `fields` list can have the following components: * If `useFieldDiscovery` is enabled, any discovered field with the same name as one already defined in the `fields` list will be skipped, rather than added twice. * [http://jsonpath.herokuapp.com/](http://jsonpath.herokuapp.com/) is useful for testing `path`-type expressions. * jackson-jq supports a subset of the full [jq](https://stedolan.github.io/jq/) syntax. Please refer to the [jackson-jq documentation](https://github.com/eiiches/jackson-jq) for details. +* [JsonPath](https://github.com/jayway/JsonPath) supports a bunch of functions, but not all of these functions are supported by Druid now. Following matrix shows the current supported JsonPath functions and corresponding data formats. Please also note the output data type of these functions. + + | Function | Description | Output type | json | orc | avro | parquet | + | :----------| :------------------------------------------------------------------ |:----------- |:-----|:----|:-----|:-----| + | min() | Provides the min value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | max() | Provides the max value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | avg() | Provides the average value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | stddev() | Provides the standard deviation value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | length() | Provides the length of an array | Integer | ✓ | ✓ | ✓ | ✓ | + | sum() | Provides the sum value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | concat(X) | Provides a concatenated version of the path output with a new item | like input | ✓ | ✗ | ✗ | ✗ | + | append(X) | add an item to the json path output array | like input | ✓ | ✗ | ✗ | ✗ | + ## Parser diff --git a/website/.spelling b/website/.spelling index eeba3cbf4bf2..f3fe7d61c864 100644 --- a/website/.spelling +++ b/website/.spelling @@ -37,6 +37,7 @@ BCP Base64 Base64-encoded ByteBuffer +concat CIDR CORS CNF From 5a5484b9ea9d984622149c8113a566269cc10842 Mon Sep 17 00:00:00 2001 From: frank chen Date: Sun, 19 Sep 2021 12:28:24 +0800 Subject: [PATCH 07/13] Fix exception during ingestion --- .../common/parsers/JSONFlattenerMaker.java | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java index 6df37ae1d117..18f73fa9def3 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java @@ -86,7 +86,14 @@ public Object getRootField(final JsonNode obj, final String key) public Function makeJsonPathExtractor(final String expr) { final JsonPath jsonPath = JsonPath.compile(expr); - return node -> valueConversionFunction(jsonPath.read(node, JSONPATH_CONFIGURATION)); + return node -> { + Object val = jsonPath.read(node, JSONPATH_CONFIGURATION); + if (val instanceof JsonNode) { + return valueConversionFunction((JsonNode) val); + } else { + return val; + } + }; } @Override @@ -115,17 +122,7 @@ public JsonProvider getJsonProvider() } @Nullable - private Object valueConversionFunction(Object val) - { - if (val instanceof JsonNode) { - return convertJsonNode((JsonNode) val); - } else { - return val; - } - } - - @Nullable - private Object convertJsonNode(JsonNode val) + private Object valueConversionFunction(JsonNode val) { if (val == null || val.isNull()) { return null; From c326abc24c20048ff4288e1caebb65b5fdcf9b17 Mon Sep 17 00:00:00 2001 From: frank chen Date: Sun, 19 Sep 2021 13:25:50 +0800 Subject: [PATCH 08/13] Add IT test case --- .../indexer/json_path_index_queries.json | 24 ++++++ .../indexer/json_path_index_task.json | 80 +++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 integration-tests/src/test/resources/indexer/json_path_index_queries.json create mode 100644 integration-tests/src/test/resources/indexer/json_path_index_task.json diff --git a/integration-tests/src/test/resources/indexer/json_path_index_queries.json b/integration-tests/src/test/resources/indexer/json_path_index_queries.json new file mode 100644 index 000000000000..5dc63ae79904 --- /dev/null +++ b/integration-tests/src/test/resources/indexer/json_path_index_queries.json @@ -0,0 +1,24 @@ +[ + { + "description": "timeseries", + "query":{ + "queryType" : "timeseries", + "dataSource": "%%DATASOURCE%%", + "granularity":"day", + "intervals":[ + "1000/3000" + ] + }, + "expectedResults":[ + { + "timestamp" : "2013-08-31T01:02:33Z", + "result" : { + "min":"0", + "max":"4", + "len": 5, + "sum": 10 + } + } + ] + } +] \ No newline at end of file diff --git a/integration-tests/src/test/resources/indexer/json_path_index_task.json b/integration-tests/src/test/resources/indexer/json_path_index_task.json new file mode 100644 index 000000000000..146dd553abad --- /dev/null +++ b/integration-tests/src/test/resources/indexer/json_path_index_task.json @@ -0,0 +1,80 @@ +{ + "type": "index_parallel", + "dataSource": "json_path_index_test", + "spec": { + "dataSchema": { + "dataSource": "json_path_index_test", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensions": [ + { + "type": "long", + "name": "len" + }, + { + "type": "long", + "name": "min" + }, + { + "type": "long", + "name": "max" + }, + { + "type": "long", + "name": "sum" + } + ], + "dimensionExclusions": [ + "__time", + "timestamp" + ] + }, + "metricsSpec": [], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "HOUR", + "queryGranularity": { + "type": "none" + } + } + }, + "ioConfig": { + "type": "index_parallel", + "inputSource": { + "type": "inline", + "data": "{\"timestamp\": \"2013-08-31T01:02:33Z\", \"values\": [0,1,2,3,4] }" + }, + "inputFormat": { + "type": "json", + "flattenSpec": { + "useFieldDiscovery": true, + "fields": [ + { + "type": "path", + "name": "len", + "expr": "$.values.length()" + }, + { + "type": "path", + "name": "min", + "expr": "$.values.min()" + }, + { + "type": "path", + "name": "max", + "expr": "$.values.max()" + }, + { + "type": "path", + "name": "sum", + "expr": "$.values.sum()" + } + ] + } + } + } + } +} \ No newline at end of file From 54eb9edf73affb1409700f5c7f82bf7be9f7a816 Mon Sep 17 00:00:00 2001 From: frank chen Date: Sun, 19 Sep 2021 17:05:58 +0800 Subject: [PATCH 09/13] Revert "Fix exception during ingestion" This reverts commit 5a5484b9ea9d984622149c8113a566269cc10842. --- .../common/parsers/JSONFlattenerMaker.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java index 18f73fa9def3..6df37ae1d117 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java @@ -86,14 +86,7 @@ public Object getRootField(final JsonNode obj, final String key) public Function makeJsonPathExtractor(final String expr) { final JsonPath jsonPath = JsonPath.compile(expr); - return node -> { - Object val = jsonPath.read(node, JSONPATH_CONFIGURATION); - if (val instanceof JsonNode) { - return valueConversionFunction((JsonNode) val); - } else { - return val; - } - }; + return node -> valueConversionFunction(jsonPath.read(node, JSONPATH_CONFIGURATION)); } @Override @@ -122,7 +115,17 @@ public JsonProvider getJsonProvider() } @Nullable - private Object valueConversionFunction(JsonNode val) + private Object valueConversionFunction(Object val) + { + if (val instanceof JsonNode) { + return convertJsonNode((JsonNode) val); + } else { + return val; + } + } + + @Nullable + private Object convertJsonNode(JsonNode val) { if (val == null || val.isNull()) { return null; From c0feaa62c3baa311c9873d6038d00edfcdcda5c6 Mon Sep 17 00:00:00 2001 From: frank chen Date: Sun, 19 Sep 2021 17:57:52 +0800 Subject: [PATCH 10/13] update IT test case --- .../indexer/json_path_index_queries.json | 51 ++++++++++++++----- .../indexer/json_path_index_task.json | 4 +- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/integration-tests/src/test/resources/indexer/json_path_index_queries.json b/integration-tests/src/test/resources/indexer/json_path_index_queries.json index 5dc63ae79904..1940cc6ea021 100644 --- a/integration-tests/src/test/resources/indexer/json_path_index_queries.json +++ b/integration-tests/src/test/resources/indexer/json_path_index_queries.json @@ -1,24 +1,49 @@ [ { "description": "timeseries", - "query":{ - "queryType" : "timeseries", - "dataSource": "%%DATASOURCE%%", - "granularity":"day", - "intervals":[ + "query": { + "queryType": "timeseries", + "dataSource": "json_path_index_test", + "intervals": [ "1000/3000" - ] + ], + "aggregations": [ + { + "type": "longSum", + "name": "len", + "fieldName": "len" + }, + { + "type": "longSum", + "name": "max", + "fieldName": "max" + }, + { + "type": "longSum", + "name": "min", + "fieldName": "min" + }, + { + "type": "longSum", + "name": "sum", + "fieldName": "sum" + } + ], + "granularity": { + "type": "all" + } }, - "expectedResults":[ + "expectedResults": [ { - "timestamp" : "2013-08-31T01:02:33Z", - "result" : { - "min":"0", - "max":"4", + "timestamp": "2013-08-31T01:02:33.000Z", + "result": { + "sum": 10, + "min": 0, "len": 5, - "sum": 10 + "max": 4 } } ] } -] \ No newline at end of file +] + diff --git a/integration-tests/src/test/resources/indexer/json_path_index_task.json b/integration-tests/src/test/resources/indexer/json_path_index_task.json index 146dd553abad..2fd6990b1162 100644 --- a/integration-tests/src/test/resources/indexer/json_path_index_task.json +++ b/integration-tests/src/test/resources/indexer/json_path_index_task.json @@ -1,5 +1,5 @@ { - "type": "index_parallel", + "type": "index", "dataSource": "json_path_index_test", "spec": { "dataSchema": { @@ -42,7 +42,7 @@ } }, "ioConfig": { - "type": "index_parallel", + "type": "index", "inputSource": { "type": "inline", "data": "{\"timestamp\": \"2013-08-31T01:02:33Z\", \"values\": [0,1,2,3,4] }" From 97c2d8619b93edd11fc1b81f790d33b99be13a9c Mon Sep 17 00:00:00 2001 From: frank chen Date: Sun, 19 Sep 2021 18:17:54 +0800 Subject: [PATCH 11/13] Add 'keys()' --- docs/ingestion/data-formats.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index 8d5ca5d8574a..801303689d75 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -571,6 +571,7 @@ Each entry in the `fields` list can have the following components: | sum() | Provides the sum value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | | concat(X) | Provides a concatenated version of the path output with a new item | like input | ✓ | ✗ | ✗ | ✗ | | append(X) | add an item to the json path output array | like input | ✓ | ✗ | ✗ | ✗ | + | keys() | Provides the property keys (An alternative for terminal tilde ~) | Set | ✗ | ✗ | ✗ | ✗ | ## Parser From 9c68b2960fc594b99f47a4d4b24d5b970b5857a0 Mon Sep 17 00:00:00 2001 From: frank chen Date: Sun, 19 Sep 2021 18:47:31 +0800 Subject: [PATCH 12/13] Commit IT test case --- .../druid/tests/indexer/ITIndexerTest.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java index 833a0ac5855b..93c8d4621652 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java @@ -362,7 +362,21 @@ public void testGetLockedIntervals() throws Exception waitForAllTasksToCompleteForDataSource(datasourceName); } - } + @Test + public void testJsonFunctions() throws Exception + { + final String taskSpec = getResourceAsString("/indexer/json_path_index_task.json"); + + submitTaskAndWait( + taskSpec, + "json_path_index_test", + false, + true, + new Pair<>(false, false) + ); + + doTestQuery("json_path_index_test", "/indexer/json_path_index_queries.json"); + } } From b517d0c8c643c5569193eef99d096ab4cb63876f Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 19 Nov 2021 15:45:14 +0800 Subject: [PATCH 13/13] Fix UT --- .../test/java/org/apache/druid/data/input/orc/OrcReaderTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java index 9b2b8e7e6b1d..49ebd54d5e3c 100644 --- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java +++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java @@ -151,7 +151,6 @@ public void testOrcFile11Format() throws IOException Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist")); Assert.assertEquals("good", Iterables.getOnlyElement(row.getDimension("list_struct_string"))); - Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_intlistLength"))); Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_middleListLength"))); Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), row.getTimestamp());