diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java index e799dc5b803f..6df37ae1d117 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java @@ -115,7 +115,17 @@ public JsonProvider getJsonProvider() } @Nullable - private Object valueConversionFunction(JsonNode val) + private Object valueConversionFunction(Object val) + { + if (val instanceof JsonNode) { + return convertJsonNode((JsonNode) val); + } else { + return val; + } + } + + @Nullable + private Object convertJsonNode(JsonNode val) { if (val == null || val.isNull()) { return null; diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java index 5d2d7de3f7fa..0afbcb0d812f 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/JSONPathParserTest.java @@ -27,6 +27,7 @@ import org.junit.rules.ExpectedException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -239,4 +240,36 @@ public void testParseFail() final Parser jsonParser = new JSONPathParser(new JSONPathSpec(true, fields), null, false); jsonParser.parseToMap(NOT_JSON); } + + @Test + public void testJSONPathFunctions() + { + List fields = Arrays.asList( + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-length", "$.met.a.length()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-min", "$.met.a.min()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-max", "$.met.a.max()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-avg", "$.met.a.avg()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-sum", "$.met.a.sum()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-stddev", "$.met.a.stddev()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "met-array-append", "$.met.a.append(10)"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "concat", "$.concat($.foo.bar1, $.foo.bar2)") + ); + + final Parser jsonParser = new JSONPathParser(new JSONPathSpec(true, fields), null, false); + final Map jsonMap = jsonParser.parseToMap(NESTED_JSON); + + // values of met.a array are: 7,8,9 + Assert.assertEquals(3, jsonMap.get("met-array-length")); + Assert.assertEquals(7.0, jsonMap.get("met-array-min")); + Assert.assertEquals(9.0, jsonMap.get("met-array-max")); + Assert.assertEquals(8.0, jsonMap.get("met-array-avg")); + Assert.assertEquals(24.0, jsonMap.get("met-array-sum")); + + //deviation of [7,8,9] is 1/3, stddev is sqrt(1/3), approximately 0.8165 + Assert.assertEquals(0.8165, (double) jsonMap.get("met-array-stddev"), 0.00001); + + Assert.assertEquals(ImmutableList.of(7L, 8L, 9L, 10L), jsonMap.get("met-array-append")); + Assert.assertEquals("aaabbb", jsonMap.get("concat")); + } + } diff --git a/docs/ingestion/data-formats.md b/docs/ingestion/data-formats.md index 2f07d5c2f630..801303689d75 100644 --- a/docs/ingestion/data-formats.md +++ b/docs/ingestion/data-formats.md @@ -559,6 +559,20 @@ Each entry in the `fields` list can have the following components: * If `useFieldDiscovery` is enabled, any discovered field with the same name as one already defined in the `fields` list will be skipped, rather than added twice. * [http://jsonpath.herokuapp.com/](http://jsonpath.herokuapp.com/) is useful for testing `path`-type expressions. * jackson-jq supports a subset of the full [jq](https://stedolan.github.io/jq/) syntax. Please refer to the [jackson-jq documentation](https://github.com/eiiches/jackson-jq) for details. +* [JsonPath](https://github.com/jayway/JsonPath) supports a bunch of functions, but not all of these functions are supported by Druid now. Following matrix shows the current supported JsonPath functions and corresponding data formats. Please also note the output data type of these functions. + + | Function | Description | Output type | json | orc | avro | parquet | + | :----------| :------------------------------------------------------------------ |:----------- |:-----|:----|:-----|:-----| + | min() | Provides the min value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | max() | Provides the max value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | avg() | Provides the average value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | stddev() | Provides the standard deviation value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | length() | Provides the length of an array | Integer | ✓ | ✓ | ✓ | ✓ | + | sum() | Provides the sum value of an array of numbers | Double | ✓ | ✓ | ✓ | ✓ | + | concat(X) | Provides a concatenated version of the path output with a new item | like input | ✓ | ✗ | ✗ | ✗ | + | append(X) | add an item to the json path output array | like input | ✓ | ✗ | ✗ | ✗ | + | keys() | Provides the property keys (An alternative for terminal tilde ~) | Set | ✗ | ✗ | ✗ | ✗ | + ## Parser diff --git a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java index db2fea429c7c..e7d503ab9715 100644 --- a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java +++ b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java @@ -262,6 +262,37 @@ private void makeJsonPathExtractor_common(final SomeAvroDatum record, final Avro record.getSomeIntArray(), flattener.makeJsonPathExtractor("$.someIntArray").apply(record) ); + Assert.assertEquals( + (double) record.getSomeIntArray().stream().mapToInt(Integer::intValue).min().getAsInt(), + + //return type of min is double + flattener.makeJsonPathExtractor("$.someIntArray.min()").apply(record) + ); + Assert.assertEquals( + (double) record.getSomeIntArray().stream().mapToInt(Integer::intValue).max().getAsInt(), + + //return type of max is double + flattener.makeJsonPathExtractor("$.someIntArray.max()").apply(record) + ); + Assert.assertEquals( + record.getSomeIntArray().stream().mapToInt(Integer::intValue).average().getAsDouble(), + flattener.makeJsonPathExtractor("$.someIntArray.avg()").apply(record) + ); + Assert.assertEquals( + record.getSomeIntArray().size(), + flattener.makeJsonPathExtractor("$.someIntArray.length()").apply(record) + ); + Assert.assertEquals( + (double) record.getSomeIntArray().stream().mapToInt(Integer::intValue).sum(), + + //return type of sum is double + flattener.makeJsonPathExtractor("$.someIntArray.sum()").apply(record) + ); + Assert.assertEquals( + 2.681, + (double) flattener.makeJsonPathExtractor("$.someIntArray.stddev()").apply(record), + 0.0001 + ); Assert.assertEquals( record.getSomeStringArray(), flattener.makeJsonPathExtractor("$.someStringArray").apply(record) diff --git a/extensions-core/orc-extensions/example/test_json_path_functions.orc b/extensions-core/orc-extensions/example/test_json_path_functions.orc new file mode 100644 index 000000000000..e3916d6a2330 Binary files /dev/null and b/extensions-core/orc-extensions/example/test_json_path_functions.orc differ diff --git a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java index de84bfbd3ba2..780de1930ca3 100644 --- a/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java +++ b/extensions-core/orc-extensions/src/main/java/org/apache/druid/data/input/orc/OrcStructJsonProvider.java @@ -185,6 +185,6 @@ public String toJson(final Object o) @Override public Object unwrap(final Object o) { - throw new UnsupportedOperationException("Unused"); + return o; } } diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java index 9726c0e14671..49ebd54d5e3c 100644 --- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java +++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java @@ -112,6 +112,11 @@ public void testOrcFile11Format() throws IOException ImmutableList.of( new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"), + new JSONPathFieldSpec( + JSONPathFieldType.PATH, + "struct_list_struct_middleListLength", + "$.middle.list.length()" + ), new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1") ) @@ -145,6 +150,8 @@ public void testOrcFile11Format() throws IOException Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_int"))); Assert.assertEquals(ImmutableList.of("1", "2"), row.getDimension("struct_list_struct_intlist")); Assert.assertEquals("good", Iterables.getOnlyElement(row.getDimension("list_struct_string"))); + + Assert.assertEquals("2", Iterables.getOnlyElement(row.getDimension("struct_list_struct_middleListLength"))); Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), row.getTimestamp()); while (iterator.hasNext()) { @@ -253,6 +260,58 @@ public void testDate2038() throws IOException } } + /** + * schema: struct, ts:timestamp> + * data: {"dim1","[7,8,9]","2000-03-12 15:00:00"} + */ + @Test + public void testJsonPathFunctions() throws IOException + { + final OrcInputFormat inputFormat = new OrcInputFormat( + new JSONPathSpec( + true, + ImmutableList.of( + new JSONPathFieldSpec(JSONPathFieldType.PATH, "min", "$.list.min()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "max", "$.list.max()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "avg", "$.list.avg()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "len", "$.list.length()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "sum", "$.list.sum()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "stddev", "$.list.stddev()"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "append", "$.list.append(10)") + ) + ), + null, + new Configuration() + ); + final InputEntityReader reader = createReader( + new TimestampSpec("ts", "millis", null), + new DimensionsSpec(null), + inputFormat, + "example/test_json_path_functions.orc" + ); + try (CloseableIterator iterator = reader.read()) { + int actualRowCount = 0; + + while (iterator.hasNext()) { + final InputRow row = iterator.next(); + actualRowCount++; + + Assert.assertEquals("7.0", Iterables.getOnlyElement(row.getDimension("min"))); + Assert.assertEquals("8.0", Iterables.getOnlyElement(row.getDimension("avg"))); + Assert.assertEquals("9.0", Iterables.getOnlyElement(row.getDimension("max"))); + Assert.assertEquals("24.0", Iterables.getOnlyElement(row.getDimension("sum"))); + Assert.assertEquals("3", Iterables.getOnlyElement(row.getDimension("len"))); + + //deviation of [7,8,9] is 1/3, stddev is sqrt(1/3), approximately 0.8165 + Assert.assertEquals(0.8165, Double.parseDouble(Iterables.getOnlyElement(row.getDimension("stddev"))), 0.0001); + + //append is not supported + Assert.assertEquals(Collections.emptyList(), row.getDimension("append")); + } + Assert.assertEquals(1, actualRowCount); + } + } + private InputEntityReader createReader( TimestampSpec timestampSpec, DimensionsSpec dimensionsSpec, diff --git a/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json b/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json index 75caf256f2ab..760dea7757bf 100644 --- a/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json +++ b/extensions-core/parquet-extensions/example/flattening/flat_1_flatten.json @@ -46,6 +46,11 @@ "type": "path", "name": "list", "expr": "$.listDim" + }, + { + "type": "path", + "name": "listLength", + "expr": "$.listDim.length()" } ] }, diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java index 3ba15e41a98e..3190c7e28ad4 100644 --- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java +++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java @@ -191,7 +191,7 @@ public String toJson(final Object o) @Override public Object unwrap(final Object o) { - throw new UnsupportedOperationException("Unused"); + return o; } } diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java index 6532ab222df8..38a916c93e08 100644 --- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java +++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/FlattenSpecParquetInputTest.java @@ -116,6 +116,7 @@ public void testFlat1Flatten() throws IOException, InterruptedException Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0)); Assert.assertEquals("listDim1v1", rows.get(0).getDimension("list").get(0)); Assert.assertEquals("listDim1v2", rows.get(0).getDimension("list").get(1)); + Assert.assertEquals("2", rows.get(0).getDimension("listLength").get(0)); Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue()); } diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java index 833a0ac5855b..93c8d4621652 100644 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java +++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITIndexerTest.java @@ -362,7 +362,21 @@ public void testGetLockedIntervals() throws Exception waitForAllTasksToCompleteForDataSource(datasourceName); } - } + @Test + public void testJsonFunctions() throws Exception + { + final String taskSpec = getResourceAsString("/indexer/json_path_index_task.json"); + + submitTaskAndWait( + taskSpec, + "json_path_index_test", + false, + true, + new Pair<>(false, false) + ); + + doTestQuery("json_path_index_test", "/indexer/json_path_index_queries.json"); + } } diff --git a/integration-tests/src/test/resources/indexer/json_path_index_queries.json b/integration-tests/src/test/resources/indexer/json_path_index_queries.json new file mode 100644 index 000000000000..1940cc6ea021 --- /dev/null +++ b/integration-tests/src/test/resources/indexer/json_path_index_queries.json @@ -0,0 +1,49 @@ +[ + { + "description": "timeseries", + "query": { + "queryType": "timeseries", + "dataSource": "json_path_index_test", + "intervals": [ + "1000/3000" + ], + "aggregations": [ + { + "type": "longSum", + "name": "len", + "fieldName": "len" + }, + { + "type": "longSum", + "name": "max", + "fieldName": "max" + }, + { + "type": "longSum", + "name": "min", + "fieldName": "min" + }, + { + "type": "longSum", + "name": "sum", + "fieldName": "sum" + } + ], + "granularity": { + "type": "all" + } + }, + "expectedResults": [ + { + "timestamp": "2013-08-31T01:02:33.000Z", + "result": { + "sum": 10, + "min": 0, + "len": 5, + "max": 4 + } + } + ] + } +] + diff --git a/integration-tests/src/test/resources/indexer/json_path_index_task.json b/integration-tests/src/test/resources/indexer/json_path_index_task.json new file mode 100644 index 000000000000..2fd6990b1162 --- /dev/null +++ b/integration-tests/src/test/resources/indexer/json_path_index_task.json @@ -0,0 +1,80 @@ +{ + "type": "index", + "dataSource": "json_path_index_test", + "spec": { + "dataSchema": { + "dataSource": "json_path_index_test", + "timestampSpec": { + "column": "timestamp", + "format": "iso" + }, + "dimensionsSpec": { + "dimensions": [ + { + "type": "long", + "name": "len" + }, + { + "type": "long", + "name": "min" + }, + { + "type": "long", + "name": "max" + }, + { + "type": "long", + "name": "sum" + } + ], + "dimensionExclusions": [ + "__time", + "timestamp" + ] + }, + "metricsSpec": [], + "granularitySpec": { + "type": "uniform", + "segmentGranularity": "HOUR", + "queryGranularity": { + "type": "none" + } + } + }, + "ioConfig": { + "type": "index", + "inputSource": { + "type": "inline", + "data": "{\"timestamp\": \"2013-08-31T01:02:33Z\", \"values\": [0,1,2,3,4] }" + }, + "inputFormat": { + "type": "json", + "flattenSpec": { + "useFieldDiscovery": true, + "fields": [ + { + "type": "path", + "name": "len", + "expr": "$.values.length()" + }, + { + "type": "path", + "name": "min", + "expr": "$.values.min()" + }, + { + "type": "path", + "name": "max", + "expr": "$.values.max()" + }, + { + "type": "path", + "name": "sum", + "expr": "$.values.sum()" + } + ] + } + } + } + } +} \ No newline at end of file diff --git a/website/.spelling b/website/.spelling index 017402cc0dc5..65a311949c8e 100644 --- a/website/.spelling +++ b/website/.spelling @@ -38,6 +38,7 @@ BCP Base64 Base64-encoded ByteBuffer +concat CIDR CORS CNF