Skip to content
5 changes: 4 additions & 1 deletion api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</dependency>

<dependency>
<groupId>net.thisptr</groupId>
<artifactId>jackson-jq</artifactId>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
Expand Down
3 changes: 3 additions & 0 deletions api/src/main/java/io/druid/data/input/impl/JSONParseSpec.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ private List<JSONPathParser.FieldSpec> convertFieldSpecs(List<JSONPathFieldSpec>
case PATH:
type = JSONPathParser.FieldType.PATH;
break;
case JQ:
type = JSONPathParser.FieldType.JQ;
break;
default:
throw new IllegalArgumentException("Invalid type for field " + druidSpec.getName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ public static JSONPathFieldSpec createNestedField(String name, String expr)
return new JSONPathFieldSpec(JSONPathFieldType.PATH, name, expr);
}

public static JSONPathFieldSpec createJqField(String name, String expr)
{
return new JSONPathFieldSpec(JSONPathFieldType.JQ, name, expr);
}

public static JSONPathFieldSpec createRootField(String name)
{
return new JSONPathFieldSpec(JSONPathFieldType.ROOT, name, null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
public enum JSONPathFieldType
{
ROOT,
PATH;
PATH,
JQ;

@JsonValue
@Override
Expand Down
18 changes: 18 additions & 0 deletions api/src/test/java/io/druid/data/input/impl/JSONPathSpecTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ public void testSerde() throws IOException
fields.add(JSONPathFieldSpec.createNestedField("hey0barx", "$.hey[0].barx"));
fields.add(JSONPathFieldSpec.createRootField("timestamp"));
fields.add(JSONPathFieldSpec.createRootField("foo.bar1"));
fields.add(JSONPathFieldSpec.createJqField("foobar1", ".foo.bar1"));
fields.add(JSONPathFieldSpec.createJqField("baz0", ".baz[0]"));
fields.add(JSONPathFieldSpec.createJqField("hey0barx", ".hey[0].barx"));

JSONPathSpec flattenSpec = new JSONPathSpec(true, fields);

Expand All @@ -55,6 +58,9 @@ public void testSerde() throws IOException
JSONPathFieldSpec hey0barx = serdeFields.get(2);
JSONPathFieldSpec timestamp = serdeFields.get(3);
JSONPathFieldSpec foodotbar1 = serdeFields.get(4);
JSONPathFieldSpec jqFoobar1 = serdeFields.get(5);
JSONPathFieldSpec jqBaz0 = serdeFields.get(6);
JSONPathFieldSpec jqHey0barx = serdeFields.get(7);

Assert.assertEquals(JSONPathFieldType.PATH, foobar1.getType());
Assert.assertEquals("foobar1", foobar1.getName());
Expand All @@ -68,6 +74,18 @@ public void testSerde() throws IOException
Assert.assertEquals("hey0barx", hey0barx.getName());
Assert.assertEquals("$.hey[0].barx", hey0barx.getExpr());

Assert.assertEquals(JSONPathFieldType.JQ, jqFoobar1.getType());
Assert.assertEquals("foobar1", jqFoobar1.getName());
Assert.assertEquals(".foo.bar1", jqFoobar1.getExpr());

Assert.assertEquals(JSONPathFieldType.JQ, jqBaz0.getType());
Assert.assertEquals("baz0", jqBaz0.getName());
Assert.assertEquals(".baz[0]", jqBaz0.getExpr());

Assert.assertEquals(JSONPathFieldType.JQ, jqHey0barx.getType());
Assert.assertEquals("hey0barx", jqHey0barx.getName());
Assert.assertEquals(".hey[0].barx", jqHey0barx.getExpr());

Assert.assertEquals(JSONPathFieldType.ROOT, timestamp.getType());
Assert.assertEquals("timestamp", timestamp.getName());
Assert.assertEquals(null, timestamp.getExpr());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,15 @@ public class FlattenJSONBenchmark

List<String> flatInputs;
List<String> nestedInputs;
List<String> jqInputs;
Parser flatParser;
Parser nestedParser;
Parser jqParser;
Parser fieldDiscoveryParser;
Parser forcedPathParser;
int flatCounter = 0;
int nestedCounter = 0;
int jqCounter = 0;

@Setup
public void prepare() throws Exception
Expand All @@ -64,9 +67,14 @@ public void prepare() throws Exception
for (int i = 0; i < numEvents; i++) {
nestedInputs.add(gen.generateNestedEvent());
}
jqInputs = new ArrayList<String>();
for (int i = 0; i < numEvents; i++) {
jqInputs.add(gen.generateNestedEvent()); // reuse the same event as "nested"
}

flatParser = gen.getFlatParser();
nestedParser = gen.getNestedParser();
jqParser = gen.getJqParser();
fieldDiscoveryParser = gen.getFieldDiscoveryParser();
forcedPathParser = gen.getForcedPathParser();
}
Expand All @@ -91,6 +99,16 @@ public Map<String, Object> flatten()
return parsed;
}

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public Map<String, Object> jqflatten()
{
Map<String, Object> parsed = jqParser.parse(jqInputs.get(jqCounter));
jqCounter = (jqCounter + 1) % numEvents;
return parsed;
}

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,46 @@ public Parser getForcedPathParser()
return spec.makeParser();
}

public Parser getJqParser()
{
List<JSONPathFieldSpec> fields = new ArrayList<>();
fields.add(JSONPathFieldSpec.createRootField("ts"));

fields.add(JSONPathFieldSpec.createRootField("d1"));
fields.add(JSONPathFieldSpec.createJqField("e1.d1", ".e1.d1"));
fields.add(JSONPathFieldSpec.createJqField("e1.d2", ".e1.d2"));
fields.add(JSONPathFieldSpec.createJqField("e2.d3", ".e2.d3"));
fields.add(JSONPathFieldSpec.createJqField("e2.d4", ".e2.d4"));
fields.add(JSONPathFieldSpec.createJqField("e2.d5", ".e2.d5"));
fields.add(JSONPathFieldSpec.createJqField("e2.d6", ".e2.d6"));
fields.add(JSONPathFieldSpec.createJqField("e2.ad1[0]", ".e2.ad1[0]"));
fields.add(JSONPathFieldSpec.createJqField("e2.ad1[1]", ".e2.ad1[1]"));
fields.add(JSONPathFieldSpec.createJqField("e2.ad1[2]", ".e2.ad1[2]"));
fields.add(JSONPathFieldSpec.createJqField("ae1[0].d1", ".ae1[0].d1"));
fields.add(JSONPathFieldSpec.createJqField("ae1[1].d1", ".ae1[1].d1"));
fields.add(JSONPathFieldSpec.createJqField("ae1[2].e1.d2", ".ae1[2].e1.d2"));

fields.add(JSONPathFieldSpec.createRootField("m3"));
fields.add(JSONPathFieldSpec.createJqField("e3.m1", ".e3.m1"));
fields.add(JSONPathFieldSpec.createJqField("e3.m2", ".e3.m2"));
fields.add(JSONPathFieldSpec.createJqField("e3.m3", ".e3.m3"));
fields.add(JSONPathFieldSpec.createJqField("e3.m4", ".e3.m4"));
fields.add(JSONPathFieldSpec.createJqField("e3.am1[0]", ".e3.am1[0]"));
fields.add(JSONPathFieldSpec.createJqField("e3.am1[1]", ".e3.am1[1]"));
fields.add(JSONPathFieldSpec.createJqField("e3.am1[2]", ".e3.am1[2]"));
fields.add(JSONPathFieldSpec.createJqField("e3.am1[3]", ".e3.am1[3]"));
fields.add(JSONPathFieldSpec.createJqField("e4.e4.m4", ".e4.e4.m4"));

JSONPathSpec flattenSpec = new JSONPathSpec(true, fields);
JSONParseSpec spec = new JSONParseSpec(
new TimestampSpec("ts", "iso", null),
new DimensionsSpec(null, null, null),
flattenSpec,
null
);

return spec.makeParser();
}

public String generateFlatEvent() throws Exception
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,15 @@ public void testOne() throws Exception {

Parser flatParser = eventGen.getFlatParser();
Parser nestedParser = eventGen.getNestedParser();
Parser jqParser = eventGen.getJqParser();

Map<String, Object> event = flatParser.parse(newEvent);
Map<String, Object> event2 = nestedParser.parse(newEvent2);
Map<String, Object> event3 = jqParser.parse(newEvent2); // reuse the same event as "nested"

checkEvent1(event);
checkEvent2(event2);
checkEvent2(event3); // make sure JQ parser output matches with JSONPath parser output
}

public void checkEvent1(Map<String, Object> event) {
Expand Down
15 changes: 13 additions & 2 deletions docs/content/ingestion/flatten-json.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ Defining the JSON Flatten Spec allows nested JSON fields to be flattened during

| Field | Type | Description | Required |
|-------|------|-------------|----------|
| type | String | Type of the field, "root" or "path". | yes |
| type | String | Type of the field, "root", "path" or "jq". | yes |
| name | String | This string will be used as the column name when the data has been ingested. | yes |
| expr | String | Defines an expression for accessing the field within the JSON object, using [JsonPath](https://github.com/jayway/JsonPath) notation. Only used for type "path", otherwise ignored. | only for type "path" |
| expr | String | Defines an expression for accessing the field within the JSON object, using [JsonPath](https://github.com/jayway/JsonPath) notation for type "path", and [jackson-jq](https://github.com/eiiches/jackson-jq) for type "jq". This field is only used for type "path" and "jq", otherwise ignored. | only for type "path" or "jq" |

Suppose the event JSON has the following form:

Expand Down Expand Up @@ -99,6 +99,16 @@ To flatten this JSON, the parseSpec could be defined as follows:
"type": "path",
"name": "second-food",
"expr": "$.thing.food[1]"
},
{
"type": "jq",
"name": "first-food-by-jq",
"expr": ".thing.food[1]"
},
{
"type": "jq",
"name": "hello-total",
"expr": ".hello | sum"
}
]
},
Expand Down Expand Up @@ -147,3 +157,4 @@ Note that:
* If auto field discovery is enabled, any discovered field with the same name as one already defined in the field specs will be skipped and not added twice.
* The JSON input must be a JSON object at the root, not an array. e.g., {"valid": "true"} and {"valid":[1,2,3]} are supported but [{"invalid": "true"}] and [1,2,3] are not.
* [http://jsonpath.herokuapp.com/](http://jsonpath.herokuapp.com/) is useful for testing the path expressions.
* jackson-jq supports subset of [./jq](https://stedolan.github.io/jq/) syntax. Please refer jackson-jq document.
4 changes: 4 additions & 0 deletions java-util/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@
<scope>test</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>net.thisptr</groupId>
<artifactId>jackson-jq</artifactId>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.java.util.common.parsers;

import com.fasterxml.jackson.databind.JsonNode;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import net.thisptr.jackson.jq.JsonQuery;
import net.thisptr.jackson.jq.exception.JsonQueryException;


public class FlattenExpr
{
private JsonPath jsonPathExpr;
private JsonQuery jsonQueryExpr;


FlattenExpr(JsonPath jsonPathExpr)
{
this.jsonPathExpr = jsonPathExpr;
}

FlattenExpr(JsonQuery jsonQueryExpr)
{
this.jsonQueryExpr = jsonQueryExpr;
}

public JsonNode readPath(JsonNode document, Configuration jsonConfig)
{
return this.jsonPathExpr.read(document, jsonConfig);
}

public JsonNode readJq(JsonNode document)
{
try {
return this.jsonQueryExpr.apply(document).get(0);
}
catch (JsonQueryException e) {
// ignore errors.
}
return null;
}
}
Loading