diff --git a/pom.xml b/pom.xml
index bd09c711..1c2a0d0d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -113,6 +113,11 @@
rhino
1.7R5
+    <dependency>
+      <groupId>com.jayway.jsonpath</groupId>
+      <artifactId>json-path</artifactId>
+      <version>2.0.0</version>
+    </dependency>
diff --git a/src/main/java/com/metamx/common/parsers/JSONPathParser.java b/src/main/java/com/metamx/common/parsers/JSONPathParser.java
new file mode 100644
index 00000000..b6cbf499
--- /dev/null
+++ b/src/main/java/com/metamx/common/parsers/JSONPathParser.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.metamx.common.parsers;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Charsets;
+import com.jayway.jsonpath.Configuration;
+import com.jayway.jsonpath.JsonPath;
+import com.jayway.jsonpath.Option;
+import com.metamx.common.Pair;
+import com.metamx.common.StringUtils;
+
+import java.math.BigInteger;
+import java.nio.charset.CharsetEncoder;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * JSON parser class that uses the JsonPath library to access fields via path expressions.
+ */
+public class JSONPathParser implements Parser
+{
+  /**
+   * Field name -> (access type, compiled JsonPath), built once by generateFieldPaths() in the constructor.
+   * parse() walks this map for every row; the JsonPath is only evaluated for non-ROOT fields.
+   */
+  private final Map<String, Pair<FieldType, JsonPath>> fieldPathMap;
+  /** The field specifications exactly as passed to the constructor. */
+  private final List<FieldSpec> fieldSpecs;
+  /** When true, parse() also copies eligible undeclared root fields into the result. */
+  private final boolean useFieldDiscovery;
+  /** Jackson mapper used to deserialize each input row. */
+  private final ObjectMapper mapper;
+  /**
+   * Used by charsetFix() to detect strings that cannot be encoded as UTF-8.
+   * NOTE(review): java.nio.charset.CharsetEncoder is documented as not safe for concurrent use, and this single
+   * instance is shared by every parse() call -- confirm that a parser is never used from several threads.
+   */
+  private final CharsetEncoder enc = Charsets.UTF_8.newEncoder();
+  /** JsonPath configuration: the library defaults plus Option.SUPPRESS_EXCEPTIONS. */
+  private final Configuration jsonPathConfig;
+
+ /**
+ * Creates a parser that extracts the given fields from each JSON row.
+ *
+ * @param fieldSpecs List of field specifications. Field names must be unique: generateFieldPaths() throws
+ * IllegalArgumentException when the same name appears twice.
+ * @param useFieldDiscovery If true, automatically add root fields seen in the JSON document to the parsed object Map.
+ * Only fields that contain a singular value or flat list (list containing no subobjects or lists) are automatically added.
+ * @param mapper Optionally provide an ObjectMapper, used by the parser for reading the input JSON.
+ * If null, a new default ObjectMapper is created.
+ */
+ public JSONPathParser(List fieldSpecs, boolean useFieldDiscovery, ObjectMapper mapper)
+ {
+ this.fieldSpecs = fieldSpecs;
+ // Expressions are compiled once here rather than on every parse() call.
+ this.fieldPathMap = generateFieldPaths(fieldSpecs);
+ this.useFieldDiscovery = useFieldDiscovery;
+ this.mapper = mapper == null ? new ObjectMapper() : mapper;
+ // NOTE(review): SUPPRESS_EXCEPTIONS is presumably what lets an unmatched path come back as null instead of
+ // throwing (parse() skips null results) -- confirm against the JsonPath Option documentation.
+ this.jsonPathConfig = Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS);
+ }
+
+ /**
+ * This implementation does not report field names.
+ *
+ * @return always null, regardless of the field specifications the parser was built with
+ */
+ @Override
+ public List getFieldNames()
+ {
+ return null;
+ }
+
+ /**
+ * No-op: the method body is empty and the argument is ignored.
+ *
+ * @param fieldNames ignored
+ */
+ @Override
+ public void setFieldNames(Iterable fieldNames)
+ {
+ }
+
+  /**
+   * Parses one JSON row into a map of field names to values.
+   *
+   * The configured fields are extracted first, in configuration order: ROOT fields are read directly from the
+   * top-level JSON object by field name, all other fields are evaluated with their compiled JsonPath. A field
+   * whose lookup yields null is left out of the result. Every extracted value is passed through
+   * valueConversionFunction(). If field discovery is enabled, discoverFields() then adds the eligible
+   * top-level fields that are not already present in the result.
+   *
+   * @param input JSON string. The root must be a JSON object, not an array.
+   *              e.g., {"valid": "true"} and {"valid":[1,2,3]} are supported
+   *              but [{"invalid": "true"}] and [1,2,3] are not.
+   * @return A map of field names and values, iterated in insertion order
+   * @throws ParseException wrapping any exception raised while reading the JSON or extracting a field
+   */
+  @Override
+  public Map<String, Object> parse(String input)
+  {
+    try {
+      Map<String, Object> map = new LinkedHashMap<>();
+      Map<String, Object> document = mapper.readValue(input, new TypeReference<Map<String, Object>>() {});
+      for (Map.Entry<String, Pair<FieldType, JsonPath>> entry : fieldPathMap.entrySet()) {
+        String fieldName = entry.getKey();
+        Pair<FieldType, JsonPath> pair = entry.getValue();
+        JsonPath path = pair.rhs;
+        Object parsedVal;
+        if (pair.lhs == FieldType.ROOT) {
+          // ROOT fields bypass JsonPath entirely; the field name doubles as the key in the document.
+          parsedVal = document.get(fieldName);
+        } else {
+          parsedVal = path.read(document, jsonPathConfig);
+        }
+        if (parsedVal == null) {
+          // Missing and explicitly-null fields are both omitted from the result.
+          continue;
+        }
+        parsedVal = valueConversionFunction(parsedVal);
+        map.put(fieldName, parsedVal);
+      }
+      if (useFieldDiscovery) {
+        discoverFields(map, document);
+      }
+      return map;
+    }
+    catch (Exception e) {
+      // Deliberately broad: every failure for a row is reported uniformly as a ParseException with the cause kept.
+      throw new ParseException(e, "Unable to parse row [%s]", input);
+    }
+  }
+
+  /**
+   * Builds the lookup table used by parse(): field name -> (access type, compiled JsonPath).
+   *
+   * The expression is compiled for every field except ROOT fields. parse() reads ROOT fields by name and never
+   * touches their JsonPath, and FieldSpec documents the expression as used by PATH fields only, so a ROOT
+   * specification is not required to carry a valid (or even non-null) expression; its pair holds a null path.
+   *
+   * @param fieldSpecs field specifications, in the order the fields should be extracted
+   * @return insertion-ordered map from field name to its access type and compiled path (null path for ROOT)
+   * @throws IllegalArgumentException if two specifications share the same field name
+   */
+  private Map<String, Pair<FieldType, JsonPath>> generateFieldPaths(List<FieldSpec> fieldSpecs)
+  {
+    Map<String, Pair<FieldType, JsonPath>> map = new LinkedHashMap<>();
+    for (FieldSpec fieldSpec : fieldSpecs) {
+      String fieldName = fieldSpec.getName();
+      if (map.containsKey(fieldName)) {
+        throw new IllegalArgumentException("Cannot have duplicate field definition: " + fieldName);
+      }
+      FieldType type = fieldSpec.getType();
+      // Mirrors the ROOT check in parse(). JsonPath.compile rejects null/empty expressions, so compiling
+      // unconditionally would make a ROOT spec without an expression fail at construction time.
+      JsonPath path = type == FieldType.ROOT ? null : JsonPath.compile(fieldSpec.getExpr());
+      Pair<FieldType, JsonPath> pair = new Pair<>(type, path);
+      map.put(fieldName, pair);
+    }
+    return map;
+  }
+
+ /**
+ * Adds undeclared top-level fields of the document to the result map.
+ *
+ * A field is copied only if the result does not already contain its name and its value is non-null,
+ * is not a Map, and -- when it is a List -- is flat according to isFlatList(). Copied values go through
+ * valueConversionFunction() first.
+ *
+ * @param map result map being built by parse(); modified in place
+ * @param document the deserialized top-level JSON object
+ */
+ private void discoverFields(Map map, Map document)
+ {
+ for (String field : document.keySet()) {
+ // Fields already extracted through an explicit specification are never overwritten.
+ if (!map.containsKey(field)) {
+ Object val = document.get(field);
+ if (val == null) {
+ continue;
+ }
+ // Nested objects are not discovered automatically.
+ if (val instanceof Map) {
+ continue;
+ }
+ if (val instanceof List) {
+ // Lists holding sub-objects or sub-lists are skipped as well.
+ if (!isFlatList((List) val)) {
+ continue;
+ }
+ }
+ val = valueConversionFunction(val);
+ map.put(field, val);
+ }
+ }
+ }
+
+ /**
+ * Normalizes a value read from the JSON document, recursing into lists and maps.
+ *
+ * Integer becomes Long, BigInteger becomes Double (via doubleValue(), so precision can be lost),
+ * String is passed through charsetFix(), a List is copied into a new ArrayList with each element converted,
+ * and a Map is copied into a new LinkedHashMap with each value converted (keys are kept as they are).
+ * Any other value, including null, is returned unchanged.
+ *
+ * @param val value to convert
+ * @return the converted value, or val itself when no conversion applies
+ */
+ private Object valueConversionFunction(Object val)
+ {
+ if (val instanceof Integer) {
+ return Long.valueOf((Integer) val);
+ }
+
+ if (val instanceof BigInteger) {
+ return Double.valueOf(((BigInteger) val).doubleValue());
+ }
+
+ if (val instanceof String) {
+ return charsetFix((String) val);
+ }
+
+ if (val instanceof List) {
+ // The input list is left untouched; converted elements are collected into a fresh list.
+ List newList = new ArrayList<>();
+ for(Object entry : ((List) val)) {
+ newList.add(valueConversionFunction(entry));
+ }
+ return newList;
+ }
+
+ if (val instanceof Map) {
+ // Same approach for maps: a fresh insertion-ordered copy with converted values.
+ Map newMap = new LinkedHashMap<>();
+ Map valMap = (Map) val;
+ for(Map.Entry entry : valMap.entrySet()) {
+ newMap.put(entry.getKey(), valueConversionFunction(entry.getValue()));
+ }
+ return newMap;
+ }
+
+ return val;
+ }
+
+ /**
+ * Returns a version of the string that can be encoded by the UTF-8 encoder held in {@code enc}.
+ *
+ * Null and already-encodable strings are returned as they are. Any other string is converted to UTF-8 bytes
+ * and back through StringUtils; JSONPathParserTest expects "foo\uD900" to come back as "foo?".
+ *
+ * @param s string to check, may be null
+ * @return s itself, or the round-tripped replacement
+ */
+ private String charsetFix(String s)
+ {
+ if (s != null && !enc.canEncode(s)) {
+ // Some unusual characters are in this string (e.g. \uD900). These are problematic because they are decodable
+ // by new String(...) but will not encode into the same character. Round-tripping through UTF-8 bytes
+ // replaces these characters with something more sane.
+ return StringUtils.fromUtf8(StringUtils.toUtf8(s));
+ } else {
+ return s;
+ }
+ }
+
+ /**
+ * Tells whether a list is flat, i.e. none of its direct elements is a Map or a List.
+ * An empty list is flat.
+ *
+ * @param list list to inspect
+ * @return false as soon as a Map or List element is found, true otherwise
+ */
+ private boolean isFlatList(List list)
+ {
+ for (Object obj : list) {
+ if ((obj instanceof Map) || (obj instanceof List)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Specifies access behavior for a field, i.e. how the parser looks the field up in the JSON document.
+ */
+ public enum FieldType
+ {
+ /**
+ * A ROOT field is read directly from the JSON document root without using the JsonPath library.
+ */
+ ROOT,
+
+ /**
+ * A PATH field uses a JsonPath expression to retrieve the field value.
+ */
+ PATH;
+ }
+
+ /**
+ * Specifies a field to be added to the parsed object Map, using JsonPath notation.
+ * Instances only hold the three constructor arguments; no validation is performed here.
+ *
+ * See https://github.com/jayway/JsonPath for more information.
+ */
+ public static class FieldSpec
+ {
+ private final FieldType type;
+ private final String name;
+ private final String expr;
+
+ /**
+ * Constructor
+ *
+ * @param type Specifies how this field should be retrieved.
+ * @param name Name of the field, used as the key in the Object map returned by the parser.
+ * For ROOT fields, this must match the field name as it appears in the JSON document.
+ * @param expr Only used by PATH type fields, specifies the JsonPath expression used to access the field.
+ */
+ public FieldSpec(
+ FieldType type,
+ String name,
+ String expr
+ )
+ {
+ this.type = type;
+ this.name = name;
+ this.expr = expr;
+ }
+
+ /**
+ * @return how this field should be retrieved, as given to the constructor
+ */
+ public FieldType getType()
+ {
+ return type;
+ }
+
+ /**
+ * @return the field name, used as the key in the map returned by the parser
+ */
+ public String getName()
+ {
+ return name;
+ }
+
+ /**
+ * @return the JsonPath expression given to the constructor
+ */
+ public String getExpr()
+ {
+ return expr;
+ }
+ }
+
+}
diff --git a/src/test/java/com/metamx/common/parsers/JSONPathParserTest.java b/src/test/java/com/metamx/common/parsers/JSONPathParserTest.java
new file mode 100644
index 00000000..b7afb6a5
--- /dev/null
+++ b/src/test/java/com/metamx/common/parsers/JSONPathParserTest.java
@@ -0,0 +1,212 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package com.metamx.common.parsers;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public class JSONPathParserTest
+{
+ // Flat object with string values, a flat string list and an explicit null ("four").
+ private static final String json =
+ "{\"one\": \"foo\", \"two\" : [\"bar\", \"baz\"], \"three\" : \"qux\", \"four\" : null}";
+ // Numeric values of increasing size: a decimal, a small integer, a larger integer, and one
+ // ("toomany") that testWithNumbers expects back as a double.
+ private static final String numbersJson =
+ "{\"five\" : 5.0, \"six\" : 6, \"many\" : 1234567878900, \"toomany\" : 1234567890000000000000}";
+ // String value ending in the lone surrogate \uD900.
+ private static final String whackyCharacterJson =
+ "{\"one\": \"foo\\uD900\"}";
+ // Document mixing scalars, flat lists, nested objects, lists of objects, and a root key that
+ // literally contains a dot ("foo.bar1").
+ private static final String nestedJson =
+ "{\"simpleVal\":\"text\", \"ignore_me\":[1, {\"x\":2}], \"blah\":[4,5,6], \"newmet\":5, " +
+ "\"foo\":{\"bar1\":\"aaa\", \"bar2\":\"bbb\"}, " +
+ "\"baz\":[1,2,3], \"timestamp\":\"2999\", \"foo.bar1\":\"Hello world!\", " +
+ "\"testListConvert\":[1234567890000000000000, \"foo\\uD900\"], " +
+ "\"testListConvert2\":[1234567890000000000000, \"foo\\uD900\", [1234567890000000000000]], " +
+ "\"testMapConvert\":{\"big\": 1234567890000000000000, \"big2\":{\"big2\":1234567890000000000000}}, " +
+ "\"testEmptyList\": [], " +
+ "\"hey\":[{\"barx\":\"asdf\"}], \"met\":{\"a\":[7,8,9]}}";
+ // Input that is not JSON at all; used to provoke a ParseException.
+ private static final String notJson = "***@#%R#*(TG@(*H(#@(#@((H#(@TH@(#TH(@SDHGKJDSKJFBSBJK";
+
+ // Lets individual tests declare the exception type and message they expect.
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ /**
+ * With no field specs and discovery enabled, all non-null root fields are returned;
+ * the null-valued "four" is expected to be absent.
+ */
+ @Test
+ public void testSimple()
+ {
+ List fields = new ArrayList<>();
+ final Parser jsonParser = new JSONPathParser(fields, true, null);
+ final Map jsonMap = jsonParser.parse(json);
+ Assert.assertEquals(
+ "jsonMap",
+ ImmutableMap.of("one", "foo", "two", ImmutableList.of("bar", "baz"), "three", "qux"),
+ jsonMap
+ );
+ }
+
+ /**
+ * Checks the numeric types of discovered values: 5.0 stays a double, 6 and 1234567878900 are
+ * expected as longs, and 1234567890000000000000 is expected as the double 1.23456789E21.
+ */
+ @Test
+ public void testWithNumbers()
+ {
+ List fields = new ArrayList<>();
+ final Parser jsonParser = new JSONPathParser(fields, true, null);
+ final Map jsonMap = jsonParser.parse(numbersJson);
+ Assert.assertEquals(
+ "jsonMap",
+ ImmutableMap.of("five", 5.0, "six", 6L, "many", 1234567878900L, "toomany", 1.23456789E21),
+ jsonMap
+ );
+ }
+
+ /**
+ * A string containing the lone surrogate \uD900 is expected to come back with that
+ * character replaced by '?'.
+ */
+ @Test
+ public void testWithWhackyCharacters()
+ {
+ List fields = new ArrayList<>();
+ final Parser jsonParser = new JSONPathParser(fields, true, null);
+ final Map jsonMap = jsonParser.parse(whackyCharacterJson);
+ Assert.assertEquals(
+ "jsonMap",
+ ImmutableMap.of("one", "foo?"),
+ jsonMap
+ );
+ }
+
+ /**
+ * Combines explicit ROOT/PATH specs with field discovery on the nested document.
+ * Verifies explicitly requested fields (including nested lists/maps requested via ROOT), discovered flat
+ * root fields, value conversion inside lists and maps, that nested structures are not discovered, and that
+ * specs matching nothing produce no entry.
+ */
+ @Test
+ public void testNestingWithFieldDiscovery()
+ {
+ List fields = new ArrayList<>();
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.ROOT, "baz", "baz"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "nested-foo.bar1", "$.foo.bar1"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "nested-foo.bar2", "$.foo.bar2"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "heybarx0", "$.hey[0].barx"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "met-array", "$.met.a"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.ROOT, "testListConvert2", "testListConvert2"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.ROOT, "testMapConvert", "testMapConvert"));
+
+ // Neither of these names/expressions matches anything in nestedJson.
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.ROOT, "INVALID_ROOT", "INVALID_ROOT_EXPR"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "INVALID_PATH", "INVALID_PATH_EXPR"));
+
+
+ final Parser jsonParser = new JSONPathParser(fields, true, null);
+ final Map jsonMap = jsonParser.parse(nestedJson);
+
+ // Root fields
+ Assert.assertEquals(ImmutableList.of(1L, 2L, 3L), jsonMap.get("baz"));
+ Assert.assertEquals(ImmutableList.of(4L, 5L, 6L), jsonMap.get("blah"));
+ Assert.assertEquals("text", jsonMap.get("simpleVal"));
+ Assert.assertEquals(5L, jsonMap.get("newmet"));
+ Assert.assertEquals("2999", jsonMap.get("timestamp"));
+ Assert.assertEquals("Hello world!", jsonMap.get("foo.bar1"));
+
+ // Discovered flat list: elements are converted individually.
+ List testListConvert = (List)jsonMap.get("testListConvert");
+ Assert.assertEquals(1.23456789E21, testListConvert.get(0));
+ Assert.assertEquals("foo?", testListConvert.get(1));
+
+ // Not flat, so only available because it was requested explicitly as a ROOT field.
+ List testListConvert2 = (List)jsonMap.get("testListConvert2");
+ Assert.assertEquals(1.23456789E21, testListConvert2.get(0));
+ Assert.assertEquals("foo?", testListConvert2.get(1));
+ Assert.assertEquals(1.23456789E21, ((List) testListConvert2.get(2)).get(0));
+
+ // Map values are converted recursively as well.
+ Map testMapConvert = (Map) jsonMap.get("testMapConvert");
+ Assert.assertEquals(1.23456789E21, testMapConvert.get("big"));
+ Assert.assertEquals(1.23456789E21, ((Map) testMapConvert.get("big2")).get("big2"));
+
+ Assert.assertEquals(ImmutableList.of(), jsonMap.get("testEmptyList"));
+
+ // Nested fields
+ Assert.assertEquals("aaa", jsonMap.get("nested-foo.bar1"));
+ Assert.assertEquals("bbb", jsonMap.get("nested-foo.bar2"));
+ Assert.assertEquals("asdf", jsonMap.get("heybarx0"));
+ Assert.assertEquals(ImmutableList.of(7L, 8L, 9L), jsonMap.get("met-array"));
+
+ // Fields that should not be discovered
+ Assert.assertNull(jsonMap.get("hey"));
+ Assert.assertNull(jsonMap.get("met"));
+ Assert.assertNull(jsonMap.get("ignore_me"));
+ Assert.assertNull(jsonMap.get("foo"));
+
+ // Invalid fields
+ Assert.assertNull(jsonMap.get("INVALID_ROOT"));
+ Assert.assertNull(jsonMap.get("INVALID_PATH"));
+ }
+
+ /**
+ * With discovery disabled, only the explicitly specified ROOT and PATH fields are returned;
+ * every other key of the nested document is expected to be absent.
+ */
+ @Test
+ public void testNestingNoDiscovery()
+ {
+ List fields = new ArrayList<>();
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.ROOT, "simpleVal", "simpleVal"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.ROOT, "timestamp", "timestamp"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "nested-foo.bar2", "$.foo.bar2"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "heybarx0", "$.hey[0].barx"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "met-array", "$.met.a"));
+
+ final Parser jsonParser = new JSONPathParser(fields, false, null);
+ final Map jsonMap = jsonParser.parse(nestedJson);
+
+ // Root fields
+ Assert.assertEquals("text", jsonMap.get("simpleVal"));
+ Assert.assertEquals("2999", jsonMap.get("timestamp"));
+
+ // Nested fields
+ Assert.assertEquals("bbb", jsonMap.get("nested-foo.bar2"));
+ Assert.assertEquals("asdf", jsonMap.get("heybarx0"));
+ Assert.assertEquals(ImmutableList.of(7L, 8L, 9L), jsonMap.get("met-array"));
+
+ // Fields that should not be discovered
+ Assert.assertNull(jsonMap.get("newmet"));
+ Assert.assertNull(jsonMap.get("foo.bar1"));
+ Assert.assertNull(jsonMap.get("baz"));
+ Assert.assertNull(jsonMap.get("blah"));
+ Assert.assertNull(jsonMap.get("nested-foo.bar1"));
+ Assert.assertNull(jsonMap.get("hey"));
+ Assert.assertNull(jsonMap.get("met"));
+ Assert.assertNull(jsonMap.get("ignore_me"));
+ Assert.assertNull(jsonMap.get("foo"));
+ }
+
+ /**
+ * Two specs with the same field name must be rejected with an IllegalArgumentException
+ * naming the duplicated field.
+ */
+ @Test
+ public void testRejectDuplicates()
+ {
+ List fields = new ArrayList<>();
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "met-array", "$.met.a"));
+ fields.add(new JSONPathParser.FieldSpec(JSONPathParser.FieldType.PATH, "met-array", "$.met.a"));
+
+ thrown.expect(IllegalArgumentException.class);
+ thrown.expectMessage("Cannot have duplicate field definition: met-array");
+
+ // The duplicate check runs while the constructor builds its field map, so the exception is expected
+ // from the constructor call; the parse() call below should not be reached.
+ final Parser jsonParser = new JSONPathParser(fields, false, null);
+ final Map jsonMap = jsonParser.parse(nestedJson);
+ }
+
+ /**
+ * Input that is not JSON must surface as a ParseException whose message contains the offending row.
+ */
+ @Test
+ public void testParseFail()
+ {
+ List fields = new ArrayList<>();
+
+ thrown.expect(ParseException.class);
+ thrown.expectMessage("Unable to parse row [" + notJson + "]");
+
+ final Parser jsonParser = new JSONPathParser(fields, true, null);
+ // The return value is irrelevant; the call itself is expected to throw.
+ final Map jsonMap = jsonParser.parse(notJson);
+ }
+}