Skip to content

JSON parsing bad metrics fails ignoring ignoreInvalidRows #3879

@drcrallen

Description

@drcrallen

io.druid.data.input.impl.MapInputRowParser#parse parses the input into a Map<String, Object>, which succeeds for all manner of input. However, parsing is expected to fail early so that parse errors can be caught at parse time.

For a JSON map that has a non-numeric string where a metric value should be, the failure instead happens later, at the aggregation stage, which bypasses the ignore-invalid-rows check at

if (config.isIgnoreInvalidRows()) {

Stack trace below

2017-01-24T03:45:19,041 WARN [Thread-4] org.apache.hadoop.mapred.LocalJobRunner - job_local1725905616_0001
java.lang.Exception: com.metamx.common.RE: Failure on row[{"INTERVALSTARTTIME_GMT":"2016-05-06T20:25:00-00:00","INTERVALENDTIME_GMT":"2016-05-06T20:30:00-00:00","OPR_DT":"2016-05-06","OPR_HR":"1
4","NODE_ID_XML":"BELMONT_1_N006","NODE_ID":"BELMONT_1_N006","NODE":"BELMONT_1_N006","MARKET_RUN_ID":"RTM","LMP_TYPE":"LMP","XML_DATA_ITEM":"LMP_PRC","PNODE_RESMRID":"BELMONT_1_N006","GRP_TYPE":"BELLPGE_1_GN006","POS":"BELLPGE_1_GN006","VALUE":"RTM","OPR_INTERVAL":"LMP","GROUP":"LMP_PRC","lat":37.52323862187675,"loc":"CA","lon":-122.26490790641564,"node_id":"BELMONT_1_N006","type":"LOAD"}]
        at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) ~[hadoop-mapreduce-client-common-2.7.3.jar:?]
        at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522) [hadoop-mapreduce-client-common-2.7.3.jar:?]
Caused by: com.metamx.common.RE: Failure on row[{"INTERVALSTARTTIME_GMT":"2016-05-06T20:25:00-00:00","INTERVALENDTIME_GMT":"2016-05-06T20:30:00-00:00","OPR_DT":"2016-05-06","OPR_HR":"14","NODE_ID_XML":"BELMONT_1_N006","NODE_ID":"BELMONT_1_N006","NODE":"BELMONT_1_N006","MARKET_RUN_ID":"RTM","LMP_TYPE":"LMP","XML_DATA_ITEM":"LMP_PRC","PNODE_RESMRID":"BELMONT_1_N006","GRP_TYPE":"BELLPGE_1_GN006","POS":"BELLPGE_1_GN006","VALUE":"RTM","OPR_INTERVAL":"LMP","GROUP":"LMP_PRC","lat":37.52323862187675,"loc":"CA","lon":-122.26490790641564,"node_id":"BELMONT_1_N006","type":"LOAD"}]
        at io.druid.indexer.HadoopDruidIndexerMapper.map(HadoopDruidIndexerMapper.java:88) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243) ~[hadoop-mapreduce-client-common-2.7.3.jar:?]
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_121]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_121]
        at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_121]
Caused by: com.metamx.common.parsers.ParseException: Unable to parse metrics[VALUE], value[RTM]
        at io.druid.data.input.MapBasedRow.getFloatMetric(MapBasedRow.java:130) ~[druid-api-0.9.1.1.jar:0.9.1.1]
        at io.druid.segment.incremental.IncrementalIndex$4$3.get(IncrementalIndex.java:201) ~[druid-processing-0.9.1.1.jar:0.9.1.1]
        at io.druid.query.aggregation.DoubleSumAggregator.aggregate(DoubleSumAggregator.java:62) ~[druid-processing-0.9.1.1.jar:0.9.1.1]
        at io.druid.indexer.InputRowSerde.toBytes(InputRowSerde.java:94) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
        at io.druid.indexer.IndexGeneratorJob$IndexGeneratorMapper.innerMap(IndexGeneratorJob.java:292) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
        at io.druid.indexer.HadoopDruidIndexerMapper.map(HadoopDruidIndexerMapper.java:84) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243) ~[hadoop-mapreduce-client-common-2.7.3.jar:?]
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_121]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_121]
        at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_121]
Caused by: java.lang.NumberFormatException: For input string: "RTM"
        at sun.misc.FloatingDecimal.readJavaFormatString(FloatingDecimal.java:2043) ~[?:1.8.0_121]
        at sun.misc.FloatingDecimal.parseFloat(FloatingDecimal.java:122) ~[?:1.8.0_121]
        at java.lang.Float.parseFloat(Float.java:451) ~[?:1.8.0_121]
        at java.lang.Float.valueOf(Float.java:416) ~[?:1.8.0_121]
        at io.druid.data.input.MapBasedRow.getFloatMetric(MapBasedRow.java:127) ~[druid-api-0.9.1.1.jar:0.9.1.1]
        at io.druid.segment.incremental.IncrementalIndex$4$3.get(IncrementalIndex.java:201) ~[druid-processing-0.9.1.1.jar:0.9.1.1]
        at io.druid.query.aggregation.DoubleSumAggregator.aggregate(DoubleSumAggregator.java:62) ~[druid-processing-0.9.1.1.jar:0.9.1.1]
        at io.druid.indexer.InputRowSerde.toBytes(InputRowSerde.java:94) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
        at io.druid.indexer.IndexGeneratorJob$IndexGeneratorMapper.innerMap(IndexGeneratorJob.java:292) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
        at io.druid.indexer.HadoopDruidIndexerMapper.map(HadoopDruidIndexerMapper.java:84) ~[druid-indexing-hadoop-0.9.1.1.jar:0.9.1.1]
        at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:146) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341) ~[hadoop-mapreduce-client-core-2.7.3.jar:?]
        at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243) ~[hadoop-mapreduce-client-common-2.7.3.jar:?]
        at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[?:1.8.0_121]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) ~[?:1.8.0_121]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) ~[?:1.8.0_121]
        at java.lang.Thread.run(Thread.java:745) ~[?:1.8.0_121]

In this example, "VALUE":"RTM" is expected to parse as a float, but it is a non-numeric string.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions