-
Notifications
You must be signed in to change notification settings - Fork 3.8k
More ParseException handling for numeric dimensions #5312
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,17 +22,24 @@ | |
| import com.google.common.base.Supplier; | ||
| import com.google.common.collect.Lists; | ||
| import com.google.common.collect.Maps; | ||
| import com.google.common.io.ByteArrayDataInput; | ||
| import com.google.common.io.ByteArrayDataOutput; | ||
| import com.google.common.io.ByteStreams; | ||
|
|
||
| import io.druid.data.input.InputRow; | ||
| import io.druid.data.input.MapBasedInputRow; | ||
| import io.druid.data.input.Rows; | ||
| import io.druid.data.input.impl.DimensionSchema; | ||
| import io.druid.data.input.impl.DimensionsSpec; | ||
| import io.druid.java.util.common.IAE; | ||
| import io.druid.java.util.common.StringUtils; | ||
| import io.druid.java.util.common.logger.Logger; | ||
| import io.druid.java.util.common.parsers.ParseException; | ||
| import io.druid.query.aggregation.Aggregator; | ||
| import io.druid.query.aggregation.AggregatorFactory; | ||
| import io.druid.segment.DimensionHandlerUtils; | ||
| import io.druid.segment.VirtualColumns; | ||
| import io.druid.segment.column.ValueType; | ||
| import io.druid.segment.incremental.IncrementalIndex; | ||
| import io.druid.segment.serde.ComplexMetricSerde; | ||
| import io.druid.segment.serde.ComplexMetrics; | ||
|
|
@@ -49,7 +56,165 @@ public class InputRowSerde | |
| { | ||
| private static final Logger log = new Logger(InputRowSerde.class); | ||
|
|
||
| public static final byte[] toBytes(final InputRow row, AggregatorFactory[] aggs, boolean reportParseExceptions) | ||
| private static final IndexSerdeTypeHelper STRING_HELPER = new StringIndexSerdeTypeHelper(); | ||
| private static final IndexSerdeTypeHelper LONG_HELPER = new LongIndexSerdeTypeHelper(); | ||
| private static final IndexSerdeTypeHelper FLOAT_HELPER = new FloatIndexSerdeTypeHelper(); | ||
| private static final IndexSerdeTypeHelper DOUBLE_HELPER = new DoubleIndexSerdeTypeHelper(); | ||
|
|
||
| /** Per-value-type strategy for writing one dimension value into a serialized row and reading it back. */ public interface IndexSerdeTypeHelper<T> | ||
| { | ||
| /** Returns the {@link ValueType} this helper handles. */ ValueType getType(); | ||
|
|
||
| /** Writes {@code value} to {@code out} in this helper's encoding; {@code reportParseExceptions} is handed to the value conversion by the numeric implementations. */ void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions); | ||
|
|
||
| /** Reads back one value from {@code in}, in the same encoding that {@code serialize} writes. */ T deserialize(ByteArrayDataInput in); | ||
| } | ||
|
|
||
| public static Map<String, IndexSerdeTypeHelper> getTypeHelperMap(DimensionsSpec dimensionsSpec) | ||
| { | ||
| Map<String, IndexSerdeTypeHelper> typeHelperMap = Maps.newHashMap(); | ||
| for (DimensionSchema dimensionSchema : dimensionsSpec.getDimensions()) { | ||
| IndexSerdeTypeHelper typeHelper; | ||
| switch (dimensionSchema.getValueType()) { | ||
|
Contributor
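There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Replace the switch with … [the suggested inline code was not preserved in this capture; the reply below indicates it involved the helper array]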
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I kept the switch statement but dropped the array; it wasn't being used anymore after removing the type ordinals from the serialized form. |
||
| case STRING: | ||
| typeHelper = STRING_HELPER; | ||
| break; | ||
| case LONG: | ||
| typeHelper = LONG_HELPER; | ||
| break; | ||
| case FLOAT: | ||
| typeHelper = FLOAT_HELPER; | ||
| break; | ||
| case DOUBLE: | ||
| typeHelper = DOUBLE_HELPER; | ||
| break; | ||
| default: | ||
| throw new IAE("Invalid type: [%s]", dimensionSchema.getValueType()); | ||
| } | ||
| typeHelperMap.put(dimensionSchema.getName(), typeHelper); | ||
| } | ||
| return typeHelperMap; | ||
| } | ||
|
|
||
| /** Helper for STRING dimensions: values are handled as a list of strings. */ public static class StringIndexSerdeTypeHelper implements IndexSerdeTypeHelper<List<String>> | ||
| { | ||
| @Override | ||
| public ValueType getType() | ||
| { | ||
| return ValueType.STRING; | ||
| } | ||
|
|
||
| /** Converts {@code value} with Rows.objectToStrings and writes the result with writeStringArray; {@code reportParseExceptions} is not used here. */ @Override | ||
| public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) | ||
| { | ||
| List<String> values = Rows.objectToStrings(value); | ||
| try { | ||
| writeStringArray(values, out); | ||
| } | ||
| catch (IOException ioe) { | ||
| /* the interface declares no checked exceptions, so the IOException is rethrown unchecked with its cause kept */ throw new RuntimeException(ioe); | ||
| } | ||
| } | ||
|
|
||
| /** Reads the string list back with readStringArray; an IOException is rethrown as a RuntimeException. */ @Override | ||
| public List<String> deserialize(ByteArrayDataInput in) | ||
| { | ||
| try { | ||
| return readStringArray(in); | ||
| } | ||
| catch (IOException ioe) { | ||
| throw new RuntimeException(ioe); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** Helper for LONG dimensions: each value is written with writeLong and read back with readLong. */ public static class LongIndexSerdeTypeHelper implements IndexSerdeTypeHelper<Long> | ||
| { | ||
| @Override | ||
| public ValueType getType() | ||
| { | ||
| return ValueType.LONG; | ||
| } | ||
|
|
||
| /** Converts {@code value} with DimensionHandlerUtils.convertObjectToLong and writes it; a null conversion result is written as ZERO_LONG. NOTE(review): {@code reportParseExceptions} presumably decides whether an unparseable value raises a ParseException — confirm in DimensionHandlerUtils. */ @Override | ||
| public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) | ||
| { | ||
| Long ret = DimensionHandlerUtils.convertObjectToLong(value, reportParseExceptions); | ||
| if (ret == null) { | ||
| // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged | ||
| // we'll also need to change the serialized encoding so that it can represent numeric nulls | ||
| ret = DimensionHandlerUtils.ZERO_LONG; | ||
| } | ||
| /* ret is non-null at this point, so the unboxing cannot throw */ out.writeLong(ret); | ||
| } | ||
|
|
||
| /** Reads one long from {@code in}; a null written as zero cannot be told apart from a real zero. */ @Override | ||
| public Long deserialize(ByteArrayDataInput in) | ||
| { | ||
| return in.readLong(); | ||
| } | ||
| } | ||
|
|
||
| /** Helper for FLOAT dimensions: each value is written with writeFloat and read back with readFloat. */ public static class FloatIndexSerdeTypeHelper implements IndexSerdeTypeHelper<Float> | ||
| { | ||
| @Override | ||
| public ValueType getType() | ||
| { | ||
| return ValueType.FLOAT; | ||
| } | ||
|
|
||
| /** Converts {@code value} with DimensionHandlerUtils.convertObjectToFloat and writes it; a null conversion result is written as ZERO_FLOAT. NOTE(review): {@code reportParseExceptions} presumably decides whether an unparseable value raises a ParseException — confirm in DimensionHandlerUtils. */ @Override | ||
| public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) | ||
| { | ||
| Float ret = DimensionHandlerUtils.convertObjectToFloat(value, reportParseExceptions); | ||
| if (ret == null) { | ||
| // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged | ||
| // we'll also need to change the serialized encoding so that it can represent numeric nulls | ||
| ret = DimensionHandlerUtils.ZERO_FLOAT; | ||
| } | ||
| /* ret is non-null at this point, so the unboxing cannot throw */ out.writeFloat(ret); | ||
| } | ||
|
|
||
| /** Reads one float from {@code in}; a null written as zero cannot be told apart from a real zero. */ @Override | ||
| public Float deserialize(ByteArrayDataInput in) | ||
| { | ||
| return in.readFloat(); | ||
| } | ||
| } | ||
|
|
||
| /** Helper for DOUBLE dimensions: each value is written with writeDouble and read back with readDouble. */ public static class DoubleIndexSerdeTypeHelper implements IndexSerdeTypeHelper<Double> | ||
| { | ||
| @Override | ||
| public ValueType getType() | ||
| { | ||
| return ValueType.DOUBLE; | ||
| } | ||
|
|
||
| /** Converts {@code value} with DimensionHandlerUtils.convertObjectToDouble and writes it; a null conversion result is written as ZERO_DOUBLE. NOTE(review): {@code reportParseExceptions} presumably decides whether an unparseable value raises a ParseException — confirm in DimensionHandlerUtils. */ @Override | ||
| public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) | ||
| { | ||
| Double ret = DimensionHandlerUtils.convertObjectToDouble(value, reportParseExceptions); | ||
| if (ret == null) { | ||
| // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged | ||
| // we'll also need to change the serialized encoding so that it can represent numeric nulls | ||
| ret = DimensionHandlerUtils.ZERO_DOUBLE; | ||
| } | ||
| /* ret is non-null at this point, so the unboxing cannot throw */ out.writeDouble(ret); | ||
| } | ||
|
|
||
| /** Reads one double from {@code in}; a null written as zero cannot be told apart from a real zero. */ @Override | ||
| public Double deserialize(ByteArrayDataInput in) | ||
| { | ||
| return in.readDouble(); | ||
| } | ||
| } | ||
|
|
||
| public static final byte[] toBytes( | ||
| final Map<String, IndexSerdeTypeHelper> typeHelperMap, | ||
| final InputRow row, | ||
| AggregatorFactory[] aggs, | ||
| boolean reportParseExceptions | ||
| ) | ||
| { | ||
| try { | ||
| ByteArrayDataOutput out = ByteStreams.newDataOutput(); | ||
|
|
@@ -63,9 +228,12 @@ public static final byte[] toBytes(final InputRow row, AggregatorFactory[] aggs, | |
| WritableUtils.writeVInt(out, dimList.size()); | ||
| if (dimList != null) { | ||
| for (String dim : dimList) { | ||
| List<String> dimValues = row.getDimension(dim); | ||
| IndexSerdeTypeHelper typeHelper = typeHelperMap.get(dim); | ||
| if (typeHelper == null) { | ||
| typeHelper = STRING_HELPER; | ||
| } | ||
| writeString(dim, out); | ||
| writeStringArray(dimValues, out); | ||
| typeHelper.serialize(out, row.getRaw(dim), reportParseExceptions); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -176,10 +344,14 @@ private static List<String> readStringArray(DataInput in) throws IOException | |
| return values; | ||
| } | ||
|
|
||
| public static final InputRow fromBytes(byte[] data, AggregatorFactory[] aggs) | ||
| public static final InputRow fromBytes( | ||
| final Map<String, IndexSerdeTypeHelper> typeHelperMap, | ||
| byte[] data, | ||
| AggregatorFactory[] aggs | ||
| ) | ||
| { | ||
| try { | ||
| DataInput in = ByteStreams.newDataInput(data); | ||
| ByteArrayDataInput in = ByteStreams.newDataInput(data); | ||
|
|
||
| //Read timestamp | ||
| long timestamp = in.readLong(); | ||
|
|
@@ -192,14 +364,25 @@ public static final InputRow fromBytes(byte[] data, AggregatorFactory[] aggs) | |
| for (int i = 0; i < dimNum; i++) { | ||
| String dimension = readString(in); | ||
| dimensions.add(dimension); | ||
| List<String> dimensionValues = readStringArray(in); | ||
| if (dimensionValues == null) { | ||
|
|
||
| IndexSerdeTypeHelper typeHelper = typeHelperMap.get(dimension); | ||
| if (typeHelper == null) { | ||
| typeHelper = STRING_HELPER; | ||
| } | ||
| Object dimValues = typeHelper.deserialize(in); | ||
| if (dimValues == null) { | ||
| continue; | ||
| } | ||
| if (dimensionValues.size() == 1) { | ||
| event.put(dimension, dimensionValues.get(0)); | ||
|
|
||
| if (typeHelper.getType() == ValueType.STRING) { | ||
| List<String> dimensionValues = (List<String>) dimValues; | ||
| if (dimensionValues.size() == 1) { | ||
| event.put(dimension, dimensionValues.get(0)); | ||
| } else { | ||
| event.put(dimension, dimensionValues); | ||
| } | ||
| } else { | ||
| event.put(dimension, dimensionValues); | ||
| event.put(dimension, dimValues); | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why'd you need to add these? Why would the counters not be set?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It happened on a task failure when I was testing the ParseExceptions; I didn't look further into why the job counters weren't set.