From 5f5b54c38ebfb45a817999a986f51597782b5898 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 2 Jan 2024 22:41:52 -0800 Subject: [PATCH 1/3] Cache value selectors in RowBasedColumnSelectorFactory. There was already caching for dimension selectors. This patch adds caching for value (object and number) selectors. It's helpful when the same field is read multiple times during processing of a single row (for example, by being an input to both MIN and MAX aggregations). --- .../druid/common/config/NullHandling.java | 5 +- .../org/apache/druid/data/input/Rows.java | 48 +++++++++--- .../RowBasedColumnSelectorFactory.java | 78 +++++++++++++------ 3 files changed, 96 insertions(+), 35 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java index f18242db8fef..a19e9a100133 100644 --- a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java +++ b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java @@ -182,10 +182,11 @@ public static T defaultValueForClass(final Class clazz) * May be null or non-null based on the current SQL-compatible null handling mode. */ @Nullable - @SuppressWarnings("unchecked") public static Object defaultValueForType(ValueType type) { - if (type == ValueType.FLOAT) { + if (sqlCompatible()) { + return null; + } else if (type == ValueType.FLOAT) { return defaultFloatValue(); } else if (type == ValueType.DOUBLE) { return defaultDoubleValue(); diff --git a/processing/src/main/java/org/apache/druid/data/input/Rows.java b/processing/src/main/java/org/apache/druid/data/input/Rows.java index add6781b81b1..eb7e6bc7a211 100644 --- a/processing/src/main/java/org/apache/druid/data/input/Rows.java +++ b/processing/src/main/java/org/apache/druid/data/input/Rows.java @@ -25,6 +25,7 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.ParseException; +import org.apache.druid.segment.column.ValueType; import javax.annotation.Nullable; import java.util.Arrays; @@ -90,6 +91,7 @@ public static List objectToStrings(final Object inputValue) * * @param name field name of the object being converted (may be used for exception messages) * @param inputValue the actual object being converted + * @param expectedType expected numeric type, or null if it should be automatically detected * @param throwParseExceptions whether this method should throw a {@link ParseException} or use a default/null value * when {@param inputValue} is not numeric * @@ -98,14 +100,15 @@ public static List objectToStrings(final Object inputValue) * @throws ParseException if the input cannot be converted to a number and {@code throwParseExceptions} is true */ @Nullable - public static Number objectToNumber( + public static Number objectToNumber( final String name, final Object inputValue, + @Nullable final ValueType expectedType, final boolean throwParseExceptions ) { if (inputValue == null) { - return NullHandling.defaultLongValue(); + return (Number) NullHandling.defaultValueForType(expectedType != null ? expectedType : ValueType.LONG); } else if (inputValue instanceof Number) { return (Number) inputValue; } else if (inputValue instanceof String) { @@ -113,30 +116,57 @@ public static Number objectToNumber( String metricValueString = StringUtils.removeChar(((String) inputValue).trim(), ','); // Longs.tryParse() doesn't support leading '+', so we need to trim it ourselves metricValueString = trimLeadingPlusOfLongString(metricValueString); - Long v = Longs.tryParse(metricValueString); - // Do NOT use ternary operator here, because it makes Java to convert Long to Double - if (v != null) { - return v; + + Number v = null; + + // Try parsing as Long first, since it's significantly faster than Double parsing, and also there are various + // integer numbers that can be represented as Long but cannot be represented as Double. + if (expectedType == null || expectedType == ValueType.LONG) { + v = Longs.tryParse(metricValueString); + } + + if ((expectedType == null && v == null) || expectedType == ValueType.DOUBLE) { + v = Double.valueOf(metricValueString); + } + + if (v == null) { + if (throwParseExceptions) { + throw new ParseException(String.valueOf(inputValue), "Unable to parse value[%s] for field[%s] as type[%s]", inputValue.getClass(), name, expectedType); + } else { + return (Number) NullHandling.defaultValueForType(expectedType); + } } else { - return Double.valueOf(metricValueString); + return v; } } catch (Exception e) { if (throwParseExceptions) { throw new ParseException(String.valueOf(inputValue), e, "Unable to parse value[%s] for field[%s]", inputValue, name); } else { - return NullHandling.defaultLongValue(); + return (Number) NullHandling.defaultValueForType(expectedType != null ? expectedType : ValueType.LONG); } } } else { if (throwParseExceptions) { throw new ParseException(String.valueOf(inputValue), "Unknown type[%s] for field[%s]", inputValue.getClass(), name); } else { - return NullHandling.defaultLongValue(); + return (Number) NullHandling.defaultValueForType(expectedType != null ? expectedType : ValueType.LONG); } } } + /** + * Shorthand for {@link #objectToNumber(String, Object, ValueType, boolean)} with null expectedType. + */ + public static Number objectToNumber( + final String name, + final Object inputValue, + final boolean throwParseExceptions + ) + { + return objectToNumber(name, inputValue, null, throwParseExceptions); + } + private static String trimLeadingPlusOfLongString(String metricValueString) { if (metricValueString.length() > 1 && metricValueString.charAt(0) == '+') { diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index d0712e9c3155..0ee3e44621e4 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -32,6 +32,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.RangeIndexedInts; import org.apache.druid.segment.nested.StructuredData; @@ -445,44 +446,55 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) return new TimeLongColumnSelector(); } else { final Function columnFunction = adapter.columnFunction(columnName); + final ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(columnName); + final ValueType expectedType = capabilities == null ? null : capabilities.getType(); return new ColumnValueSelector() { + private long currentValueId = RowIdSupplier.INIT; + private long currentValueAsNumberId = RowIdSupplier.INIT; + @Nullable + private Object currentValue; + @Nullable + private Number currentValueAsNumber; + @Override public boolean isNull() { - return !NullHandling.replaceWithDefault() && getCurrentValueAsNumber() == null; + updateCurrentValueAsNumber(); + return !NullHandling.replaceWithDefault() && currentValueAsNumber == null; } @Override public double getDouble() { - final Number n = getCurrentValueAsNumber(); - assert NullHandling.replaceWithDefault() || n != null; - return n != null ? n.doubleValue() : 0d; + updateCurrentValueAsNumber(); + assert NullHandling.replaceWithDefault() || currentValueAsNumber != null; + return currentValueAsNumber != null ? currentValueAsNumber.doubleValue() : 0d; } @Override public float getFloat() { - final Number n = getCurrentValueAsNumber(); - assert NullHandling.replaceWithDefault() || n != null; - return n != null ? n.floatValue() : 0f; + updateCurrentValueAsNumber(); + assert NullHandling.replaceWithDefault() || currentValueAsNumber != null; + return currentValueAsNumber != null ? currentValueAsNumber.floatValue() : 0f; } @Override public long getLong() { - final Number n = getCurrentValueAsNumber(); - assert NullHandling.replaceWithDefault() || n != null; - return n != null ? n.longValue() : 0L; + updateCurrentValueAsNumber(); + assert NullHandling.replaceWithDefault() || currentValueAsNumber != null; + return currentValueAsNumber != null ? currentValueAsNumber.longValue() : 0L; } @Nullable @Override public Object getObject() { - return getCurrentValue(); + updateCurrentValue(); + return currentValue; } @Override @@ -497,24 +509,42 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) inspector.visit("row", rowSupplier); } - @Nullable - private Object getCurrentValue() + private void updateCurrentValue() { - return columnFunction.apply(rowSupplier.get()); + if (rowIdSupplier == null || rowIdSupplier.getRowId() != currentValueId) { + try { + currentValue = columnFunction.apply(rowSupplier.get()); + } + catch (Throwable e) { + currentValueId = RowIdSupplier.INIT; + throw e; + } + + if (rowIdSupplier != null) { + currentValueId = rowIdSupplier.getRowId(); + } + } } - @Nullable - private Number getCurrentValueAsNumber() + private void updateCurrentValueAsNumber() { - final Object currentValue = getCurrentValue(); - if (currentValue instanceof StructuredData) { - return Rows.objectToNumber(columnName, ((StructuredData) currentValue).getValue(), throwParseExceptions); + updateCurrentValue(); + + if (rowIdSupplier == null || rowIdSupplier.getRowId() != currentValueAsNumberId) { + try { + final Object valueToUse = + currentValue instanceof StructuredData ? ((StructuredData) currentValue).getValue() : currentValue; + currentValueAsNumber = Rows.objectToNumber(columnName, valueToUse, expectedType, throwParseExceptions); + } + catch (Throwable e) { + currentValueAsNumberId = RowIdSupplier.INIT; + throw e; + } + + if (rowIdSupplier != null) { + currentValueAsNumberId = rowIdSupplier.getRowId(); + } } - return Rows.objectToNumber( - columnName, - currentValue, - throwParseExceptions - ); } }; } From ca4864326dd9f135f27e9536e34254ccb5c9b82d Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 3 Jan 2024 08:55:17 -0800 Subject: [PATCH 2/3] Fix typing. --- .../org/apache/druid/data/input/Rows.java | 46 ++++++++++++++----- .../RowBasedColumnSelectorFactory.java | 5 +- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/data/input/Rows.java b/processing/src/main/java/org/apache/druid/data/input/Rows.java index eb7e6bc7a211..c8555cd332e0 100644 --- a/processing/src/main/java/org/apache/druid/data/input/Rows.java +++ b/processing/src/main/java/org/apache/druid/data/input/Rows.java @@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableSortedSet; import com.google.common.primitives.Longs; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.parsers.ParseException; import org.apache.druid.segment.column.ValueType; @@ -91,7 +92,7 @@ public static List objectToStrings(final Object inputValue) * * @param name field name of the object being converted (may be used for exception messages) * @param inputValue the actual object being converted - * @param expectedType expected numeric type, or null if it should be automatically detected + * @param outputType expected return type, or null if it should be automatically detected * @param throwParseExceptions whether this method should throw a {@link ParseException} or use a default/null value * when {@param inputValue} is not numeric * @@ -103,12 +104,16 @@ public static List objectToStrings(final Object inputValue) public static Number objectToNumber( final String name, final Object inputValue, - @Nullable final ValueType expectedType, + @Nullable final ValueType outputType, final boolean throwParseExceptions ) { + if (outputType != null && !outputType.isNumeric()) { + throw new IAE("Output type[%s] must be numeric", outputType); + } + if (inputValue == null) { - return (Number) NullHandling.defaultValueForType(expectedType != null ? expectedType : ValueType.LONG); + return (Number) NullHandling.defaultValueForType(outputType != null ? outputType : ValueType.LONG); } else if (inputValue instanceof Number) { return (Number) inputValue; } else if (inputValue instanceof String) { @@ -121,36 +126,53 @@ public static Number objectToNumber( // Try parsing as Long first, since it's significantly faster than Double parsing, and also there are various // integer numbers that can be represented as Long but cannot be represented as Double. - if (expectedType == null || expectedType == ValueType.LONG) { + if (outputType == null || outputType == ValueType.LONG) { v = Longs.tryParse(metricValueString); } - if ((expectedType == null && v == null) || expectedType == ValueType.DOUBLE) { + if (outputType != ValueType.LONG) { v = Double.valueOf(metricValueString); } if (v == null) { if (throwParseExceptions) { - throw new ParseException(String.valueOf(inputValue), "Unable to parse value[%s] for field[%s] as type[%s]", inputValue.getClass(), name, expectedType); + throw new ParseException( + String.valueOf(inputValue), + "Unable to parse value[%s] for field[%s] as type[%s]", + inputValue.getClass(), + name, + outputType + ); } else { - return (Number) NullHandling.defaultValueForType(expectedType); + return (Number) NullHandling.defaultValueForType(outputType); } } else { - return v; + return outputType == ValueType.FLOAT ? Float.valueOf(v.floatValue()) : v; } } catch (Exception e) { if (throwParseExceptions) { - throw new ParseException(String.valueOf(inputValue), e, "Unable to parse value[%s] for field[%s]", inputValue, name); + throw new ParseException( + String.valueOf(inputValue), + e, + "Unable to parse value[%s] for field[%s]", + inputValue, + name + ); } else { - return (Number) NullHandling.defaultValueForType(expectedType != null ? expectedType : ValueType.LONG); + return (Number) NullHandling.defaultValueForType(outputType != null ? outputType : ValueType.LONG); } } } else { if (throwParseExceptions) { - throw new ParseException(String.valueOf(inputValue), "Unknown type[%s] for field[%s]", inputValue.getClass(), name); + throw new ParseException( + String.valueOf(inputValue), + "Unknown type[%s] for field[%s]", + inputValue.getClass(), + name + ); } else { - return (Number) NullHandling.defaultValueForType(expectedType != null ? expectedType : ValueType.LONG); + return (Number) NullHandling.defaultValueForType(outputType != null ? outputType : ValueType.LONG); } } } diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index 0ee3e44621e4..b0a717349f74 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -447,7 +447,8 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } else { final Function columnFunction = adapter.columnFunction(columnName); final ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(columnName); - final ValueType expectedType = capabilities == null ? null : capabilities.getType(); + final ValueType numberType = + capabilities != null && capabilities.getType().isNumeric() ? capabilities.getType() : null; return new ColumnValueSelector() { @@ -534,7 +535,7 @@ private void updateCurrentValueAsNumber() try { final Object valueToUse = currentValue instanceof StructuredData ? ((StructuredData) currentValue).getValue() : currentValue; - currentValueAsNumber = Rows.objectToNumber(columnName, valueToUse, expectedType, throwParseExceptions); + currentValueAsNumber = Rows.objectToNumber(columnName, valueToUse, numberType, throwParseExceptions); } catch (Throwable e) { currentValueAsNumberId = RowIdSupplier.INIT; From 9113050814e46e300bbc9bcf572ab826bb80059f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 3 Jan 2024 11:41:42 -0800 Subject: [PATCH 3/3] Fix logic. --- processing/src/main/java/org/apache/druid/data/input/Rows.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/data/input/Rows.java b/processing/src/main/java/org/apache/druid/data/input/Rows.java index c8555cd332e0..d2a7ded76120 100644 --- a/processing/src/main/java/org/apache/druid/data/input/Rows.java +++ b/processing/src/main/java/org/apache/druid/data/input/Rows.java @@ -130,7 +130,7 @@ public static Number objectToNumber( v = Longs.tryParse(metricValueString); } - if (outputType != ValueType.LONG) { + if (v == null && outputType != ValueType.LONG) { v = Double.valueOf(metricValueString); }