From 190f6e237aa71212aa1f02e70ca503c69753e2ad Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 3 Sep 2024 12:29:35 -0700 Subject: [PATCH 1/7] Frame writers: use getObject when reading string/complex types. This mirrors similar logic in numeric aggregators. (The same method from AggregatorUtil is even used to determine when to apply the logic.) The idea is that when an underlying selector is STRING or COMPLEX typed, we should call getObject and cast the result to number, rather than using the primitive numeric accessor methods. This fixes an issue where a column would be read as all-zeroes when its SQL type is numeric, and its physical type is string. This can happen when evolving a column's type from string to number. --- .../druid/frame/field/FieldWriters.java | 25 +- .../ObjectToDoubleColumnValueSelector.java | 127 +++++++++ .../ObjectToFloatColumnValueSelector.java | 127 +++++++++ .../cast/ObjectToLongColumnValueSelector.java | 127 +++++++++ .../frame/write/cast/TypeCastSelectors.java | 72 +++++ .../write/columnar/FrameColumnWriters.java | 25 +- .../query/aggregation/AggregatorUtil.java | 2 +- .../SimpleDoubleAggregatorFactory.java | 6 +- .../SimpleFloatAggregatorFactory.java | 6 +- .../SimpleLongAggregatorFactory.java | 6 +- .../write/cast/TypeCastSelectorsTest.java | 248 ++++++++++++++++++ 11 files changed, 745 insertions(+), 26 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java create mode 100644 processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java create mode 100644 processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java create mode 100644 processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java create mode 100644 processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java diff --git a/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java b/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java index 028c9fd39c56..dde61a4d2ea9 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java +++ b/processing/src/main/java/org/apache/druid/frame/field/FieldWriters.java @@ -22,6 +22,7 @@ import org.apache.druid.frame.key.RowKey; import org.apache.druid.frame.write.RowBasedFrameWriterFactory; import org.apache.druid.frame.write.UnsupportedColumnTypeException; +import org.apache.druid.frame.write.cast.TypeCastSelectors; import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.segment.ColumnSelectorFactory; @@ -101,7 +102,8 @@ private static FieldWriter makeLongWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.LONG); return LongFieldWriter.forPrimitive(selector); } @@ -110,7 +112,8 @@ private static FieldWriter makeFloatWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.FLOAT); return FloatFieldWriter.forPrimitive(selector); } @@ -119,7 +122,8 @@ private static FieldWriter makeDoubleWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.DOUBLE); return DoubleFieldWriter.forPrimitive(selector); } @@ -139,7 +143,8 @@ private static FieldWriter makeStringArrayWriter( final boolean removeNullBytes ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.STRING_ARRAY); return new StringArrayFieldWriter(selector, removeNullBytes); } @@ -148,7 +153,8 @@ private static FieldWriter makeLongArrayWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.LONG_ARRAY); return NumericArrayFieldWriter.getLongArrayFieldWriter(selector); } @@ -157,7 +163,8 @@ private static FieldWriter makeFloatArrayWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.FLOAT_ARRAY); return NumericArrayFieldWriter.getFloatArrayFieldWriter(selector); } @@ -166,7 +173,8 @@ private static FieldWriter makeDoubleArrayWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.DOUBLE_ARRAY); return NumericArrayFieldWriter.getDoubleArrayFieldWriter(selector); } @@ -185,7 +193,8 @@ private static FieldWriter makeComplexWriter( throw new ISE("No serde for complexTypeName[%s], cannot write column [%s]", columnTypeName, columnName); } - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.ofComplex(columnTypeName)); return new ComplexFieldWriter(serde, selector); } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java new file mode 100644 index 000000000000..59ef1183cf10 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.write.cast; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.RowIdSupplier; + +import javax.annotation.Nullable; + +public class ObjectToDoubleColumnValueSelector implements ColumnValueSelector +{ + private final ColumnValueSelector objectSelector; + + @Nullable + private final RowIdSupplier rowIdSupplier; + + @Nullable + private Double currentValue; + private long currentRowId = RowIdSupplier.INIT; + + public ObjectToDoubleColumnValueSelector( + final ColumnValueSelector objectSelector, + @Nullable final RowIdSupplier rowIdSupplier + ) + { + this.objectSelector = objectSelector; + this.rowIdSupplier = rowIdSupplier; + } + + @Override + public double getDouble() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_DOUBLE : n.doubleValue(); + } + + @Override + public float getFloat() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_FLOAT : n.floatValue(); + } + + @Override + public long getLong() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_LONG : n.longValue(); + } + + @Override + public boolean isNull() + { + return computeIfNeeded() == null; + } + + @Nullable + @Override + public Double getObject() + { + return computeIfNeeded(); + } + + @Override + public Class classOfObject() + { + return Double.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("objectSelector", objectSelector); + inspector.visit("rowIdSupplier", rowIdSupplier); + } + + @Nullable + private Double computeIfNeeded() + { + if (rowIdSupplier == null) { + return eval(); + } else { + final long rowId = rowIdSupplier.getRowId(); + + if (currentRowId != rowId) { + currentValue = eval(); + currentRowId = rowId; + } + + return currentValue; + } + } + + @Nullable + private Double eval() + { + final Object obj = objectSelector.getObject(); + if (obj == null) { + return null; + } else if (obj instanceof Number) { + return ((Number) obj).doubleValue(); + } else { + final ExprEval eval = ExprEval.bestEffortOf(obj); + return eval.isNumericNull() ? null : eval.asDouble(); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java new file mode 100644 index 000000000000..d4a1ad4dfe44 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.write.cast; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.RowIdSupplier; + +import javax.annotation.Nullable; + +public class ObjectToFloatColumnValueSelector implements ColumnValueSelector +{ + private final ColumnValueSelector objectSelector; + + @Nullable + private final RowIdSupplier rowIdSupplier; + + @Nullable + private Float currentValue; + private long currentRowId = RowIdSupplier.INIT; + + public ObjectToFloatColumnValueSelector( + final ColumnValueSelector objectSelector, + @Nullable final RowIdSupplier rowIdSupplier + ) + { + this.objectSelector = objectSelector; + this.rowIdSupplier = rowIdSupplier; + } + + @Override + public double getDouble() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_DOUBLE : n.floatValue(); + } + + @Override + public float getFloat() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_FLOAT : n.floatValue(); + } + + @Override + public long getLong() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_LONG : n.longValue(); + } + + @Override + public boolean isNull() + { + return computeIfNeeded() == null; + } + + @Nullable + @Override + public Float getObject() + { + return computeIfNeeded(); + } + + @Override + public Class classOfObject() + { + return Float.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("objectSelector", objectSelector); + inspector.visit("rowIdSupplier", rowIdSupplier); + } + + @Nullable + private Float computeIfNeeded() + { + if (rowIdSupplier == null) { + return eval(); + } else { + final long rowId = rowIdSupplier.getRowId(); + + if (currentRowId != rowId) { + currentValue = eval(); + currentRowId = rowId; + } + + return currentValue; + } + } + + @Nullable + private Float eval() + { + final Object obj = objectSelector.getObject(); + if (obj == null) { + return null; + } else if (obj instanceof Number) { + return ((Number) obj).floatValue(); + } else { + final ExprEval eval = ExprEval.bestEffortOf(obj); + return eval.isNumericNull() ? null : (float) eval.asDouble(); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java new file mode 100644 index 000000000000..3e18e1eb63d7 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.write.cast; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.RowIdSupplier; + +import javax.annotation.Nullable; + +public class ObjectToLongColumnValueSelector implements ColumnValueSelector +{ + private final ColumnValueSelector objectSelector; + + @Nullable + private final RowIdSupplier rowIdSupplier; + + @Nullable + private Long currentValue; + private long currentRowId = RowIdSupplier.INIT; + + public ObjectToLongColumnValueSelector( + final ColumnValueSelector objectSelector, + @Nullable final RowIdSupplier rowIdSupplier + ) + { + this.objectSelector = objectSelector; + this.rowIdSupplier = rowIdSupplier; + } + + @Override + public double getDouble() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_DOUBLE : n.doubleValue(); + } + + @Override + public float getFloat() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_FLOAT : n.floatValue(); + } + + @Override + public long getLong() + { + final Number n = computeIfNeeded(); + return n == null ? NullHandling.ZERO_LONG : n.longValue(); + } + + @Override + public boolean isNull() + { + return computeIfNeeded() == null; + } + + @Nullable + @Override + public Long getObject() + { + return computeIfNeeded(); + } + + @Override + public Class classOfObject() + { + return Long.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("objectSelector", objectSelector); + inspector.visit("rowIdSupplier", rowIdSupplier); + } + + @Nullable + private Long computeIfNeeded() + { + if (rowIdSupplier == null) { + return eval(); + } else { + final long rowId = rowIdSupplier.getRowId(); + + if (currentRowId != rowId) { + currentValue = eval(); + currentRowId = rowId; + } + + return currentValue; + } + } + + @Nullable + private Long eval() + { + final Object obj = objectSelector.getObject(); + if (obj == null) { + return null; + } else if (obj instanceof Number) { + return ((Number) obj).longValue(); + } else { + final ExprEval eval = ExprEval.bestEffortOf(obj); + return eval.isNumericNull() ? null : eval.asLong(); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java new file mode 100644 index 000000000000..3f68c75ec37d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.write.cast; + +import org.apache.druid.error.DruidException; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; + +public class TypeCastSelectors +{ + /** + * Create a {@link ColumnValueSelector} that does its own typecasting if necessary. If typecasting is not necessary, + * returns a selector directly from the underlying {@link ColumnSelectorFactory}. + * + * @param columnSelectorFactory underlying factory + * @param column column name + * @param desiredType desired type of selector. Can be anything except {@link ColumnType#STRING}. + * For strings, use {@link DimensionSelector} rather than {@link ColumnValueSelector}. + */ + public static ColumnValueSelector makeColumnValueSelector( + final ColumnSelectorFactory columnSelectorFactory, + final String column, + final ColumnType desiredType + ) + { + final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(column); + + if (desiredType.is(ValueType.STRING)) { + throw DruidException.defensive("Unexpected type[%s]", column); + } else if (desiredType.isNumeric() && AggregatorUtil.shouldUseGetObjectForNumbers(column, columnSelectorFactory)) { + final RowIdSupplier rowIdSupplier = columnSelectorFactory.getRowIdSupplier(); + + switch (desiredType.getType()) { + case LONG: + return new ObjectToLongColumnValueSelector(selector, rowIdSupplier); + + case DOUBLE: + return new ObjectToDoubleColumnValueSelector(selector, rowIdSupplier); + + case FLOAT: + return new ObjectToFloatColumnValueSelector(selector, rowIdSupplier); + + default: + throw DruidException.defensive("No implementation for desiredType[%s]", desiredType); + } + } else { + return selector; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java b/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java index 93f0c12bae6f..6efe1870816a 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java +++ b/processing/src/main/java/org/apache/druid/frame/write/columnar/FrameColumnWriters.java @@ -21,6 +21,7 @@ import org.apache.druid.frame.allocation.MemoryAllocator; import org.apache.druid.frame.write.UnsupportedColumnTypeException; +import org.apache.druid.frame.write.cast.TypeCastSelectors; import org.apache.druid.java.util.common.ISE; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.segment.ColumnSelectorFactory; @@ -102,7 +103,8 @@ private static LongFrameColumnWriter makeLongWriter( ) { final ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(columnName); - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.LONG); return new LongFrameColumnWriter(selector, allocator, hasNullsForNumericWriter(capabilities)); } @@ -113,7 +115,8 @@ private static FloatFrameColumnWriter makeFloatWriter( ) { final ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(columnName); - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.FLOAT); return new FloatFrameColumnWriter(selector, allocator, hasNullsForNumericWriter(capabilities)); } @@ -124,7 +127,8 @@ private static DoubleFrameColumnWriter makeDoubleWriter( ) { final ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(columnName); - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.DOUBLE); return new DoubleFrameColumnWriter(selector, allocator, hasNullsForNumericWriter(capabilities)); } @@ -149,7 +153,8 @@ private static StringFrameColumnWriter makeStringArrayWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.STRING_ARRAY); return new StringArrayFrameColumnWriterImpl(selector, allocator); } @@ -159,7 +164,8 @@ private static NumericArrayFrameColumnWriter makeLongArrayWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.LONG_ARRAY); return new LongArrayFrameColumnWriter(selector, allocator); } @@ -169,7 +175,8 @@ private static NumericArrayFrameColumnWriter makeFloatArrayWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.FLOAT_ARRAY); return new FloatArrayFrameColumnWriter(selector, allocator); } @@ -179,7 +186,8 @@ private static NumericArrayFrameColumnWriter makeDoubleArrayWriter( final String columnName ) { - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.DOUBLE_ARRAY); return new DoubleArrayFrameColumnWriter(selector, allocator); } @@ -199,7 +207,8 @@ private static ComplexFrameColumnWriter makeComplexWriter( throw new ISE("No serde for complexTypeName[%s], cannot write column [%s]", columnTypeName, columnName); } - final ColumnValueSelector selector = selectorFactory.makeColumnValueSelector(columnName); + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(selectorFactory, columnName, ColumnType.ofComplex(columnTypeName)); return new ComplexFrameColumnWriter(selector, allocator, serde); } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java index c4c9a7875ef0..d4bb909d8014 100755 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java @@ -440,7 +440,7 @@ public static Supplier getSimpleAggregatorCacheKeySupplier( * @param fieldName field name, or null if the aggregator is expression-based * @param columnSelectorFactory column selector factory */ - public static boolean shouldUseObjectColumnAggregatorWrapper( + public static boolean shouldUseGetObjectForNumbers( @Nullable final String fieldName, final ColumnSelectorFactory columnSelectorFactory ) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java index 0fa96e226eae..5c60572ceb2e 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java @@ -83,7 +83,7 @@ public SimpleDoubleAggregatorFactory( @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, ColumnValueSelector selector) { - if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { return new ObjectColumnDoubleAggregatorWrapper( selector, SimpleDoubleAggregatorFactory.this::buildAggregator, @@ -100,7 +100,7 @@ protected BufferAggregator factorizeBuffered( ColumnValueSelector selector ) { - if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { return new ObjectColumnDoubleBufferAggregatorWrapper( selector, SimpleDoubleAggregatorFactory.this::buildBufferAggregator, @@ -131,7 +131,7 @@ protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnS @Override protected boolean useGetObject(ColumnSelectorFactory columnSelectorFactory) { - return AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, columnSelectorFactory); + return AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, columnSelectorFactory); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java index 5268c454ce1b..bcd1fc18dafe 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java @@ -73,7 +73,7 @@ public SimpleFloatAggregatorFactory( @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, ColumnValueSelector selector) { - if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { return new ObjectColumnFloatAggregatorWrapper( selector, SimpleFloatAggregatorFactory.this::buildAggregator, @@ -90,7 +90,7 @@ protected BufferAggregator factorizeBuffered( ColumnValueSelector selector ) { - if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { return new ObjectColumnFloatBufferAggregatorWrapper( selector, SimpleFloatAggregatorFactory.this::buildBufferAggregator, @@ -121,7 +121,7 @@ protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnS @Override protected boolean useGetObject(ColumnSelectorFactory columnSelectorFactory) { - return AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, columnSelectorFactory); + return AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, columnSelectorFactory); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java index c4bc5307ed48..c8583a127a96 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java @@ -79,7 +79,7 @@ public SimpleLongAggregatorFactory( @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, ColumnValueSelector selector) { - if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { return new ObjectColumnLongAggregatorWrapper( selector, SimpleLongAggregatorFactory.this::buildAggregator, @@ -96,7 +96,7 @@ protected BufferAggregator factorizeBuffered( ColumnValueSelector selector ) { - if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { return new ObjectColumnLongBufferAggregatorWrapper( selector, SimpleLongAggregatorFactory.this::buildBufferAggregator, @@ -127,7 +127,7 @@ protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnS @Override protected boolean useGetObject(ColumnSelectorFactory columnSelectorFactory) { - return AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, columnSelectorFactory); + return AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, columnSelectorFactory); } @Override diff --git a/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java new file mode 100644 index 000000000000..38960d1d8109 --- /dev/null +++ b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.write.cast; + +import com.google.common.collect.ImmutableMap; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.Nullable; +import java.util.Map; + +public class TypeCastSelectorsTest extends InitializedNullHandlingTest +{ + private final ColumnSelectorFactory testColumnSelectorFactory = new TestColumnSelectorFactory( + RowSignature.builder() + .add("x", ColumnType.STRING) + .add("y", ColumnType.STRING) + .add("z", ColumnType.STRING) + .build(), + ImmutableMap.builder() + .put("x", "12.3") + .put("y", "abc") + .build() // z is null + ); + + @Test + public void test_readXAsLong() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "x", ColumnType.LONG); + + Assert.assertEquals(12L, selector.getLong()); + Assert.assertEquals(12d, selector.getDouble(), 0); + Assert.assertEquals(12f, selector.getFloat(), 0); + Assert.assertFalse(selector.isNull()); + Assert.assertEquals(12L, selector.getObject()); + } + + @Test + public void test_readXAsDouble() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "x", ColumnType.DOUBLE); + + Assert.assertEquals(12L, selector.getLong()); + Assert.assertEquals(12.3d, selector.getDouble(), 0); + Assert.assertEquals(12.3f, selector.getFloat(), 0); + Assert.assertFalse(selector.isNull()); + Assert.assertEquals(12.3d, selector.getObject()); + } + + @Test + public void test_readXAsFloat() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "x", ColumnType.FLOAT); + + Assert.assertEquals(12L, selector.getLong()); + Assert.assertEquals(12.3d, selector.getDouble(), 0.001); + Assert.assertEquals(12.3f, selector.getFloat(), 0); + Assert.assertFalse(selector.isNull()); + Assert.assertEquals(12.3f, selector.getObject()); + } + + @Test + public void test_readYAsLong() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "y", ColumnType.LONG); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readYAsDouble() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "y", ColumnType.DOUBLE); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readYAsFloat() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "y", ColumnType.FLOAT); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readZAsLong() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.LONG); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readZAsDouble() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.DOUBLE); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readZAsFloat() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.FLOAT); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + /** + * Implementation that returns a fixed value per column from {@link ColumnValueSelector#getObject()}. Other + * methods, such as {@link ColumnValueSelector#getLong()} throw exceptions. This is meant to help validate + * that those other methods are *not* called. + */ + private static class TestColumnSelectorFactory implements ColumnSelectorFactory + { + private final RowSignature signature; + private final Map columnValues; + + public TestColumnSelectorFactory(final RowSignature signature, final Map columnValues) + { + this.signature = signature; + this.columnValues = columnValues; + } + + @Override + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) + { + throw new UnsupportedOperationException(); + } + + @Override + public ColumnValueSelector makeColumnValueSelector(String columnName) + { + return new ColumnValueSelector() + { + @Override + public double getDouble() + { + throw new UnsupportedOperationException("Should not be called"); + } + + @Override + public float getFloat() + { + throw new UnsupportedOperationException("Should not be called"); + } + + @Override + public long getLong() + { + throw new UnsupportedOperationException("Should not be called"); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + throw new UnsupportedOperationException("Should not be called"); + } + + @Override + public boolean isNull() + { + throw new UnsupportedOperationException("Should not be called"); + } + + @Nullable + @Override + public Object getObject() + { + return columnValues.get(columnName); + } + + @Override + public Class classOfObject() + { + throw new UnsupportedOperationException("Should not be called"); + } + }; + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + return signature.getColumnCapabilities(column); + } + } +} From f3aea170f59d5c9ab6a119597259f5ead6de192f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 4 Sep 2024 09:49:28 -0700 Subject: [PATCH 2/7] Use ExprEval.ofType rather than bestEffortOf. Add casting for arrays too. --- ...ssignmentWithValidationDisabled_select.sql | 2 +- .../field/NumericArrayFieldSelector.java | 10 +- .../frame/field/NumericArrayFieldWriter.java | 22 +--- .../druid/frame/write/FrameWriterUtils.java | 55 +-------- .../ObjectToDoubleColumnValueSelector.java | 25 ++-- .../ObjectToExprEvalColumnValueSelector.java | 110 ++++++++++++++++++ .../ObjectToFloatColumnValueSelector.java | 23 +++- .../cast/ObjectToLongColumnValueSelector.java | 23 +++- .../frame/write/cast/TypeCastSelectors.java | 44 +++++-- .../NumericArrayFrameColumnWriter.java | 11 +- .../org/apache/druid/math/expr/ExprEval.java | 27 ++--- .../query/aggregation/AggregatorUtil.java | 2 +- .../SimpleDoubleAggregatorFactory.java | 6 +- .../SimpleFloatAggregatorFactory.java | 6 +- .../SimpleLongAggregatorFactory.java | 6 +- .../field/StringArrayFieldWriterTest.java | 5 +- .../frame/field/StringFieldReaderTest.java | 5 +- .../write/cast/TypeCastSelectorsTest.java | 78 +++++++++++++ 18 files changed, 323 insertions(+), 137 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToExprEvalColumnValueSelector.java diff --git a/integration-tests-ex/cases/src/test/resources/catalog/incompatibleTypeAssignmentWithValidationDisabled_select.sql b/integration-tests-ex/cases/src/test/resources/catalog/incompatibleTypeAssignmentWithValidationDisabled_select.sql index 4b6fafdae916..ef3e474736cd 100644 --- a/integration-tests-ex/cases/src/test/resources/catalog/incompatibleTypeAssignmentWithValidationDisabled_select.sql +++ b/integration-tests-ex/cases/src/test/resources/catalog/incompatibleTypeAssignmentWithValidationDisabled_select.sql @@ -4,7 +4,7 @@ "expectedResults": [ { "__time": 1672058096000, - "double_col": 0.0 + "double_col": null } ] } diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldSelector.java b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldSelector.java index f15361d47ea1..f1bcb2e5c705 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldSelector.java @@ -39,7 +39,7 @@ * * @param Type of the individual array elements */ -public abstract class NumericArrayFieldSelector implements ColumnValueSelector +public abstract class NumericArrayFieldSelector implements ColumnValueSelector { /** * Memory containing the serialized values of the array @@ -81,15 +81,15 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Nullable @Override - public Object getObject() + public Object[] getObject() { return computeCurrentArray(); } @Override - public Class classOfObject() + public Class classOfObject() { - return Object.class; + return Object[].class; } @Override @@ -131,7 +131,7 @@ public boolean isNull() public abstract int getIndividualFieldSize(); @Nullable - private Number[] computeCurrentArray() + private Object[] computeCurrentArray() { final long fieldPosition = fieldPointer.position(); final long fieldLength = fieldPointer.length(); diff --git a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java index e220d6bdc519..8096e323dc32 100644 --- a/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/field/NumericArrayFieldWriter.java @@ -21,12 +21,10 @@ import org.apache.datasketches.memory.WritableMemory; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.frame.write.FrameWriterUtils; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import javax.annotation.Nullable; -import java.util.List; import java.util.concurrent.atomic.AtomicInteger; /** @@ -128,7 +126,7 @@ public NumericArrayFieldWriter(final ColumnValueSelector selector, NumericFieldW @Override public long writeTo(WritableMemory memory, long position, long maxSize) { - Object row = selector.getObject(); + final Object[] row = (Object[]) selector.getObject(); if (row == null) { int requiredSize = Byte.BYTES; if (requiredSize > maxSize) { @@ -137,18 +135,6 @@ public long writeTo(WritableMemory memory, long position, long maxSize) memory.putByte(position, NULL_ROW); return requiredSize; } else { - - List list = FrameWriterUtils.getNumericArrayFromObject(row); - - if (list == null) { - int requiredSize = Byte.BYTES; - if (requiredSize > maxSize) { - return -1; - } - memory.putByte(position, NULL_ROW); - return requiredSize; - } - // Create a columnValueSelector to write the individual elements re-using the NumericFieldWriter AtomicInteger index = new AtomicInteger(0); ColumnValueSelector columnValueSelector = new ColumnValueSelector() @@ -199,7 +185,7 @@ public boolean isNull() @Override public Number getObject() { - return list.get(index.get()); + return (Number) row[index.get()]; } @Override @@ -215,7 +201,7 @@ public Class classOfObject() // Next [(1 + Numeric Size) x Number of elements of array] bytes are reserved for the elements of the array and // their null markers // Last byte is reserved for array termination - int requiredSize = Byte.BYTES + (writer.getNumericSizeBytes() + Byte.BYTES) * list.size() + Byte.BYTES; + int requiredSize = Byte.BYTES + (writer.getNumericSizeBytes() + Byte.BYTES) * row.length + Byte.BYTES; if (requiredSize > maxSize) { return -1; @@ -225,7 +211,7 @@ public Class classOfObject() memory.putByte(position + offset, NON_NULL_ROW); offset += Byte.BYTES; - for (; index.get() < list.size(); index.incrementAndGet()) { + for (; index.get() < row.length; index.incrementAndGet()) { writer.writeTo( memory, position + offset, diff --git a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java index d857cf33e03c..9ba2b29fe51b 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java +++ b/processing/src/main/java/org/apache/druid/frame/write/FrameWriterUtils.java @@ -144,60 +144,16 @@ public static List getUtf8ByteBuffersFromStringArraySelector( @SuppressWarnings("rawtypes") final BaseObjectColumnValueSelector selector ) { - Object row = selector.getObject(); + final Object[] row = (Object[]) selector.getObject(); if (row == null) { return null; - } else if (row instanceof String) { - return Collections.singletonList(getUtf8ByteBufferFromString((String) row)); - } - - final List retVal = new ArrayList<>(); - if (row instanceof List) { - for (int i = 0; i < ((List) row).size(); i++) { - retVal.add(getUtf8ByteBufferFromString(((List) row).get(i))); - } - } else if (row instanceof Object[]) { - for (Object value : (Object[]) row) { - retVal.add(getUtf8ByteBufferFromString((String) value)); - } } else { - throw new ISE("Unexpected type %s found", row.getClass().getName()); - } - return retVal; - } - - /** - * Retrieves a numeric list from a Java object, given that the object is an instance of something that can be returned - * from {@link ColumnValueSelector#getObject()} of valid numeric array selectors representations - * - * While {@link BaseObjectColumnValueSelector} specifies that only instances of {@code Object[]} can be returned from - * the numeric array selectors, this method also handles a few more cases which can be encountered if the selector is - * directly implemented on top of the group by stuff - */ - @Nullable - public static List getNumericArrayFromObject(Object row) - { - if (row == null) { - return null; - } else if (row instanceof Number) { - return Collections.singletonList((Number) row); - } - - final List retVal = new ArrayList<>(); - - if (row instanceof List) { - for (int i = 0; i < ((List) row).size(); i++) { - retVal.add((Number) ((List) row).get(i)); - } - } else if (row instanceof Object[]) { - for (Object value : (Object[]) row) { - retVal.add((Number) value); + final List retVal = new ArrayList<>(); + for (Object value : row) { + retVal.add(getUtf8ByteBufferFromString((String) value)); } - } else { - throw new ISE("Unexpected type %s found", row.getClass().getName()); + return retVal; } - - return retVal; } /** @@ -275,6 +231,7 @@ public static void copyByteBufferToMemoryDisallowingNullBytes( * Whenever "allowNullBytes" is true, "removeNullBytes" must be false. Use the methods {@link #copyByteBufferToMemoryAllowingNullBytes} * and {@link #copyByteBufferToMemoryDisallowingNullBytes} to copy between the memory *

+ * * @throws InvalidNullByteException if "allowNullBytes" and "removeNullBytes" is false and a null byte is encountered */ private static void copyByteBufferToMemory( diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java index 59ef1183cf10..37b236edba44 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java @@ -21,15 +21,24 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnType; import javax.annotation.Nullable; +/** + * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()} and provides primitive numeric + * accessors based on that object value. The object is interpreted as a double. + */ public class ObjectToDoubleColumnValueSelector implements ColumnValueSelector { - private final ColumnValueSelector objectSelector; + private final ColumnValueSelector selector; + + @Nullable + private final ExpressionType selectorType; @Nullable private final RowIdSupplier rowIdSupplier; @@ -39,11 +48,13 @@ public class ObjectToDoubleColumnValueSelector implements ColumnValueSelector objectSelector, + final ColumnValueSelector selector, + @Nullable final ColumnType selectorType, @Nullable final RowIdSupplier rowIdSupplier ) { - this.objectSelector = objectSelector; + this.selector = selector; + this.selectorType = ExpressionType.fromColumnType(selectorType); this.rowIdSupplier = rowIdSupplier; } @@ -90,7 +101,7 @@ public Class classOfObject() @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("objectSelector", objectSelector); + inspector.visit("selector", selector); inspector.visit("rowIdSupplier", rowIdSupplier); } @@ -114,13 +125,11 @@ private Double computeIfNeeded() @Nullable private Double eval() { - final Object obj = objectSelector.getObject(); + final Object obj = selector.getObject(); if (obj == null) { return null; - } else if (obj instanceof Number) { - return ((Number) obj).doubleValue(); } else { - final ExprEval eval = ExprEval.bestEffortOf(obj); + final ExprEval eval = ExprEval.ofType(selectorType, obj); return eval.isNumericNull() ? null : eval.asDouble(); } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToExprEvalColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToExprEvalColumnValueSelector.java new file mode 100644 index 000000000000..7470947b700c --- /dev/null +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToExprEvalColumnValueSelector.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.frame.write.cast; + +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnType; + +import javax.annotation.Nullable; + +/** + * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()}, and casts that value using + * {@link ExprEval}. The object is interpreted using {@link ExprEval#ofType}. + */ +public class ObjectToExprEvalColumnValueSelector implements ColumnValueSelector +{ + private final ColumnValueSelector selector; + @Nullable + private final ExpressionType selectorType; + private final ExpressionType desiredType; + @Nullable + private final RowIdSupplier rowIdSupplier; + + public ObjectToExprEvalColumnValueSelector( + final ColumnValueSelector selector, + @Nullable final ColumnType selectorType, + final ColumnType desiredType, + @Nullable final RowIdSupplier rowIdSupplier + ) + { + this.selector = selector; + this.selectorType = ExpressionType.fromColumnType(selectorType); + this.desiredType = ExpressionType.fromColumnType(desiredType); + this.rowIdSupplier = rowIdSupplier; + } + + @Override + public double getDouble() + { + return eval().asDouble(); + } + + @Override + public float getFloat() + { + return (float) eval().asDouble(); + } + + @Override + public long getLong() + { + return eval().asLong(); + } + + @Override + public boolean isNull() + { + return eval().isNumericNull(); + } + + @Nullable + @Override + public Object getObject() + { + return eval().value(); + } + + @Override + public Class classOfObject() + { + return Object.class; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("selector", selector); + inspector.visit("rowIdSupplier", rowIdSupplier); + } + + private ExprEval eval() + { + final Object obj = selector.getObject(); + if (obj == null) { + return ExprEval.of(null); + } else { + return ExprEval.ofType(selectorType, obj).castTo(desiredType); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java index d4a1ad4dfe44..7cfa67ff2d90 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java @@ -21,15 +21,24 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnType; import javax.annotation.Nullable; +/** + * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()} and provides primitive numeric + * accessors based on that object value. The object is interpreted as a float. + */ public class ObjectToFloatColumnValueSelector implements ColumnValueSelector { - private final ColumnValueSelector objectSelector; + private final ColumnValueSelector selector; + + @Nullable + private final ExpressionType selectorType; @Nullable private final RowIdSupplier rowIdSupplier; @@ -39,11 +48,13 @@ public class ObjectToFloatColumnValueSelector implements ColumnValueSelector objectSelector, + final ColumnValueSelector selector, + @Nullable final ColumnType selectorType, @Nullable final RowIdSupplier rowIdSupplier ) { - this.objectSelector = objectSelector; + this.selector = selector; + this.selectorType = ExpressionType.fromColumnType(selectorType); this.rowIdSupplier = rowIdSupplier; } @@ -90,7 +101,7 @@ public Class classOfObject() @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("objectSelector", objectSelector); + inspector.visit("selector", selector); inspector.visit("rowIdSupplier", rowIdSupplier); } @@ -114,13 +125,13 @@ private Float computeIfNeeded() @Nullable private Float eval() { - final Object obj = objectSelector.getObject(); + final Object obj = selector.getObject(); if (obj == null) { return null; } else if (obj instanceof Number) { return ((Number) obj).floatValue(); } else { - final ExprEval eval = ExprEval.bestEffortOf(obj); + final ExprEval eval = ExprEval.ofType(selectorType, obj); return eval.isNumericNull() ? null : (float) eval.asDouble(); } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java index 3e18e1eb63d7..245cacf8902a 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java @@ -21,15 +21,24 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnType; import javax.annotation.Nullable; +/** + * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()} and provides primitive numeric + * accessors based on that object value. The object is interpreted as a long. + */ public class ObjectToLongColumnValueSelector implements ColumnValueSelector { - private final ColumnValueSelector objectSelector; + private final ColumnValueSelector selector; + + @Nullable + private final ExpressionType selectorType; @Nullable private final RowIdSupplier rowIdSupplier; @@ -39,11 +48,13 @@ public class ObjectToLongColumnValueSelector implements ColumnValueSelector objectSelector, + final ColumnValueSelector selector, + @Nullable final ColumnType selectorType, @Nullable final RowIdSupplier rowIdSupplier ) { - this.objectSelector = objectSelector; + this.selector = selector; + this.selectorType = ExpressionType.fromColumnType(selectorType); this.rowIdSupplier = rowIdSupplier; } @@ -90,7 +101,7 @@ public Class classOfObject() @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - inspector.visit("objectSelector", objectSelector); + inspector.visit("selector", selector); inspector.visit("rowIdSupplier", rowIdSupplier); } @@ -114,13 +125,13 @@ private Long computeIfNeeded() @Nullable private Long eval() { - final Object obj = objectSelector.getObject(); + final Object obj = selector.getObject(); if (obj == null) { return null; } else if (obj instanceof Number) { return ((Number) obj).longValue(); } else { - final ExprEval eval = ExprEval.bestEffortOf(obj); + final ExprEval eval = ExprEval.ofType(selectorType, obj); return eval.isNumericNull() ? null : eval.asLong(); } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java index 3f68c75ec37d..a9699939a608 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java @@ -20,14 +20,16 @@ package org.apache.druid.frame.write.cast; import org.apache.druid.error.DruidException; -import org.apache.druid.query.aggregation.AggregatorUtil; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; +import javax.annotation.Nullable; + public class TypeCastSelectors { /** @@ -46,26 +48,52 @@ public static ColumnValueSelector makeColumnValueSelector( ) { final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(column); + final ColumnCapabilities selectorCapabilities = columnSelectorFactory.getColumnCapabilities(column); + + return wrapColumnValueSelectorIfNeeded( + selector, + selectorCapabilities, + columnSelectorFactory.getRowIdSupplier(), + desiredType + ); + } + public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( + final ColumnValueSelector selector, + @Nullable final ColumnCapabilities selectorCapabilities, + @Nullable final RowIdSupplier rowIdSupplier, + final ColumnType desiredType + ) + { if (desiredType.is(ValueType.STRING)) { - throw DruidException.defensive("Unexpected type[%s]", column); - } else if (desiredType.isNumeric() && AggregatorUtil.shouldUseGetObjectForNumbers(column, columnSelectorFactory)) { - final RowIdSupplier rowIdSupplier = columnSelectorFactory.getRowIdSupplier(); + throw DruidException.defensive("Type[%s] should be read using a DimensionSelector", desiredType); + } else if (desiredType.isNumeric() + && (selectorCapabilities == null || !selectorCapabilities.isNumeric())) { + // When capabilities are unknown, or known to be non-numeric, fall back to getObject() and explicit typecasting. + // This avoids using primitive numeric accessors (getLong / getDouble / getFloat / isNull) on a selector that + // may not support them. + final ColumnType selectorType = selectorCapabilities != null ? selectorCapabilities.toColumnType() : null; switch (desiredType.getType()) { case LONG: - return new ObjectToLongColumnValueSelector(selector, rowIdSupplier); + return new ObjectToLongColumnValueSelector(selector, selectorType, rowIdSupplier); case DOUBLE: - return new ObjectToDoubleColumnValueSelector(selector, rowIdSupplier); + return new ObjectToDoubleColumnValueSelector(selector, selectorType, rowIdSupplier); case FLOAT: - return new ObjectToFloatColumnValueSelector(selector, rowIdSupplier); + return new ObjectToFloatColumnValueSelector(selector, selectorType, rowIdSupplier); default: - throw DruidException.defensive("No implementation for desiredType[%s]", desiredType); + throw DruidException.defensive("Unexpected numeric desiredType[%s]", desiredType); } + } else if (desiredType.isArray() + && (selectorCapabilities == null || !selectorCapabilities.toColumnType().equals(desiredType))) { + // When reading arrays, wrap if the underlying type does not match the desired array type. + final ColumnType columnType = selectorCapabilities != null ? selectorCapabilities.toColumnType() : null; + return new ObjectToExprEvalColumnValueSelector(selector, columnType, desiredType, rowIdSupplier); } else { + // OK to return the original selector. return selector; } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/columnar/NumericArrayFrameColumnWriter.java b/processing/src/main/java/org/apache/druid/frame/write/columnar/NumericArrayFrameColumnWriter.java index 619bf53b8d3d..432ce8b97aa3 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/columnar/NumericArrayFrameColumnWriter.java +++ b/processing/src/main/java/org/apache/druid/frame/write/columnar/NumericArrayFrameColumnWriter.java @@ -24,11 +24,8 @@ import org.apache.druid.frame.allocation.AppendableMemory; import org.apache.druid.frame.allocation.MemoryAllocator; import org.apache.druid.frame.allocation.MemoryRange; -import org.apache.druid.frame.write.FrameWriterUtils; import org.apache.druid.segment.ColumnValueSelector; -import java.util.List; - /** * Parent class for the family of writers writing numeric arrays in columnar frames. Since the numeric primitives are * fixed width, we don't need to store the width of each element. The memory layout of a column written by this writer @@ -119,8 +116,8 @@ public NumericArrayFrameColumnWriter( @Override public boolean addSelection() { - List numericArray = FrameWriterUtils.getNumericArrayFromObject(selector.getObject()); - int rowLength = numericArray == null ? 0 : numericArray.size(); + final Object[] row = (Object[]) selector.getObject(); + int rowLength = row == null ? 0 : row.length; // Begin memory allocations before writing if ((long) lastCumulativeRowLength + rowLength > Integer.MAX_VALUE) { @@ -142,7 +139,7 @@ public boolean addSelection() final MemoryRange rowLengthsCursor = cumulativeRowLengths.cursor(); - if (numericArray == null) { + if (row == null) { rowLengthsCursor.memory().putInt(rowLengthsCursor.start(), -(lastCumulativeRowLength + rowLength) - 1); } else { rowLengthsCursor.memory().putInt(rowLengthsCursor.start(), lastCumulativeRowLength + rowLength); @@ -155,7 +152,7 @@ public boolean addSelection() final MemoryRange rowDataCursor = rowLength > 0 ? rowData.cursor() : null; for (int i = 0; i < rowLength; ++i) { - final Number element = numericArray.get(i); + final Number element = (Number) row[i]; final long memoryOffset = rowDataCursor.start() + ((long) elementSizeBytes() * i); if (element == null) { rowNullityDataCursor.memory() diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index 4bde15192217..bf3af4a65439 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -1153,21 +1153,18 @@ public final ExprEval castTo(ExpressionType castTo) if (value == null) { return new ArrayExprEval(castTo, null); } - final Number number = computeNumber(); - switch (castTo.getElementType().getType()) { - case DOUBLE: - return ExprEval.ofDoubleArray( - new Object[]{number == null ? null : number.doubleValue()} - ); - case LONG: - return ExprEval.ofLongArray( - new Object[]{number == null ? null : number.longValue()} - ); - case STRING: - return ExprEval.ofStringArray(new Object[]{value}); - default: - ExpressionType elementType = (ExpressionType) castTo.getElementType(); - return new ArrayExprEval(castTo, new Object[]{castTo(elementType).value()}); + ExprType type = castTo.getElementType().getType(); + if (type == ExprType.DOUBLE) { + final Number number = computeNumber(); + return ExprEval.ofDoubleArray(new Object[]{number == null ? null : number.doubleValue()}); + } else if (type == ExprType.LONG) { + final Number number = computeNumber(); + return ExprEval.ofLongArray(new Object[]{number == null ? null : number.longValue()}); + } else if (type == ExprType.STRING) { + return ExprEval.ofStringArray(new Object[]{value}); + } else { + ExpressionType elementType = (ExpressionType) castTo.getElementType(); + return new ArrayExprEval(castTo, new Object[]{castTo(elementType).value()}); } case COMPLEX: if (ExpressionType.NESTED_DATA.equals(castTo)) { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java index d4bb909d8014..c4c9a7875ef0 100755 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java @@ -440,7 +440,7 @@ public static Supplier getSimpleAggregatorCacheKeySupplier( * @param fieldName field name, or null if the aggregator is expression-based * @param columnSelectorFactory column selector factory */ - public static boolean shouldUseGetObjectForNumbers( + public static boolean shouldUseObjectColumnAggregatorWrapper( @Nullable final String fieldName, final ColumnSelectorFactory columnSelectorFactory ) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java index 5c60572ceb2e..0fa96e226eae 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleDoubleAggregatorFactory.java @@ -83,7 +83,7 @@ public SimpleDoubleAggregatorFactory( @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, ColumnValueSelector selector) { - if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { return new ObjectColumnDoubleAggregatorWrapper( selector, SimpleDoubleAggregatorFactory.this::buildAggregator, @@ -100,7 +100,7 @@ protected BufferAggregator factorizeBuffered( ColumnValueSelector selector ) { - if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { return new ObjectColumnDoubleBufferAggregatorWrapper( selector, SimpleDoubleAggregatorFactory.this::buildBufferAggregator, @@ -131,7 +131,7 @@ protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnS @Override protected boolean useGetObject(ColumnSelectorFactory columnSelectorFactory) { - return AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, columnSelectorFactory); + return AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, columnSelectorFactory); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java index bcd1fc18dafe..5268c454ce1b 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleFloatAggregatorFactory.java @@ -73,7 +73,7 @@ public SimpleFloatAggregatorFactory( @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, ColumnValueSelector selector) { - if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { return new ObjectColumnFloatAggregatorWrapper( selector, SimpleFloatAggregatorFactory.this::buildAggregator, @@ -90,7 +90,7 @@ protected BufferAggregator factorizeBuffered( ColumnValueSelector selector ) { - if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { return new ObjectColumnFloatBufferAggregatorWrapper( selector, SimpleFloatAggregatorFactory.this::buildBufferAggregator, @@ -121,7 +121,7 @@ protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnS @Override protected boolean useGetObject(ColumnSelectorFactory columnSelectorFactory) { - return AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, columnSelectorFactory); + return AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, columnSelectorFactory); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java index c8583a127a96..c4bc5307ed48 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/SimpleLongAggregatorFactory.java @@ -79,7 +79,7 @@ public SimpleLongAggregatorFactory( @Override protected Aggregator factorize(ColumnSelectorFactory metricFactory, ColumnValueSelector selector) { - if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { return new ObjectColumnLongAggregatorWrapper( selector, SimpleLongAggregatorFactory.this::buildAggregator, @@ -96,7 +96,7 @@ protected BufferAggregator factorizeBuffered( ColumnValueSelector selector ) { - if (AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, metricFactory)) { + if (AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, metricFactory)) { return new ObjectColumnLongBufferAggregatorWrapper( selector, SimpleLongAggregatorFactory.this::buildBufferAggregator, @@ -127,7 +127,7 @@ protected VectorValueSelector vectorSelector(VectorColumnSelectorFactory columnS @Override protected boolean useGetObject(ColumnSelectorFactory columnSelectorFactory) { - return AggregatorUtil.shouldUseGetObjectForNumbers(fieldName, columnSelectorFactory); + return AggregatorUtil.shouldUseObjectColumnAggregatorWrapper(fieldName, columnSelectorFactory); } @Override diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java index 6aba25ddf22c..44d8ed5a26d4 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringArrayFieldWriterTest.java @@ -50,7 +50,7 @@ public class StringArrayFieldWriterTest extends InitializedNullHandlingTest public MockitoRule mockitoRule = MockitoJUnit.rule().strictness(Strictness.STRICT_STUBS); @Mock - public BaseObjectColumnValueSelector> selector; + public BaseObjectColumnValueSelector selector; private WritableMemory memory; private FieldWriter fieldWriter; @@ -115,7 +115,8 @@ private void doTest(@Nullable final List values) private void mockSelector(@Nullable final List values) { - Mockito.when(selector.getObject()).thenReturn(values); + final Object[] arr = values == null ? null : values.toArray(); + Mockito.when(selector.getObject()).thenReturn(arr); } private long writeToMemory(final FieldWriter writer) diff --git a/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java b/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java index b0f589ed4804..200c469269ff 100644 --- a/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java +++ b/processing/src/test/java/org/apache/druid/frame/field/StringFieldReaderTest.java @@ -60,7 +60,7 @@ public class StringFieldReaderTest extends InitializedNullHandlingTest public MockitoRule mockitoRule = MockitoJUnit.rule().strictness(Strictness.STRICT_STUBS); @Mock - public BaseObjectColumnValueSelector> writeSelector; + public BaseObjectColumnValueSelector writeSelector; private WritableMemory memory; private FieldWriter fieldWriter; @@ -277,7 +277,8 @@ public void test_makeDimensionSelector_multiString_withExtractionFn() private void writeToMemory(@Nullable final List values) { - Mockito.when(writeSelector.getObject()).thenReturn(values); + final Object[] arr = values == null ? null : values.toArray(); + Mockito.when(writeSelector.getObject()).thenReturn(arr); if (fieldWriter.writeTo(memory, MEMORY_POSITION, memory.getCapacity() - MEMORY_POSITION) < 0) { throw new ISE("Could not write"); diff --git a/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java index 38960d1d8109..30a9608461b6 100644 --- a/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java @@ -88,6 +88,32 @@ public void test_readXAsFloat() Assert.assertEquals(12.3f, selector.getObject()); } + @Test + public void test_readXAsLongArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "x", ColumnType.LONG_ARRAY); + + Assert.assertEquals(12L, selector.getLong()); + Assert.assertEquals(12.0d, selector.getDouble(), 0.001); + Assert.assertEquals(12.0f, selector.getFloat(), 0); + Assert.assertFalse(selector.isNull()); + Assert.assertArrayEquals(new Object[]{12L}, (Object[]) selector.getObject()); + } + + @Test + public void test_readXAsStringArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "x", ColumnType.STRING_ARRAY); + + Assert.assertEquals(12L, selector.getLong()); + Assert.assertEquals(12.3d, selector.getDouble(), 0.001); + Assert.assertEquals(12.3f, selector.getFloat(), 0); + Assert.assertFalse(selector.isNull()); + Assert.assertArrayEquals(new Object[]{"12.3"}, (Object[]) selector.getObject()); + } + @Test public void test_readYAsLong() { @@ -127,6 +153,32 @@ public void test_readYAsFloat() Assert.assertNull(selector.getObject()); } + @Test + public void test_readYAsLongArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "y", ColumnType.LONG_ARRAY); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertArrayEquals(new Object[]{null}, (Object[]) selector.getObject()); + } + + @Test + public void test_readYAsStringArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "y", ColumnType.STRING_ARRAY); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertArrayEquals(new Object[]{"abc"}, (Object[]) selector.getObject()); + } + @Test public void test_readZAsLong() { @@ -166,6 +218,32 @@ public void test_readZAsFloat() Assert.assertNull(selector.getObject()); } + @Test + public void test_readZAsLongArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.LONG_ARRAY); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readZAsStringArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.STRING_ARRAY); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + /** * Implementation that returns a fixed value per column from {@link ColumnValueSelector#getObject()}. Other * methods, such as {@link ColumnValueSelector#getLong()} throw exceptions. This is meant to help validate From e224671f194ca461e71e63e218e08e01b9b9575a Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 4 Sep 2024 15:09:55 -0700 Subject: [PATCH 3/7] Coerce in RowBasedColumnSelectorFactory when desired type doesn't match the object from the row adapter. --- ...ava => ObjectEvalColumnValueSelector.java} | 14 ++---- .../frame/write/cast/TypeCastSelectors.java | 2 +- .../org/apache/druid/math/expr/ExprEval.java | 14 ++++-- .../RowBasedColumnSelectorFactory.java | 46 +++++++++++++++++-- 4 files changed, 56 insertions(+), 20 deletions(-) rename processing/src/main/java/org/apache/druid/frame/write/cast/{ObjectToExprEvalColumnValueSelector.java => ObjectEvalColumnValueSelector.java} (86%) diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToExprEvalColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectEvalColumnValueSelector.java similarity index 86% rename from processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToExprEvalColumnValueSelector.java rename to processing/src/main/java/org/apache/druid/frame/write/cast/ObjectEvalColumnValueSelector.java index 7470947b700c..b814250865cf 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToExprEvalColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectEvalColumnValueSelector.java @@ -29,10 +29,10 @@ import javax.annotation.Nullable; /** - * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()}, and casts that value using - * {@link ExprEval}. The object is interpreted using {@link ExprEval#ofType}. + * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()}, interprets that value using + * {@link ExprEval#ofType}, and casts it using {@link ExprEval#castTo}. */ -public class ObjectToExprEvalColumnValueSelector implements ColumnValueSelector +public class ObjectEvalColumnValueSelector implements ColumnValueSelector { private final ColumnValueSelector selector; @Nullable @@ -41,7 +41,7 @@ public class ObjectToExprEvalColumnValueSelector implements ColumnValueSelector< @Nullable private final RowIdSupplier rowIdSupplier; - public ObjectToExprEvalColumnValueSelector( + public ObjectEvalColumnValueSelector( final ColumnValueSelector selector, @Nullable final ColumnType selectorType, final ColumnType desiredType, @@ -101,10 +101,6 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) private ExprEval eval() { final Object obj = selector.getObject(); - if (obj == null) { - return ExprEval.of(null); - } else { - return ExprEval.ofType(selectorType, obj).castTo(desiredType); - } + return ExprEval.ofType(selectorType, obj).castTo(desiredType); } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java index a9699939a608..4d3c8f3a7165 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java @@ -91,7 +91,7 @@ public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( && (selectorCapabilities == null || !selectorCapabilities.toColumnType().equals(desiredType))) { // When reading arrays, wrap if the underlying type does not match the desired array type. final ColumnType columnType = selectorCapabilities != null ? selectorCapabilities.toColumnType() : null; - return new ObjectToExprEvalColumnValueSelector(selector, columnType, desiredType, rowIdSupplier); + return new ObjectEvalColumnValueSelector(selector, columnType, desiredType, rowIdSupplier); } else { // OK to return the original selector. return selector; diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index bf3af4a65439..283bc651af6e 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -406,7 +406,7 @@ public static ExprEval bestEffortArray(@Nullable List theList) public static ExprEval bestEffortOf(@Nullable Object val) { if (val == null) { - return new StringExprEval(null); + return StringExprEval.OF_NULL; } if (val instanceof ExprEval) { return (ExprEval) val; @@ -522,7 +522,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object value) { if (type == null) { - return bestEffortOf(value); + return StringExprEval.OF_NULL; } switch (type.getType()) { case STRING: @@ -1145,10 +1145,13 @@ public final ExprEval castTo(ExpressionType castTo) switch (castTo.getType()) { case DOUBLE: return ExprEval.ofDouble(computeNumber()); + case LONG: return ExprEval.ofLong(computeNumber()); + case STRING: return this; + case ARRAY: if (value == null) { return new ArrayExprEval(castTo, null); @@ -1162,10 +1165,11 @@ public final ExprEval castTo(ExpressionType castTo) return ExprEval.ofLongArray(new Object[]{number == null ? null : number.longValue()}); } else if (type == ExprType.STRING) { return ExprEval.ofStringArray(new Object[]{value}); - } else { - ExpressionType elementType = (ExpressionType) castTo.getElementType(); - return new ArrayExprEval(castTo, new Object[]{castTo(elementType).value()}); } + + ExpressionType elementType = (ExpressionType) castTo.getElementType(); + return new ArrayExprEval(castTo, new Object[]{castTo(elementType).value()}); + case COMPLEX: if (ExpressionType.NESTED_DATA.equals(castTo)) { return new NestedDataExprEval(value); diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index 43ae6ae14646..ec1e28eab477 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -22,6 +22,8 @@ import com.google.common.base.Preconditions; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.Rows; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.DruidObjectPredicate; @@ -32,7 +34,6 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.RangeIndexedInts; import org.apache.druid.segment.nested.StructuredData; @@ -445,8 +446,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } else { final Function columnFunction = adapter.columnFunction(columnName); final ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(columnName); - final ValueType numberType = - capabilities != null && capabilities.getType().isNumeric() ? capabilities.getType() : null; + final ExpressionType desiredType = ExpressionType.fromColumnType(capabilities); return new ColumnValueSelector() { @@ -512,7 +512,7 @@ private void updateCurrentValue() { if (rowIdSupplier == null || rowIdSupplier.getRowId() != currentValueId) { try { - currentValue = columnFunction.apply(rowSupplier.get()); + currentValue = coerce(columnFunction.apply(rowSupplier.get())); } catch (Throwable e) { currentValueId = RowIdSupplier.INIT; @@ -533,7 +533,12 @@ private void updateCurrentValueAsNumber() try { final Object valueToUse = currentValue instanceof StructuredData ? ((StructuredData) currentValue).getValue() : currentValue; - currentValueAsNumber = Rows.objectToNumber(columnName, valueToUse, numberType, throwParseExceptions); + currentValueAsNumber = Rows.objectToNumber( + columnName, + valueToUse, + capabilities != null && capabilities.isNumeric() ? capabilities.getType() : null, + throwParseExceptions + ); } catch (Throwable e) { currentValueAsNumberId = RowIdSupplier.INIT; @@ -545,6 +550,37 @@ private void updateCurrentValueAsNumber() } } } + + @Nullable + private Object coerce(@Nullable final Object obj) + { + if (needsCoerce(obj)) { + return ExprEval.bestEffortOf(obj).castTo(desiredType).value(); + } else { + return obj; + } + } + + private boolean needsCoerce(@Nullable final Object obj) + { + if (obj == null || desiredType == null) { + return false; + } + + switch (desiredType.getType()) { + case LONG: + case DOUBLE: + return !(obj instanceof Number); + case STRING: + return !(obj instanceof String || obj instanceof List || obj instanceof Object[]); + case ARRAY: + return !(obj instanceof Object[]); + case COMPLEX: + return false; + default: + return true; + } + } }; } } From 5f58b59ce287f43daef9e30c82f42a673a7bf78d Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 4 Sep 2024 15:50:52 -0700 Subject: [PATCH 4/7] Fix error. --- .../src/main/java/org/apache/druid/math/expr/ExprEval.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index 283bc651af6e..dbccc6d9fe87 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -522,7 +522,7 @@ public static ExprEval bestEffortOf(@Nullable Object val) public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object value) { if (type == null) { - return StringExprEval.OF_NULL; + return bestEffortOf(value); } switch (type.getType()) { case STRING: From f1985d8b5276442a5bb82be31d1f047978ebb68a Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 4 Sep 2024 22:52:49 -0700 Subject: [PATCH 5/7] Undo RowBasedColumnSelectorFactory change. Account for it in TypeCastSelectors. --- .../GroupByPostShuffleFrameProcessor.java | 2 +- ... => ObjectToArrayColumnValueSelector.java} | 40 +++-- .../ObjectToFloatColumnValueSelector.java | 138 ------------------ .../cast/ObjectToLongColumnValueSelector.java | 138 ------------------ ...=> ObjectToNumberColumnValueSelector.java} | 45 +++--- .../frame/write/cast/TypeCastSelectors.java | 83 ++++++++--- .../RowBasedColumnSelectorFactory.java | 46 +----- .../write/cast/TypeCastSelectorsTest.java | 104 ++++++++++--- 8 files changed, 198 insertions(+), 398 deletions(-) rename processing/src/main/java/org/apache/druid/frame/write/cast/{ObjectEvalColumnValueSelector.java => ObjectToArrayColumnValueSelector.java} (67%) delete mode 100644 processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java delete mode 100644 processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java rename processing/src/main/java/org/apache/druid/frame/write/cast/{ObjectToDoubleColumnValueSelector.java => ObjectToNumberColumnValueSelector.java} (75%) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java index 7861a45f61d0..5b0a3ddefd2e 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java @@ -109,7 +109,7 @@ public GroupByPostShuffleFrameProcessor( RowBasedGrouperHelper.createResultRowBasedColumnSelectorFactory( query, () -> outputRow, - RowSignature.Finalization.YES + GroupByQueryKit.isFinalize(query) ? RowSignature.Finalization.YES : RowSignature.Finalization.NO ) ); } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectEvalColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToArrayColumnValueSelector.java similarity index 67% rename from processing/src/main/java/org/apache/druid/frame/write/cast/ObjectEvalColumnValueSelector.java rename to processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToArrayColumnValueSelector.java index b814250865cf..614c761549a0 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectEvalColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToArrayColumnValueSelector.java @@ -19,12 +19,13 @@ package org.apache.druid.frame.write.cast; +import org.apache.druid.error.DruidException; import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.RowIdSupplier; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; import javax.annotation.Nullable; @@ -32,63 +33,64 @@ * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()}, interprets that value using * {@link ExprEval#ofType}, and casts it using {@link ExprEval#castTo}. */ -public class ObjectEvalColumnValueSelector implements ColumnValueSelector +public class ObjectToArrayColumnValueSelector implements ColumnValueSelector { private final ColumnValueSelector selector; @Nullable - private final ExpressionType selectorType; - private final ExpressionType desiredType; + private final ColumnType desiredType; @Nullable private final RowIdSupplier rowIdSupplier; - public ObjectEvalColumnValueSelector( + public ObjectToArrayColumnValueSelector( final ColumnValueSelector selector, - @Nullable final ColumnType selectorType, final ColumnType desiredType, @Nullable final RowIdSupplier rowIdSupplier ) { this.selector = selector; - this.selectorType = ExpressionType.fromColumnType(selectorType); - this.desiredType = ExpressionType.fromColumnType(desiredType); + this.desiredType = desiredType; this.rowIdSupplier = rowIdSupplier; + + if (!desiredType.is(ValueType.ARRAY) || desiredType.getElementType() == null) { + throw DruidException.defensive("Expected array with nonnull element type, got[%s]", desiredType); + } } @Override public double getDouble() { - return eval().asDouble(); + throw DruidException.defensive("Unexpected call to getDouble on array selector"); } @Override public float getFloat() { - return (float) eval().asDouble(); + throw DruidException.defensive("Unexpected call to getFloat on array selector"); } @Override public long getLong() { - return eval().asLong(); + throw DruidException.defensive("Unexpected call to getLong on array selector"); } @Override public boolean isNull() { - return eval().isNumericNull(); + throw DruidException.defensive("Unexpected call to isNull on array selector"); } @Nullable @Override - public Object getObject() + public Object[] getObject() { - return eval().value(); + return (Object[]) TypeCastSelectors.bestEffortCoerce(selector.getObject(), desiredType); } @Override - public Class classOfObject() + public Class classOfObject() { - return Object.class; + return Object[].class; } @Override @@ -97,10 +99,4 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) inspector.visit("selector", selector); inspector.visit("rowIdSupplier", rowIdSupplier); } - - private ExprEval eval() - { - final Object obj = selector.getObject(); - return ExprEval.ofType(selectorType, obj).castTo(desiredType); - } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java deleted file mode 100644 index 7cfa67ff2d90..000000000000 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToFloatColumnValueSelector.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.frame.write.cast; - -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.RowIdSupplier; -import org.apache.druid.segment.column.ColumnType; - -import javax.annotation.Nullable; - -/** - * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()} and provides primitive numeric - * accessors based on that object value. The object is interpreted as a float. - */ -public class ObjectToFloatColumnValueSelector implements ColumnValueSelector -{ - private final ColumnValueSelector selector; - - @Nullable - private final ExpressionType selectorType; - - @Nullable - private final RowIdSupplier rowIdSupplier; - - @Nullable - private Float currentValue; - private long currentRowId = RowIdSupplier.INIT; - - public ObjectToFloatColumnValueSelector( - final ColumnValueSelector selector, - @Nullable final ColumnType selectorType, - @Nullable final RowIdSupplier rowIdSupplier - ) - { - this.selector = selector; - this.selectorType = ExpressionType.fromColumnType(selectorType); - this.rowIdSupplier = rowIdSupplier; - } - - @Override - public double getDouble() - { - final Number n = computeIfNeeded(); - return n == null ? NullHandling.ZERO_DOUBLE : n.floatValue(); - } - - @Override - public float getFloat() - { - final Number n = computeIfNeeded(); - return n == null ? NullHandling.ZERO_FLOAT : n.floatValue(); - } - - @Override - public long getLong() - { - final Number n = computeIfNeeded(); - return n == null ? NullHandling.ZERO_LONG : n.longValue(); - } - - @Override - public boolean isNull() - { - return computeIfNeeded() == null; - } - - @Nullable - @Override - public Float getObject() - { - return computeIfNeeded(); - } - - @Override - public Class classOfObject() - { - return Float.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("selector", selector); - inspector.visit("rowIdSupplier", rowIdSupplier); - } - - @Nullable - private Float computeIfNeeded() - { - if (rowIdSupplier == null) { - return eval(); - } else { - final long rowId = rowIdSupplier.getRowId(); - - if (currentRowId != rowId) { - currentValue = eval(); - currentRowId = rowId; - } - - return currentValue; - } - } - - @Nullable - private Float eval() - { - final Object obj = selector.getObject(); - if (obj == null) { - return null; - } else if (obj instanceof Number) { - return ((Number) obj).floatValue(); - } else { - final ExprEval eval = ExprEval.ofType(selectorType, obj); - return eval.isNumericNull() ? null : (float) eval.asDouble(); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java deleted file mode 100644 index 245cacf8902a..000000000000 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToLongColumnValueSelector.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.frame.write.cast; - -import org.apache.druid.common.config.NullHandling; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.RowIdSupplier; -import org.apache.druid.segment.column.ColumnType; - -import javax.annotation.Nullable; - -/** - * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()} and provides primitive numeric - * accessors based on that object value. The object is interpreted as a long. - */ -public class ObjectToLongColumnValueSelector implements ColumnValueSelector -{ - private final ColumnValueSelector selector; - - @Nullable - private final ExpressionType selectorType; - - @Nullable - private final RowIdSupplier rowIdSupplier; - - @Nullable - private Long currentValue; - private long currentRowId = RowIdSupplier.INIT; - - public ObjectToLongColumnValueSelector( - final ColumnValueSelector selector, - @Nullable final ColumnType selectorType, - @Nullable final RowIdSupplier rowIdSupplier - ) - { - this.selector = selector; - this.selectorType = ExpressionType.fromColumnType(selectorType); - this.rowIdSupplier = rowIdSupplier; - } - - @Override - public double getDouble() - { - final Number n = computeIfNeeded(); - return n == null ? NullHandling.ZERO_DOUBLE : n.doubleValue(); - } - - @Override - public float getFloat() - { - final Number n = computeIfNeeded(); - return n == null ? NullHandling.ZERO_FLOAT : n.floatValue(); - } - - @Override - public long getLong() - { - final Number n = computeIfNeeded(); - return n == null ? NullHandling.ZERO_LONG : n.longValue(); - } - - @Override - public boolean isNull() - { - return computeIfNeeded() == null; - } - - @Nullable - @Override - public Long getObject() - { - return computeIfNeeded(); - } - - @Override - public Class classOfObject() - { - return Long.class; - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - inspector.visit("selector", selector); - inspector.visit("rowIdSupplier", rowIdSupplier); - } - - @Nullable - private Long computeIfNeeded() - { - if (rowIdSupplier == null) { - return eval(); - } else { - final long rowId = rowIdSupplier.getRowId(); - - if (currentRowId != rowId) { - currentValue = eval(); - currentRowId = rowId; - } - - return currentValue; - } - } - - @Nullable - private Long eval() - { - final Object obj = selector.getObject(); - if (obj == null) { - return null; - } else if (obj instanceof Number) { - return ((Number) obj).longValue(); - } else { - final ExprEval eval = ExprEval.ofType(selectorType, obj); - return eval.isNumericNull() ? null : eval.asLong(); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToNumberColumnValueSelector.java similarity index 75% rename from processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java rename to processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToNumberColumnValueSelector.java index 37b236edba44..d35c70905b4a 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToDoubleColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToNumberColumnValueSelector.java @@ -20,8 +20,7 @@ package org.apache.druid.frame.write.cast; import org.apache.druid.common.config.NullHandling; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.error.DruidException; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.RowIdSupplier; @@ -31,31 +30,37 @@ /** * Wraps a {@link ColumnValueSelector}, calls {@link ColumnValueSelector#getObject()} and provides primitive numeric - * accessors based on that object value. The object is interpreted as a double. + * accessors based on that object value. */ -public class ObjectToDoubleColumnValueSelector implements ColumnValueSelector +public class ObjectToNumberColumnValueSelector implements ColumnValueSelector { private final ColumnValueSelector selector; - - @Nullable - private final ExpressionType selectorType; + private final ColumnType desiredType; @Nullable private final RowIdSupplier rowIdSupplier; @Nullable - private Double currentValue; + private Number currentValue; private long currentRowId = RowIdSupplier.INIT; - public ObjectToDoubleColumnValueSelector( + /** + * Package-private; create using {@link TypeCastSelectors#makeColumnValueSelector} or + * {@link TypeCastSelectors#wrapColumnValueSelectorIfNeeded}. + */ + ObjectToNumberColumnValueSelector( final ColumnValueSelector selector, - @Nullable final ColumnType selectorType, + final ColumnType desiredType, @Nullable final RowIdSupplier rowIdSupplier ) { this.selector = selector; - this.selectorType = ExpressionType.fromColumnType(selectorType); + this.desiredType = desiredType; this.rowIdSupplier = rowIdSupplier; + + if (!desiredType.isNumeric()) { + throw DruidException.defensive("Expected numeric type, got[%s]", desiredType); + } } @Override @@ -87,15 +92,15 @@ public boolean isNull() @Nullable @Override - public Double getObject() + public Number getObject() { return computeIfNeeded(); } @Override - public Class classOfObject() + public Class classOfObject() { - return Double.class; + return Number.class; } @Override @@ -106,7 +111,7 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } @Nullable - private Double computeIfNeeded() + private Number computeIfNeeded() { if (rowIdSupplier == null) { return eval(); @@ -123,14 +128,8 @@ private Double computeIfNeeded() } @Nullable - private Double eval() + private Number eval() { - final Object obj = selector.getObject(); - if (obj == null) { - return null; - } else { - final ExprEval eval = ExprEval.ofType(selectorType, obj); - return eval.isNumericNull() ? null : eval.asDouble(); - } + return (Number) TypeCastSelectors.bestEffortCoerce(selector.getObject(), desiredType); } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java index 4d3c8f3a7165..f01f93cc2388 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java @@ -20,15 +20,19 @@ package org.apache.druid.frame.write.cast; import org.apache.druid.error.DruidException; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.RowIdSupplier; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.ValueType; import javax.annotation.Nullable; +import java.util.List; public class TypeCastSelectors { @@ -72,29 +76,74 @@ public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( // When capabilities are unknown, or known to be non-numeric, fall back to getObject() and explicit typecasting. // This avoids using primitive numeric accessors (getLong / getDouble / getFloat / isNull) on a selector that // may not support them. - final ColumnType selectorType = selectorCapabilities != null ? selectorCapabilities.toColumnType() : null; + return new ObjectToNumberColumnValueSelector(selector, desiredType, rowIdSupplier); + } else if (desiredType.isArray()) { + // Always wrap if desiredType is an array. Even if the underlying selector claims to offer the same type as + // desiredType, it may fail to respect the BaseObjectColumnValueSelector contract. For example, it may return + // List rather than Object[]. (RowBasedColumnSelectorFactory can do this if used incorrectly, i.e., if the + // ColumnInspector declares type ARRAY for a column, but the RowAdapter does not provide Object[].) + return new ObjectToArrayColumnValueSelector(selector, desiredType, rowIdSupplier); + } else { + // OK to return the original selector. + return selector; + } + } - switch (desiredType.getType()) { - case LONG: - return new ObjectToLongColumnValueSelector(selector, selectorType, rowIdSupplier); + /** + * Coerce an object to an object compatible with what {@link BaseObjectColumnValueSelector#getObject()} for a column + * of the provided desiredType. Never throws an exception. If coercion fails, replaces the object that failed to + * coerce with null. + * + * @param obj object + * @param desiredType desired type + */ + @Nullable + public static Object bestEffortCoerce( + @Nullable final Object obj, + @Nullable final TypeSignature desiredType + ) + { + if (obj == null || desiredType == null) { + return obj; + } - case DOUBLE: - return new ObjectToDoubleColumnValueSelector(selector, selectorType, rowIdSupplier); + ValueType type = desiredType.getType(); - case FLOAT: - return new ObjectToFloatColumnValueSelector(selector, selectorType, rowIdSupplier); + if (type == ValueType.STRING) { + return ExprEval.bestEffortOf(obj).asString(); + } else if (type == ValueType.LONG) { + final ExprEval n = ExprEval.bestEffortOf(obj); + return n.isNumericNull() ? null : n.asLong(); + } else if (type == ValueType.DOUBLE) { + final ExprEval n = ExprEval.bestEffortOf(obj); + return n.isNumericNull() ? null : n.asDouble(); + } else if (type == ValueType.FLOAT) { + final ExprEval n = ExprEval.bestEffortOf(obj); + return n.isNumericNull() ? null : (float) n.asDouble(); + } else if (type == ValueType.ARRAY) { + final TypeSignature elementType = desiredType.getElementType(); - default: - throw DruidException.defensive("Unexpected numeric desiredType[%s]", desiredType); + if (obj instanceof List) { + final List list = (List) obj; + final Object[] retVal = new Object[list.size()]; + for (int i = 0; i < list.size(); i++) { + retVal[i] = bestEffortCoerce(list.get(i), elementType); + } + return retVal; + } else if (obj instanceof Object[]) { + final Object[] arr = (Object[]) obj; + final Object[] retVal = new Object[arr.length]; + for (int i = 0; i < arr.length; i++) { + retVal[i] = bestEffortCoerce(arr[i], elementType); + } + return retVal; + } else { + // Wrap scalar types in singleton Object[]. + return new Object[]{bestEffortCoerce(obj, elementType)}; } - } else if (desiredType.isArray() - && (selectorCapabilities == null || !selectorCapabilities.toColumnType().equals(desiredType))) { - // When reading arrays, wrap if the underlying type does not match the desired array type. - final ColumnType columnType = selectorCapabilities != null ? selectorCapabilities.toColumnType() : null; - return new ObjectEvalColumnValueSelector(selector, columnType, desiredType, rowIdSupplier); } else { - // OK to return the original selector. - return selector; + // No coercion for COMPLEX, hope the reader knows how to deal with whatever we have here. + return obj; } } } diff --git a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java index ec1e28eab477..43ae6ae14646 100644 --- a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java @@ -22,8 +22,6 @@ import com.google.common.base.Preconditions; import org.apache.druid.common.config.NullHandling; import org.apache.druid.data.input.Rows; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.extraction.ExtractionFn; import org.apache.druid.query.filter.DruidObjectPredicate; @@ -34,6 +32,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.RangeIndexedInts; import org.apache.druid.segment.nested.StructuredData; @@ -446,7 +445,8 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } else { final Function columnFunction = adapter.columnFunction(columnName); final ColumnCapabilities capabilities = columnInspector.getColumnCapabilities(columnName); - final ExpressionType desiredType = ExpressionType.fromColumnType(capabilities); + final ValueType numberType = + capabilities != null && capabilities.getType().isNumeric() ? capabilities.getType() : null; return new ColumnValueSelector() { @@ -512,7 +512,7 @@ private void updateCurrentValue() { if (rowIdSupplier == null || rowIdSupplier.getRowId() != currentValueId) { try { - currentValue = coerce(columnFunction.apply(rowSupplier.get())); + currentValue = columnFunction.apply(rowSupplier.get()); } catch (Throwable e) { currentValueId = RowIdSupplier.INIT; @@ -533,12 +533,7 @@ private void updateCurrentValueAsNumber() try { final Object valueToUse = currentValue instanceof StructuredData ? ((StructuredData) currentValue).getValue() : currentValue; - currentValueAsNumber = Rows.objectToNumber( - columnName, - valueToUse, - capabilities != null && capabilities.isNumeric() ? capabilities.getType() : null, - throwParseExceptions - ); + currentValueAsNumber = Rows.objectToNumber(columnName, valueToUse, numberType, throwParseExceptions); } catch (Throwable e) { currentValueAsNumberId = RowIdSupplier.INIT; @@ -550,37 +545,6 @@ private void updateCurrentValueAsNumber() } } } - - @Nullable - private Object coerce(@Nullable final Object obj) - { - if (needsCoerce(obj)) { - return ExprEval.bestEffortOf(obj).castTo(desiredType).value(); - } else { - return obj; - } - } - - private boolean needsCoerce(@Nullable final Object obj) - { - if (obj == null || desiredType == null) { - return false; - } - - switch (desiredType.getType()) { - case LONG: - case DOUBLE: - return !(obj instanceof Number); - case STRING: - return !(obj instanceof String || obj instanceof List || obj instanceof Object[]); - case ARRAY: - return !(obj instanceof Object[]); - case COMPLEX: - return false; - default: - return true; - } - } }; } } diff --git a/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java index 30a9608461b6..f48aa2755a77 100644 --- a/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java @@ -20,6 +20,7 @@ package org.apache.druid.frame.write.cast; import com.google.common.collect.ImmutableMap; +import org.apache.druid.error.DruidException; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnSelectorFactory; @@ -42,10 +43,12 @@ public class TypeCastSelectorsTest extends InitializedNullHandlingTest .add("x", ColumnType.STRING) .add("y", ColumnType.STRING) .add("z", ColumnType.STRING) + .add("da", ColumnType.DOUBLE_ARRAY) .build(), ImmutableMap.builder() .put("x", "12.3") .put("y", "abc") + .put("da", new Object[]{1.2d, 2.3d}) .build() // z is null ); @@ -94,10 +97,10 @@ public void test_readXAsLongArray() final ColumnValueSelector selector = TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "x", ColumnType.LONG_ARRAY); - Assert.assertEquals(12L, selector.getLong()); - Assert.assertEquals(12.0d, selector.getDouble(), 0.001); - Assert.assertEquals(12.0f, selector.getFloat(), 0); - Assert.assertFalse(selector.isNull()); + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); Assert.assertArrayEquals(new Object[]{12L}, (Object[]) selector.getObject()); } @@ -107,10 +110,10 @@ public void test_readXAsStringArray() final ColumnValueSelector selector = TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "x", ColumnType.STRING_ARRAY); - Assert.assertEquals(12L, selector.getLong()); - Assert.assertEquals(12.3d, selector.getDouble(), 0.001); - Assert.assertEquals(12.3f, selector.getFloat(), 0); - Assert.assertFalse(selector.isNull()); + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); Assert.assertArrayEquals(new Object[]{"12.3"}, (Object[]) selector.getObject()); } @@ -159,10 +162,10 @@ public void test_readYAsLongArray() final ColumnValueSelector selector = TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "y", ColumnType.LONG_ARRAY); - Assert.assertEquals(0L, selector.getLong()); - Assert.assertEquals(0d, selector.getDouble(), 0); - Assert.assertEquals(0f, selector.getFloat(), 0); - Assert.assertTrue(selector.isNull()); + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); Assert.assertArrayEquals(new Object[]{null}, (Object[]) selector.getObject()); } @@ -172,10 +175,10 @@ public void test_readYAsStringArray() final ColumnValueSelector selector = TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "y", ColumnType.STRING_ARRAY); - Assert.assertEquals(0L, selector.getLong()); - Assert.assertEquals(0d, selector.getDouble(), 0); - Assert.assertEquals(0f, selector.getFloat(), 0); - Assert.assertTrue(selector.isNull()); + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); Assert.assertArrayEquals(new Object[]{"abc"}, (Object[]) selector.getObject()); } @@ -224,6 +227,32 @@ public void test_readZAsLongArray() final ColumnValueSelector selector = TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.LONG_ARRAY); + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readZAsStringArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.STRING_ARRAY); + + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readDaAsLong() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "da", ColumnType.LONG); + Assert.assertEquals(0L, selector.getLong()); Assert.assertEquals(0d, selector.getDouble(), 0); Assert.assertEquals(0f, selector.getFloat(), 0); @@ -232,10 +261,10 @@ public void test_readZAsLongArray() } @Test - public void test_readZAsStringArray() + public void test_readDaAsDouble() { final ColumnValueSelector selector = - TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "z", ColumnType.STRING_ARRAY); + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "da", ColumnType.DOUBLE); Assert.assertEquals(0L, selector.getLong()); Assert.assertEquals(0d, selector.getDouble(), 0); @@ -244,6 +273,45 @@ public void test_readZAsStringArray() Assert.assertNull(selector.getObject()); } + @Test + public void test_readDaAsFloat() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "da", ColumnType.FLOAT); + + Assert.assertEquals(0L, selector.getLong()); + Assert.assertEquals(0d, selector.getDouble(), 0); + Assert.assertEquals(0f, selector.getFloat(), 0); + Assert.assertTrue(selector.isNull()); + Assert.assertNull(selector.getObject()); + } + + @Test + public void test_readDaAsLongArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "da", ColumnType.LONG_ARRAY); + + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); + Assert.assertArrayEquals(new Object[]{1L, 2L}, (Object[]) selector.getObject()); + } + + @Test + public void test_readDaAsStringArray() + { + final ColumnValueSelector selector = + TypeCastSelectors.makeColumnValueSelector(testColumnSelectorFactory, "da", ColumnType.STRING_ARRAY); + + Assert.assertThrows(DruidException.class, selector::getLong); + Assert.assertThrows(DruidException.class, selector::getDouble); + Assert.assertThrows(DruidException.class, selector::getFloat); + Assert.assertThrows(DruidException.class, selector::isNull); + Assert.assertArrayEquals(new Object[]{"1.2", "2.3"}, (Object[]) selector.getObject()); + } + /** * Implementation that returns a fixed value per column from {@link ColumnValueSelector#getObject()}. Other * methods, such as {@link ColumnValueSelector#getLong()} throw exceptions. This is meant to help validate From c0522e51b5c4bcd8f770d2daef72106bd9bbc819 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 5 Sep 2024 10:47:58 -0700 Subject: [PATCH 6/7] Use ExprEval#ofType. --- .../ObjectToArrayColumnValueSelector.java | 9 ++-- .../ObjectToNumberColumnValueSelector.java | 6 +-- .../frame/write/cast/TypeCastSelectors.java | 53 +++---------------- .../org/apache/druid/math/expr/ExprEval.java | 6 +++ .../write/cast/TypeCastSelectorsTest.java | 2 +- 5 files changed, 22 insertions(+), 54 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToArrayColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToArrayColumnValueSelector.java index 614c761549a0..30537a7ecd9b 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToArrayColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToArrayColumnValueSelector.java @@ -21,11 +21,10 @@ import org.apache.druid.error.DruidException; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.RowIdSupplier; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.ValueType; import javax.annotation.Nullable; @@ -37,13 +36,13 @@ public class ObjectToArrayColumnValueSelector implements ColumnValueSelector selector; @Nullable - private final ColumnType desiredType; + private final ExpressionType desiredType; @Nullable private final RowIdSupplier rowIdSupplier; public ObjectToArrayColumnValueSelector( final ColumnValueSelector selector, - final ColumnType desiredType, + final ExpressionType desiredType, @Nullable final RowIdSupplier rowIdSupplier ) { @@ -51,7 +50,7 @@ public ObjectToArrayColumnValueSelector( this.desiredType = desiredType; this.rowIdSupplier = rowIdSupplier; - if (!desiredType.is(ValueType.ARRAY) || desiredType.getElementType() == null) { + if (!desiredType.isArray() || desiredType.getElementType() == null) { throw DruidException.defensive("Expected array with nonnull element type, got[%s]", desiredType); } } diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToNumberColumnValueSelector.java b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToNumberColumnValueSelector.java index d35c70905b4a..ae09cfd0f20c 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToNumberColumnValueSelector.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/ObjectToNumberColumnValueSelector.java @@ -21,10 +21,10 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.DruidException; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.RowIdSupplier; -import org.apache.druid.segment.column.ColumnType; import javax.annotation.Nullable; @@ -35,7 +35,7 @@ public class ObjectToNumberColumnValueSelector implements ColumnValueSelector { private final ColumnValueSelector selector; - private final ColumnType desiredType; + private final ExpressionType desiredType; @Nullable private final RowIdSupplier rowIdSupplier; @@ -50,7 +50,7 @@ public class ObjectToNumberColumnValueSelector implements ColumnValueSelector selector, - final ColumnType desiredType, + final ExpressionType desiredType, @Nullable final RowIdSupplier rowIdSupplier ) { diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java index f01f93cc2388..3b43647475e5 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java @@ -21,6 +21,7 @@ import org.apache.druid.error.DruidException; import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.segment.BaseObjectColumnValueSelector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; @@ -28,11 +29,9 @@ import org.apache.druid.segment.RowIdSupplier; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.TypeSignature; import org.apache.druid.segment.column.ValueType; import javax.annotation.Nullable; -import java.util.List; public class TypeCastSelectors { @@ -69,6 +68,8 @@ public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( final ColumnType desiredType ) { + final ExpressionType desiredExpressionType = ExpressionType.fromColumnType(desiredType); + if (desiredType.is(ValueType.STRING)) { throw DruidException.defensive("Type[%s] should be read using a DimensionSelector", desiredType); } else if (desiredType.isNumeric() @@ -76,13 +77,13 @@ public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( // When capabilities are unknown, or known to be non-numeric, fall back to getObject() and explicit typecasting. // This avoids using primitive numeric accessors (getLong / getDouble / getFloat / isNull) on a selector that // may not support them. - return new ObjectToNumberColumnValueSelector(selector, desiredType, rowIdSupplier); + return new ObjectToNumberColumnValueSelector(selector, desiredExpressionType, rowIdSupplier); } else if (desiredType.isArray()) { // Always wrap if desiredType is an array. Even if the underlying selector claims to offer the same type as // desiredType, it may fail to respect the BaseObjectColumnValueSelector contract. For example, it may return // List rather than Object[]. (RowBasedColumnSelectorFactory can do this if used incorrectly, i.e., if the // ColumnInspector declares type ARRAY for a column, but the RowAdapter does not provide Object[].) - return new ObjectToArrayColumnValueSelector(selector, desiredType, rowIdSupplier); + return new ObjectToArrayColumnValueSelector(selector, desiredExpressionType, rowIdSupplier); } else { // OK to return the original selector. return selector; @@ -91,8 +92,7 @@ public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( /** * Coerce an object to an object compatible with what {@link BaseObjectColumnValueSelector#getObject()} for a column - * of the provided desiredType. Never throws an exception. If coercion fails, replaces the object that failed to - * coerce with null. + * of the provided desiredType. * * @param obj object * @param desiredType desired type @@ -100,50 +100,13 @@ public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( @Nullable public static Object bestEffortCoerce( @Nullable final Object obj, - @Nullable final TypeSignature desiredType + @Nullable final ExpressionType desiredType ) { if (obj == null || desiredType == null) { return obj; } - ValueType type = desiredType.getType(); - - if (type == ValueType.STRING) { - return ExprEval.bestEffortOf(obj).asString(); - } else if (type == ValueType.LONG) { - final ExprEval n = ExprEval.bestEffortOf(obj); - return n.isNumericNull() ? null : n.asLong(); - } else if (type == ValueType.DOUBLE) { - final ExprEval n = ExprEval.bestEffortOf(obj); - return n.isNumericNull() ? null : n.asDouble(); - } else if (type == ValueType.FLOAT) { - final ExprEval n = ExprEval.bestEffortOf(obj); - return n.isNumericNull() ? null : (float) n.asDouble(); - } else if (type == ValueType.ARRAY) { - final TypeSignature elementType = desiredType.getElementType(); - - if (obj instanceof List) { - final List list = (List) obj; - final Object[] retVal = new Object[list.size()]; - for (int i = 0; i < list.size(); i++) { - retVal[i] = bestEffortCoerce(list.get(i), elementType); - } - return retVal; - } else if (obj instanceof Object[]) { - final Object[] arr = (Object[]) obj; - final Object[] retVal = new Object[arr.length]; - for (int i = 0; i < arr.length; i++) { - retVal[i] = bestEffortCoerce(arr[i], elementType); - } - return retVal; - } else { - // Wrap scalar types in singleton Object[]. - return new Object[]{bestEffortCoerce(obj, elementType)}; - } - } else { - // No coercion for COMPLEX, hope the reader knows how to deal with whatever we have here. - return obj; - } + return ExprEval.ofType(desiredType, obj).value(); } } diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java index dbccc6d9fe87..a16b563c77ca 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExprEval.java @@ -519,6 +519,12 @@ public static ExprEval bestEffortOf(@Nullable Object val) return ofComplex(ExpressionType.UNKNOWN_COMPLEX, val); } + /** + * Create an eval of the provided type. Coerces the provided object to the desired type. + * + * @param type type, or null to be equivalent to {@link #bestEffortOf(Object)} + * @param value object to be coerced to the type + */ public static ExprEval ofType(@Nullable ExpressionType type, @Nullable Object value) { if (type == null) { diff --git a/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java index f48aa2755a77..dc2a4a5b46cf 100644 --- a/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java +++ b/processing/src/test/java/org/apache/druid/frame/write/cast/TypeCastSelectorsTest.java @@ -88,7 +88,7 @@ public void test_readXAsFloat() Assert.assertEquals(12.3d, selector.getDouble(), 0.001); Assert.assertEquals(12.3f, selector.getFloat(), 0); Assert.assertFalse(selector.isNull()); - Assert.assertEquals(12.3f, selector.getObject()); + Assert.assertEquals(12.3d, selector.getObject()); } @Test From 28d36e120272504e1f2907d0092b5b455da9b24c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 5 Sep 2024 10:50:03 -0700 Subject: [PATCH 7/7] Javadoc. --- .../apache/druid/frame/write/cast/TypeCastSelectors.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java index 3b43647475e5..ec6f16aad759 100644 --- a/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java +++ b/processing/src/main/java/org/apache/druid/frame/write/cast/TypeCastSelectors.java @@ -61,6 +61,15 @@ public static ColumnValueSelector makeColumnValueSelector( ); } + /** + * Wraps a {@link ColumnValueSelector} with a type casting selector if necessary. If typecasting is not necessary, + * returns the original selector. + * + * @param selector selector + * @param selectorCapabilities capabilities for the selector, from {@link ColumnSelectorFactory#getColumnCapabilities} + * @param rowIdSupplier row id supplier, from {@link ColumnSelectorFactory#getRowIdSupplier()} + * @param desiredType desired type for the returned selector + */ public static ColumnValueSelector wrapColumnValueSelectorIfNeeded( final ColumnValueSelector selector, @Nullable final ColumnCapabilities selectorCapabilities,