diff --git a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java index 0a869a92271a..5f613e59642e 100644 --- a/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java +++ b/processing/src/main/java/org/apache/druid/query/dimension/DefaultDimensionSpec.java @@ -41,6 +41,11 @@ public static DefaultDimensionSpec of(String dimensionName) return new DefaultDimensionSpec(dimensionName, dimensionName); } + public static DefaultDimensionSpec of(String dimensionName, ColumnType columnType) + { + return new DefaultDimensionSpec(dimensionName, dimensionName, columnType); + } + private static final byte CACHE_TYPE_ID = 0x0; private final String dimension; private final String outputName; diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java index b32f54186def..e79baa00b1bf 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java @@ -257,10 +257,10 @@ default VectorObjectSelector makeVectorObjectSelector( /** * Return the {@link ColumnCapabilities} which best describe the optimal selector to read from this virtual column. - * + *

* The {@link ColumnInspector} (most likely corresponding to an underlying {@link ColumnSelectorFactory} of a query) * allows the virtual column to consider this information if necessary to compute its output type details. - * + *

* Examples of this include the {@link ExpressionVirtualColumn}, which takes input from other columns and uses the * {@link ColumnInspector} to infer the output type of expressions based on the types of the inputs. * @@ -268,6 +268,7 @@ default VectorObjectSelector makeVectorObjectSelector( * @param columnName the name this virtual column was referenced with * @return capabilities, must not be null */ + @Nullable default ColumnCapabilities capabilities(ColumnInspector inspector, String columnName) { return capabilities(columnName); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java index 2cc9cfb7b08b..2ee1a5808e90 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexColumn.java @@ -20,12 +20,9 @@ package org.apache.druid.segment.nested; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.query.extraction.ExtractionFn; -import org.apache.druid.segment.ColumnSelector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; -import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; @@ -47,29 +44,6 @@ */ public abstract class NestedDataComplexColumn implements ComplexColumn { - @Nullable - public static NestedDataComplexColumn fromColumnSelector( - ColumnSelector columnSelector, - String columnName - ) - { - ColumnHolder holder = columnSelector.getColumnHolder(columnName); - if (holder == null) { - return null; - } - BaseColumn theColumn = holder.getColumn(); - if (theColumn instanceof CompressedNestedDataComplexColumn) { - return (CompressedNestedDataComplexColumn) theColumn; - } - throw new IAE( - "Column [%s] is invalid type, found [%s] instead of [%s]", - columnName, - theColumn.getClass(), - NestedDataComplexColumn.class.getSimpleName() - ); - } - - /** * Make a {@link DimensionSelector} for a nested literal field column associated with this nested * complex column specified by a sequence of {@link NestedPathPart}. diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index b9a9366b8367..286626baf80a 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -194,6 +194,7 @@ public ColumnCapabilities capabilities(String columnName) return new ColumnCapabilitiesImpl().setType(outputType == null ? ColumnType.FLOAT : outputType); } + @Nullable @Override public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName) { diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java index 3a9209fb33be..461a6b68dff7 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/FallbackVirtualColumn.java @@ -163,6 +163,7 @@ public ColumnCapabilities capabilities(String columnName) return ColumnCapabilitiesImpl.createDefault(); } + @Nullable @SuppressWarnings("ConstantConditions") @Override public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName) diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index f5f7e94d3746..3cdaf24508ff 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -158,6 +158,7 @@ public ColumnCapabilities capabilities(String columnName) .setHasBitmapIndexes(true); } + @Nullable @Override public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName) { diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java index 86e9c95a803e..62cd303bc933 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/NestedFieldVirtualColumn.java @@ -54,6 +54,7 @@ import org.apache.druid.segment.column.ValueTypes; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn; import org.apache.druid.segment.nested.NestedDataComplexColumn; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.nested.NestedPathArrayElement; @@ -410,6 +411,9 @@ public VectorObjectSelector makeVectorObjectSelector( // not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts if (parts.isEmpty()) { ColumnCapabilities capabilities = holder.getCapabilities(); + // expectedType shouldn't possibly be null if we are being asked for an object selector and the underlying column + // is numeric, else we would have been asked for a value selector + Preconditions.checkArgument(expectedType != null, "Asked for a VectorObjectSelector on a numeric column, 'expectedType' must not be null"); if (capabilities.isNumeric()) { return ExpressionVectorSelectors.castValueSelectorToObject( offset, @@ -600,12 +604,18 @@ public ColumnIndexSupplier getIndexSupplier( ColumnSelector selector ) { - final NestedDataComplexColumn column = NestedDataComplexColumn.fromColumnSelector(selector, this.columnName); - - if (column == null) { + ColumnHolder holder = selector.getColumnHolder(this.columnName); + if (holder == null) { return null; } - return column.getColumnIndexSupplier(parts); + BaseColumn theColumn = holder.getColumn(); + if (theColumn instanceof CompressedNestedDataComplexColumn) { + return ((CompressedNestedDataComplexColumn) theColumn).getColumnIndexSupplier(parts); + } + if (parts.isEmpty()) { + return holder.getIndexSupplier(); + } + return null; } @Override @@ -625,6 +635,7 @@ public ColumnCapabilities capabilities(String columnName) .setHasNulls(expectedType == null || !expectedType.isNumeric() || (expectedType.isNumeric() && NullHandling.sqlCompatible())); } + @Nullable @Override public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName) { @@ -637,17 +648,38 @@ public ColumnCapabilities capabilities(ColumnInspector inspector, String columnN } // ColumnInspector isn't really enough... we need the ability to read the complex column itself to examine // the nested fields type information to really be accurate here, so we rely on the expectedType to guide us - final ColumnCapabilities complexCapabilites = inspector.getColumnCapabilities(this.columnName); - if (complexCapabilites != null && complexCapabilites.isDictionaryEncoded().isTrue()) { - return ColumnCapabilitiesImpl.createDefault() - .setType(expectedType != null ? expectedType : ColumnType.STRING) - .setDictionaryEncoded(true) - .setDictionaryValuesSorted(true) - .setDictionaryValuesUnique(true) - .setHasBitmapIndexes(true) - .setHasNulls(expectedType == null || (expectedType.isNumeric() - && NullHandling.sqlCompatible())); + final ColumnCapabilities capabilities = inspector.getColumnCapabilities(this.columnName); + + if (capabilities != null) { + // if the underlying column is a nested column (and persisted to disk, re: the dictionary encoded check) + if (capabilities.is(ValueType.COMPLEX) && + capabilities.getComplexTypeName().equals(NestedDataComplexTypeSerde.TYPE_NAME) && + capabilities.isDictionaryEncoded().isTrue()) { + return ColumnCapabilitiesImpl.createDefault() + .setType(expectedType != null ? expectedType : ColumnType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesSorted(true) + .setDictionaryValuesUnique(true) + .setHasBitmapIndexes(true) + .setHasNulls(expectedType == null || (expectedType.isNumeric() + && NullHandling.sqlCompatible())); + } + // column is not nested, use underlying column capabilities, adjusted for expectedType as necessary + if (parts.isEmpty()) { + ColumnCapabilitiesImpl copy = ColumnCapabilitiesImpl.copyOf(capabilities); + if (expectedType != null) { + copy.setType(expectedType); + copy.setHasNulls( + copy.hasNulls().or(ColumnCapabilities.Capable.of(expectedType.getType() != capabilities.getType())) + ); + } + return copy; + } else if (capabilities.isPrimitive()) { + // path doesn't exist and column isn't nested, so effectively column doesn't exist + return null; + } } + return capabilities(columnName); } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java index 9f33735c03da..079307eadf71 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/NestedDataGroupByQueryTest.java @@ -37,6 +37,7 @@ import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.filter.InDimFilter; +import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.strategy.GroupByStrategySelector; import org.apache.druid.segment.Segment; import org.apache.druid.segment.column.ColumnType; @@ -312,6 +313,230 @@ public void testGroupByNonExistentVirtualColumn() ); } + @Test + public void testGroupBySomeFieldOnStringColumn() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0"), DefaultDimensionSpec.of("v1")) + .setVirtualColumns( + new NestedFieldVirtualColumn("dim", "$", "v0"), + new NestedFieldVirtualColumn("dim", "$.x", "v1") + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{"100", null, 2L}, + new Object[]{"hello", null, 12L}, + new Object[]{"world", null, 2L} + ) + ); + } + + @Test + public void testGroupBySomeFieldOnStringColumnWithFilter() + { + List vals = new ArrayList<>(); + vals.add("100"); + vals.add("200"); + vals.add("300"); + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0")) + .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0")) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .setDimFilter(new InDimFilter("v0", vals, null)) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{"100", 2L} + ) + ); + } + + @Test + public void testGroupBySomeFieldOnStringColumnWithFilterExpectedType() + { + List vals = new ArrayList<>(); + vals.add("100"); + vals.add("200"); + vals.add("300"); + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0", ColumnType.LONG)) + .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$", "v0", ColumnType.LONG)) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .setDimFilter(new InDimFilter("v0", vals, null)) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{100L, 2L} + ), + false, + true + ); + } + + @Test + public void testGroupBySomeFieldOnStringColumnWithFilterNil() + { + List vals = new ArrayList<>(); + vals.add("100"); + vals.add("200"); + vals.add("300"); + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions(DefaultDimensionSpec.of("v0")) + .setVirtualColumns(new NestedFieldVirtualColumn("dim", "$.x", "v0")) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .setDimFilter(new InDimFilter("v0", vals, null)) + .build(); + + + runResults( + groupQuery, + ImmutableList.of() + ); + } + + @Test + public void testGroupBySomeFieldOnLongColumn() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions( + DefaultDimensionSpec.of("v0", ColumnType.LONG), + DefaultDimensionSpec.of("v1", ColumnType.LONG) + ) + .setVirtualColumns( + new NestedFieldVirtualColumn("__time", "$", "v0"), + new NestedFieldVirtualColumn("__time", "$.x", "v1") + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{1609459200000L, NullHandling.defaultLongValue(), 8L}, + new Object[]{1609545600000L, NullHandling.defaultLongValue(), 8L} + ), + false, + true + ); + } + + @Test + public void testGroupBySomeFieldOnLongColumnFilter() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions( + DefaultDimensionSpec.of("v0", ColumnType.LONG) + ) + .setVirtualColumns( + new NestedFieldVirtualColumn("__time", "$", "v0") + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null)) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{1609459200000L, 8L} + ), + false, + true + ); + } + + @Test + public void testGroupBySomeFieldOnLongColumnFilterExpectedType() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions( + DefaultDimensionSpec.of("v0", ColumnType.STRING) + ) + .setVirtualColumns( + new NestedFieldVirtualColumn("__time", "$", "v0", ColumnType.STRING) + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null)) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of( + new Object[]{"1609459200000", 8L} + ), + true, + false + ); + } + + @Test + public void testGroupBySomeFieldOnLongColumnFilterNil() + { + GroupByQuery groupQuery = GroupByQuery.builder() + .setDataSource("test_datasource") + .setGranularity(Granularities.ALL) + .setInterval(Intervals.ETERNITY) + .setDimensions( + DefaultDimensionSpec.of("v0", ColumnType.LONG) + ) + .setVirtualColumns( + new NestedFieldVirtualColumn("__time", "$.x", "v0") + ) + .setAggregatorSpecs(new CountAggregatorFactory("count")) + .setDimFilter(new SelectorDimFilter("v0", "1609459200000", null)) + .setContext(getContext()) + .build(); + + + runResults( + groupQuery, + ImmutableList.of(), + false, + true + ); + } + private void runResults(GroupByQuery groupQuery, List expectedResults) { runResults(groupQuery, expectedResults, false, false);