From a414096026cc6bd7c5373e7dc5dd1f1dcc969cf0 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Tue, 2 Jul 2024 13:40:24 +0530 Subject: [PATCH 01/20] Preemptive restriction for queries with approximate count distinct on complex columns of unsupported type --- .../HyperUniquesAggregatorFactory.java | 9 +++++++ ...iltinApproxCountDistinctSqlAggregator.java | 26 +++++++++++++------ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index c1c55a826b10..622269894b35 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -24,6 +24,7 @@ import org.apache.druid.hll.HyperLogLogCollector; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.aggregation.AggregateCombiner; import org.apache.druid.query.aggregation.Aggregator; @@ -134,6 +135,14 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) { final ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); + if (capabilities != null && !Objects.equals(capabilities.toColumnType().getComplexTypeName(), TYPE.getComplexTypeName()) + && !Objects.equals(capabilities.toColumnType().getComplexTypeName(), PRECOMPUTED_TYPE.getComplexTypeName())) { + throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " + + "Use a different aggregator type and run the query again.", + getIntermediateType().getComplexTypeName(), + capabilities.getType(), + capabilities.getComplexTypeName()); + } if (!Types.is(capabilities, ValueType.COMPLEX)) { return NoopVectorAggregator.instance(); } else { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index 699c7a8d1c6b..51174dbc60e7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -53,6 +53,7 @@ import javax.annotation.Nullable; import java.util.Collections; import java.util.List; +import java.util.Objects; public class BuiltinApproxCountDistinctSqlAggregator implements SqlAggregator { @@ -88,13 +89,15 @@ public Aggregation toDruidAggregation( return null; } - final AggregatorFactory aggregatorFactory; + AggregatorFactory aggregatorFactory = null; final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name; if (arg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(arg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX)) + .map(type -> type.is(ValueType.COMPLEX) + && (Objects.equals(type.getComplexTypeName(), HyperUniquesAggregatorFactory.TYPE.getComplexTypeName()) || Objects.equals(type.getComplexTypeName(), HyperUniquesAggregatorFactory.PRECOMPUTED_TYPE.getComplexTypeName())) + ) .orElse(false)) { aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true); } else { @@ -118,12 +121,14 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - aggregatorFactory = new HyperUniquesAggregatorFactory( - aggregatorName, - dimensionSpec.getOutputName(), - false, - true - ); + if ((Objects.equals(inputType.getComplexTypeName(), HyperUniquesAggregatorFactory.TYPE.getComplexTypeName()) || Objects.equals(inputType.getComplexTypeName(), HyperUniquesAggregatorFactory.PRECOMPUTED_TYPE.getComplexTypeName()))) { + aggregatorFactory = new HyperUniquesAggregatorFactory( + aggregatorName, + dimensionSpec.getOutputName(), + false, + true + ); + } } else { aggregatorFactory = new CardinalityAggregatorFactory( aggregatorName, @@ -135,6 +140,11 @@ public Aggregation toDruidAggregation( } } + if (aggregatorFactory == null) { + plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for %s column. You can disable approximation and use COUNT(DISTINCT %s) and run the query again.", arg.getDruidType(), arg.getSimpleExtraction().getColumn()); + return null; + } + return Aggregation.create( Collections.singletonList(aggregatorFactory), finalizeAggregations ? new HyperUniqueFinalizingPostAggregator(name, aggregatorFactory.getName()) : null From 90d8e16a15b2813f566551aec5a27ad483659d11 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 3 Jul 2024 15:38:41 +0530 Subject: [PATCH 02/20] Update ApproxCountDistinctSqlAggregator and HyperUniquesAggregatorFactory as per offline discussion --- .../HyperUniquesAggregatorFactory.java | 60 ++++++++++++------- .../ApproxCountDistinctSqlAggregator.java | 17 +++--- ...iltinApproxCountDistinctSqlAggregator.java | 7 ++- 3 files changed, 51 insertions(+), 33 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index 622269894b35..4137820964c8 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.hll.HyperLogLogCollector; -import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.guava.Comparators; @@ -104,46 +103,63 @@ public HyperUniquesAggregatorFactory( @Override public Aggregator factorize(ColumnSelectorFactory metricFactory) { + ColumnCapabilities columnCapabilities = metricFactory.getColumnCapabilities(fieldName); + if (columnCapabilities != null) { + final ColumnType type = columnCapabilities.toColumnType(); + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { + throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " + + "Use a different aggregator type and run the query again.", + getIntermediateType().getComplexTypeName(), + type, + type.getComplexTypeName()); + } + } + BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); if (selector instanceof NilColumnValueSelector) { return NoopAggregator.instance(); } - final Class classOfObject = selector.classOfObject(); - if (classOfObject.equals(Object.class) || HyperLogLogCollector.class.isAssignableFrom(classOfObject)) { - return new HyperUniquesAggregator(selector); - } - - throw new IAE("Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, classOfObject); + return new HyperUniquesAggregator(selector); } @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) { + ColumnCapabilities columnCapabilities = metricFactory.getColumnCapabilities(fieldName); + if (columnCapabilities != null) { + final ColumnType type = columnCapabilities.toColumnType(); + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { + throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " + + "Use a different aggregator type and run the query again.", + getIntermediateType().getComplexTypeName(), + type, + type.getComplexTypeName()); + } + } + BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); if (selector instanceof NilColumnValueSelector) { return NoopBufferAggregator.instance(); } - final Class classOfObject = selector.classOfObject(); - if (classOfObject.equals(Object.class) || HyperLogLogCollector.class.isAssignableFrom(classOfObject)) { - return new HyperUniquesBufferAggregator(selector); - } - - throw new IAE("Incompatible type for metric[%s], expected a HyperUnique, got a %s", fieldName, classOfObject); + return new HyperUniquesBufferAggregator(selector); } @Override public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) { - final ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); - if (capabilities != null && !Objects.equals(capabilities.toColumnType().getComplexTypeName(), TYPE.getComplexTypeName()) - && !Objects.equals(capabilities.toColumnType().getComplexTypeName(), PRECOMPUTED_TYPE.getComplexTypeName())) { - throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " - + "Use a different aggregator type and run the query again.", - getIntermediateType().getComplexTypeName(), - capabilities.getType(), - capabilities.getComplexTypeName()); + final ColumnCapabilities columnCapabilities = selectorFactory.getColumnCapabilities(fieldName); + if (columnCapabilities != null) { + final ColumnType type = columnCapabilities.toColumnType(); + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { + throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " + + "Use a different aggregator type and run the query again.", + getIntermediateType().getComplexTypeName(), + type, + type.getComplexTypeName()); + } } - if (!Types.is(capabilities, ValueType.COMPLEX)) { + + if (!Types.is(columnCapabilities, ValueType.COMPLEX)) { return NoopVectorAggregator.instance(); } else { return new HyperUniquesVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java index eceb4ebbf800..a8bae9698638 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java @@ -21,10 +21,7 @@ import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.sql.SqlAggFunction; -import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.type.InferTypes; -import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.util.Optionality; @@ -44,20 +41,20 @@ */ public class ApproxCountDistinctSqlAggregator implements SqlAggregator { - private static final SqlAggFunction FUNCTION_INSTANCE = new ApproxCountDistinctSqlAggFunction(); private static final String NAME = "APPROX_COUNT_DISTINCT"; - + private final SqlAggFunction delegateFunction; private final SqlAggregator delegate; public ApproxCountDistinctSqlAggregator(final SqlAggregator delegate) { this.delegate = delegate; + this.delegateFunction = new ApproxCountDistinctSqlAggFunction(delegate.calciteFunction()); } @Override public SqlAggFunction calciteFunction() { - return FUNCTION_INSTANCE; + return delegateFunction; } @Nullable @@ -85,16 +82,16 @@ public Aggregation toDruidAggregation( private static class ApproxCountDistinctSqlAggFunction extends SqlAggFunction { - ApproxCountDistinctSqlAggFunction() + ApproxCountDistinctSqlAggFunction(SqlAggFunction delegate) { super( NAME, null, SqlKind.OTHER_FUNCTION, ReturnTypes.explicit(SqlTypeName.BIGINT), - InferTypes.VARCHAR_1024, - OperandTypes.ANY, - SqlFunctionCategory.STRING, + delegate.getOperandTypeInference(), + delegate.getOperandTypeChecker(), + delegate.getFunctionType(), false, false, Optionality.FORBIDDEN diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index 51174dbc60e7..3bcd04c42522 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -49,6 +49,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.InputAccessor; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; +import org.apache.druid.sql.calcite.table.RowSignatures; import javax.annotation.Nullable; import java.util.Collections; @@ -161,7 +162,11 @@ private static class BuiltinApproxCountDistinctSqlAggFunction extends SqlAggFunc SqlKind.OTHER_FUNCTION, ReturnTypes.explicit(SqlTypeName.BIGINT), InferTypes.VARCHAR_1024, - OperandTypes.ANY, + OperandTypes.or( + OperandTypes.STRING, + OperandTypes.NUMERIC, + RowSignatures.complexTypeChecker(HyperUniquesAggregatorFactory.TYPE) + ), SqlFunctionCategory.STRING, false, false, From d5af4412e7c388be1eec7c6a056e28041df496b3 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 3 Jul 2024 16:13:31 +0530 Subject: [PATCH 03/20] Fix error message in HyperUniquesAggregatorFactory --- .../HyperUniquesAggregatorFactory.java | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index 4137820964c8..5617bc28c960 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -107,11 +107,10 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) if (columnCapabilities != null) { final ColumnType type = columnCapabilities.toColumnType(); if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { - throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " + throw new UOE("Using aggregation type %s is not supported for %s column. " + "Use a different aggregator type and run the query again.", getIntermediateType().getComplexTypeName(), - type, - type.getComplexTypeName()); + type); } } @@ -129,11 +128,10 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) if (columnCapabilities != null) { final ColumnType type = columnCapabilities.toColumnType(); if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { - throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " + throw new UOE("Using aggregation type %s is not supported for %s column. " + "Use a different aggregator type and run the query again.", getIntermediateType().getComplexTypeName(), - type, - type.getComplexTypeName()); + type); } } @@ -151,11 +149,10 @@ public VectorAggregator factorizeVector(final VectorColumnSelectorFactory select if (columnCapabilities != null) { final ColumnType type = columnCapabilities.toColumnType(); if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { - throw new UOE("Using aggregation type %s is not supported for %s<%s> column. " + throw new UOE("Using aggregation type %s is not supported for %s column. " + "Use a different aggregator type and run the query again.", getIntermediateType().getComplexTypeName(), - type, - type.getComplexTypeName()); + type); } } From 6926ba21271f158f0afcbfe9afe7287050624218 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 3 Jul 2024 20:12:59 +0530 Subject: [PATCH 04/20] Update HllSketchMergeAggregatorFactory and HllSketchApproxCountDistinctSqlAggregator: SQL validation layer + Native layer changes --- .../hll/HllSketchMergeAggregatorFactory.java | 39 +++++++++++++++ ...ketchApproxCountDistinctSqlAggregator.java | 50 +++++++++++++++++-- 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index 833df8ab1a55..b6195bdd36ec 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -25,6 +25,7 @@ import org.apache.datasketches.hll.TgtHllType; import org.apache.datasketches.hll.Union; import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; @@ -34,6 +35,7 @@ import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; @@ -107,6 +109,19 @@ protected byte getCacheTypeId() @Override public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory) { + ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(getFieldName()); + if (columnCapabilities != null) { + final ColumnType type = columnCapabilities.toColumnType(); + System.out.println("type = " + type); + System.out.println("type.getComplexTypeName() = " + type.getComplexTypeName()); + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type)) { + throw new UOE("Using aggregation type %s is not supported for %s column. " + + "Use a different aggregator type and run the query again.", + getIntermediateType().getComplexTypeName(), + type); + } + } + final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); return new HllSketchMergeAggregator(selector, getLgK(), TgtHllType.valueOf(getTgtHllType())); } @@ -115,6 +130,19 @@ public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory) @Override public BufferAggregator factorizeBuffered(final ColumnSelectorFactory columnSelectorFactory) { + ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(getFieldName()); + if (columnCapabilities != null) { + final ColumnType type = columnCapabilities.toColumnType(); + System.out.println("type = " + type); + System.out.println("type.getComplexTypeName() = " + type.getComplexTypeName()); + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type)) { + throw new UOE("Using aggregation type %s is not supported for %s column. " + + "Use a different aggregator type and run the query again.", + getIntermediateType().getComplexTypeName(), + type); + } + } + final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); return new HllSketchMergeBufferAggregator( selector, @@ -133,6 +161,17 @@ public boolean canVectorize(ColumnInspector columnInspector) @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { + ColumnCapabilities columnCapabilities = selectorFactory.getColumnCapabilities(getFieldName()); + if (columnCapabilities != null) { + final ColumnType type = columnCapabilities.toColumnType(); + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type)) { + throw new UOE("Using aggregation type %s is not supported for %s column. " + + "Use a different aggregator type and run the query again.", + getIntermediateType().getComplexTypeName(), + type); + } + } + return new HllSketchMergeVectorAggregator( selectorFactory, getFieldName(), diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java index 757674d6aa68..711673ff29fc 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java @@ -21,28 +21,70 @@ import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.type.CastedLiteralOperandTypeCheckers; import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlSingleOperandTypeChecker; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory; import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.OperatorConversions; +import org.apache.druid.sql.calcite.table.RowSignatures; import java.util.Collections; public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlAggregator implements SqlAggregator { public static final String NAME = "APPROX_COUNT_DISTINCT_DS_HLL"; + + private static final SqlSingleOperandTypeChecker COLUMN_ALLOWED_TYPES = OperandTypes.or( + OperandTypes.STRING, + OperandTypes.NUMERIC, + RowSignatures.complexTypeChecker(HllSketchMergeAggregatorFactory.TYPE), + RowSignatures.complexTypeChecker(HllSketchBuildAggregatorFactory.TYPE) + ); + private static final SqlAggFunction FUNCTION_INSTANCE = OperatorConversions.aggregatorBuilder(NAME) - .operandNames("column", "lgK", "tgtHllType") - .operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING) .operandTypeInference(InferTypes.VARCHAR_1024) - .requiredOperandCount(1) - .literalOperands(1, 2) + .operandTypeChecker( + OperandTypes.or( + // APPROX_COUNT_DISTINCT_DS_HLL(column) + OperandTypes.and( + OperandTypes.sequence( + StringUtils.format("'%s(column)'", NAME), + COLUMN_ALLOWED_TYPES + ), + OperandTypes.family(SqlTypeFamily.ANY) + ), + // APPROX_COUNT_DISTINCT_DS_HLL(column, lgk) + OperandTypes.and( + OperandTypes.sequence( + StringUtils.format("'%s(column, lgk)'", NAME), + COLUMN_ALLOWED_TYPES, + CastedLiteralOperandTypeCheckers.POSITIVE_INTEGER_LITERAL + ), + OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.EXACT_NUMERIC) + ), + // APPROX_COUNT_DISTINCT_DS_HLL(column, lgk, tgtHllType) + OperandTypes.and( + OperandTypes.sequence( + StringUtils.format("'%s(column, lgk, tgtHllType)'", NAME), + COLUMN_ALLOWED_TYPES, + CastedLiteralOperandTypeCheckers.POSITIVE_INTEGER_LITERAL, + OperandTypes.STRING + ), + OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.EXACT_NUMERIC, SqlTypeFamily.EXACT_NUMERIC) + ) + ) + ) .returnTypeNonNull(SqlTypeName.BIGINT) .functionCategory(SqlFunctionCategory.NUMERIC) .build(); From f310f8fb581c48a0d4ecc2f2d0821933d5bcdd24 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 3 Jul 2024 20:43:52 +0530 Subject: [PATCH 05/20] Fix tests and cleanup for HllSketchMergeAggregatorFactory and HllSketchApproxCountDistinctSqlAggregator --- .../hll/HllSketchMergeAggregatorFactory.java | 21 ++++++++++++------- ...ketchApproxCountDistinctSqlAggregator.java | 10 ++------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index b6195bdd36ec..833a627f12c4 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -37,9 +37,11 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; +import java.util.Objects; /** * This aggregator factory is for merging existing sketches. @@ -112,9 +114,10 @@ public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory) ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(getFieldName()); if (columnCapabilities != null) { final ColumnType type = columnCapabilities.toColumnType(); - System.out.println("type = " + type); - System.out.println("type.getComplexTypeName() = " + type.getComplexTypeName()); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type)) { + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && + !(ValueType.COMPLEX.equals(type.getType()) && + (Objects.equals(type.getComplexTypeName(), "HLLSketch") || + Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")))) { throw new UOE("Using aggregation type %s is not supported for %s column. " + "Use a different aggregator type and run the query again.", getIntermediateType().getComplexTypeName(), @@ -133,9 +136,10 @@ public BufferAggregator factorizeBuffered(final ColumnSelectorFactory columnSele ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(getFieldName()); if (columnCapabilities != null) { final ColumnType type = columnCapabilities.toColumnType(); - System.out.println("type = " + type); - System.out.println("type.getComplexTypeName() = " + type.getComplexTypeName()); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type)) { + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && + !(ValueType.COMPLEX.equals(type.getType()) && + (Objects.equals(type.getComplexTypeName(), "HLLSketch") || + Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")))) { throw new UOE("Using aggregation type %s is not supported for %s column. " + "Use a different aggregator type and run the query again.", getIntermediateType().getComplexTypeName(), @@ -164,7 +168,10 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact ColumnCapabilities columnCapabilities = selectorFactory.getColumnCapabilities(getFieldName()); if (columnCapabilities != null) { final ColumnType type = columnCapabilities.toColumnType(); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type)) { + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && + !(ValueType.COMPLEX.equals(type.getType()) && + (Objects.equals(type.getComplexTypeName(), "HLLSketch") || + Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")))) { throw new UOE("Using aggregation type %s is not supported for %s column. " + "Use a different aggregator type and run the query again.", getIntermediateType().getComplexTypeName(), diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java index 711673ff29fc..f02594963e3a 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java @@ -57,13 +57,7 @@ public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlA .operandTypeChecker( OperandTypes.or( // APPROX_COUNT_DISTINCT_DS_HLL(column) - OperandTypes.and( - OperandTypes.sequence( - StringUtils.format("'%s(column)'", NAME), - COLUMN_ALLOWED_TYPES - ), - OperandTypes.family(SqlTypeFamily.ANY) - ), + OperandTypes.and(COLUMN_ALLOWED_TYPES, OperandTypes.family(SqlTypeFamily.ANY)), // APPROX_COUNT_DISTINCT_DS_HLL(column, lgk) OperandTypes.and( OperandTypes.sequence( @@ -81,7 +75,7 @@ public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlA CastedLiteralOperandTypeCheckers.POSITIVE_INTEGER_LITERAL, OperandTypes.STRING ), - OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.EXACT_NUMERIC, SqlTypeFamily.EXACT_NUMERIC) + OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.EXACT_NUMERIC, SqlTypeFamily.STRING) ) ) ) From c514d374507119128cca24d84512ba7029599b53 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 4 Jul 2024 10:15:08 +0530 Subject: [PATCH 06/20] Add checks in HllSketchBaseSqlAggregator to handle count(distinct column) --- .../hll/sql/HllSketchBaseSqlAggregator.java | 45 ++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index d221b72ac1c6..54f1f008f6fc 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -46,6 +46,7 @@ import javax.annotation.Nullable; import java.util.List; +import java.util.Objects; public abstract class HllSketchBaseSqlAggregator implements SqlAggregator { @@ -109,13 +110,18 @@ public Aggregation toDruidAggregation( tgtHllType = HllSketchAggregatorFactory.DEFAULT_TGT_HLL_TYPE.name(); } - final AggregatorFactory aggregatorFactory; + AggregatorFactory aggregatorFactory = null; final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name; if (columnArg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(columnArg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX)) + .map(type -> type.is(ValueType.COMPLEX) && + ( + HllSketchMergeAggregatorFactory.TYPE.equals(type) || + Objects.equals(type.getComplexTypeName(), "HLLSketch") || + Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")) + ) .orElse(false)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, @@ -154,19 +160,23 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - aggregatorFactory = new HllSketchMergeAggregatorFactory( - aggregatorName, - dimensionSpec.getOutputName(), - logK, - tgtHllType, - - // For HllSketchMergeAggregatorFactory, stringEncoding is only advisory to aid in detection of mismatched - // merges. It does not affect the results of the aggregator. At this point in the code, we do not know what - // the input encoding of the original sketches was, so we set it to the default. - HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING, - finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), - ROUND - ); + if (HllSketchMergeAggregatorFactory.TYPE.equals(inputType) || + Objects.equals(inputType.getComplexTypeName(), "HLLSketch") || + Objects.equals(inputType.getComplexTypeName(), "HLLSketchBuild")) { + aggregatorFactory = new HllSketchMergeAggregatorFactory( + aggregatorName, + dimensionSpec.getOutputName(), + logK, + tgtHllType, + + // For HllSketchMergeAggregatorFactory, stringEncoding is only advisory to aid in detection of mismatched + // merges. It does not affect the results of the aggregator. At this point in the code, we do not know what + // the input encoding of the original sketches was, so we set it to the default. + HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING, + finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), + ROUND + ); + } } else { aggregatorFactory = new HllSketchBuildAggregatorFactory( aggregatorName, @@ -180,6 +190,11 @@ public Aggregation toDruidAggregation( } } + if (aggregatorFactory == null) { + plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for %s column. You can disable approximation and use COUNT(DISTINCT %s) and run the query again.", columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); + return null; + } + return toAggregation( name, finalizeAggregations, From 2796978dc2322801439a3f0b5b7584f78b95594d Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 4 Jul 2024 14:02:40 +0530 Subject: [PATCH 07/20] SQL validation layer + Native layer changes for APPROX_COUNT_DISTINCT_DS_THETA --- .../theta/SketchAggregatorFactory.java | 25 ++++++++++ ...ketchApproxCountDistinctSqlAggregator.java | 32 ++++++++++-- .../sql/ThetaSketchBaseSqlAggregator.java | 50 ++++++++++++------- 3 files changed, 84 insertions(+), 23 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index 211373e873b0..a83e1f9a0e71 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -28,6 +28,7 @@ import org.apache.datasketches.theta.Union; import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.druid.error.InvalidInput; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.aggregation.AggregateCombiner; import org.apache.druid.query.aggregation.Aggregator; @@ -41,6 +42,7 @@ import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; @@ -80,6 +82,7 @@ public SketchAggregatorFactory(String name, String fieldName, Integer size, byte @Override public Aggregator factorize(ColumnSelectorFactory metricFactory) { + validateInputs(metricFactory.getColumnCapabilities(fieldName)); ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); if (capabilities != null && capabilities.isArray()) { throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); @@ -91,6 +94,7 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) @Override public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) { + validateInputs(metricFactory.getColumnCapabilities(fieldName)); ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); if (capabilities != null && capabilities.isArray()) { throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); @@ -104,6 +108,7 @@ public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) { + validateInputs(metricFactory.getColumnCapabilities(fieldName)); ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); if (capabilities != null && capabilities.isArray()) { throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); @@ -115,9 +120,29 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { + validateInputs(selectorFactory.getColumnCapabilities(fieldName)); return new SketchVectorAggregator(selectorFactory, fieldName, size, getMaxIntermediateSizeWithNulls()); } + private void validateInputs(@Nullable ColumnCapabilities capabilities) + { + if (capabilities != null) { + if (capabilities.isArray() || (capabilities.is(ValueType.COMPLEX) && !( + SketchModule.THETA_SKETCH_TYPE.equals(capabilities.toColumnType()) || + SketchModule.MERGE_TYPE.equals(capabilities.toColumnType()) || + SketchModule.BUILD_TYPE.equals(capabilities.toColumnType()))) + ) { + throw new ISE( + "Invalid input [%s] of type [%s] for [%s] aggregator [%s]", + getFieldName(), + capabilities.asTypeString(), + getIntermediateType(), + getName() + ); + } + } + } + @Override public boolean canVectorize(ColumnInspector columnInspector) { diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java index eac77901f1d2..babaf4341ce2 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java @@ -21,27 +21,51 @@ import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.type.CastedLiteralOperandTypeCheckers; import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlSingleOperandTypeChecker; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.theta.SketchModule; import org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator; import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.OperatorConversions; +import org.apache.druid.sql.calcite.table.RowSignatures; import java.util.Collections; public class ThetaSketchApproxCountDistinctSqlAggregator extends ThetaSketchBaseSqlAggregator implements SqlAggregator { public static final String NAME = "APPROX_COUNT_DISTINCT_DS_THETA"; + + private static final SqlSingleOperandTypeChecker COLUMN_ALLOWED_TYPES = OperandTypes.or( + OperandTypes.STRING, + OperandTypes.NUMERIC, + RowSignatures.complexTypeChecker(SketchModule.THETA_SKETCH_TYPE) + ); + private static final SqlAggFunction FUNCTION_INSTANCE = OperatorConversions.aggregatorBuilder(NAME) - .operandNames("column", "size") - .operandTypes(SqlTypeFamily.ANY, SqlTypeFamily.NUMERIC) .operandTypeInference(InferTypes.VARCHAR_1024) - .requiredOperandCount(1) - .literalOperands(1) + .operandTypeChecker( + OperandTypes.or( + // APPROX_COUNT_DISTINCT_DS_THETA(expr) + OperandTypes.and(COLUMN_ALLOWED_TYPES, OperandTypes.family(SqlTypeFamily.ANY)), + // APPROX_COUNT_DISTINCT_DS_THETA(expr, size) + OperandTypes.and( + OperandTypes.sequence( + StringUtils.format("'%s(expr, size)'", NAME), + COLUMN_ALLOWED_TYPES, + CastedLiteralOperandTypeCheckers.POSITIVE_INTEGER_LITERAL + ), + OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.EXACT_NUMERIC) + ) + ) + ) .returnTypeNonNull(SqlTypeName.BIGINT) .functionCategory(SqlFunctionCategory.USER_DEFINED_FUNCTION) .build(); diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index bf35cd665ae8..38eb677479af 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -29,6 +29,7 @@ import org.apache.druid.query.aggregation.datasketches.SketchQueryContext; import org.apache.druid.query.aggregation.datasketches.theta.SketchAggregatorFactory; import org.apache.druid.query.aggregation.datasketches.theta.SketchMergeAggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.theta.SketchModule; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.segment.column.ColumnType; @@ -89,13 +90,17 @@ public Aggregation toDruidAggregation( sketchSize = SketchAggregatorFactory.DEFAULT_MAX_SKETCH_SIZE; } - final AggregatorFactory aggregatorFactory; + AggregatorFactory aggregatorFactory = null; final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name; if (columnArg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(columnArg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX)) + .map(type -> type.is(ValueType.COMPLEX) && ( + SketchModule.THETA_SKETCH_TYPE.equals(type) || + SketchModule.MERGE_TYPE.equals(type) || + SketchModule.BUILD_TYPE.equals(type) + )) .orElse(false)) { aggregatorFactory = new SketchMergeAggregatorFactory( aggregatorName, @@ -116,26 +121,33 @@ public Aggregation toDruidAggregation( ); } - final DimensionSpec dimensionSpec; - - if (columnArg.isDirectColumnAccess()) { - dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType); - } else { - String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( - columnArg, - dataType + if (!inputType.is(ValueType.COMPLEX)) { + final DimensionSpec dimensionSpec; + + if (columnArg.isDirectColumnAccess()) { + dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType); + } else { + String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( + columnArg, + dataType + ); + dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType); + } + + aggregatorFactory = new SketchMergeAggregatorFactory( + aggregatorName, + dimensionSpec.getDimension(), + sketchSize, + finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), + null, + null ); - dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType); } + } - aggregatorFactory = new SketchMergeAggregatorFactory( - aggregatorName, - dimensionSpec.getDimension(), - sketchSize, - finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), - null, - null - ); + if (aggregatorFactory == null) { + plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for %s column. You can disable approximation and use COUNT(DISTINCT %s) and run the query again.", columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); + return null; } return toAggregation( From 0f90ff5a63b11e2669a9dbe251db084098a0cb72 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 4 Jul 2024 18:46:28 +0530 Subject: [PATCH 08/20] Add tests --- .../HllSketchMergeAggregatorFactoryTest.java | 42 +++++++++++++++ .../hll/sql/HllSketchSqlAggregatorTest.java | 43 +++++++++++++++ .../theta/SketchAggregatorFactoryTest.java | 53 +++++++++++++++++++ .../sql/ThetaSketchSqlAggregatorTest.java | 39 ++++++++++++++ .../HyperUniquesAggregatorFactory.java | 45 ++++++---------- .../HyperUniquesAggregatorFactoryTest.java | 47 ++++++++++++++++ .../druid/sql/calcite/CalciteQueryTest.java | 28 ++++++++++ 7 files changed, 267 insertions(+), 30 deletions(-) diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java index 101b25b99be0..de6bb933610f 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java @@ -23,9 +23,15 @@ import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.datasketches.hll.TgtHllType; import org.apache.druid.java.util.common.StringEncoding; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; +import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.easymock.EasyMock; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -45,6 +51,10 @@ public class HllSketchMergeAggregatorFactoryTest private HllSketchMergeAggregatorFactory targetRound; private HllSketchMergeAggregatorFactory targetNoRound; + private final ColumnSelectorFactory metricFactory = EasyMock.mock(ColumnSelectorFactory.class); + private final VectorColumnSelectorFactory vectorFactory = EasyMock.mock(VectorColumnSelectorFactory.class); + private final ColumnCapabilities capabilities = EasyMock.mock(ColumnCapabilities.class); + @Before public void setUp() { @@ -66,6 +76,11 @@ public void setUp() SHOULD_FINALIZE, !ROUND ); + + EasyMock.expect(metricFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); + EasyMock.expect(vectorFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); + EasyMock.expect(capabilities.toColumnType()).andReturn(ColumnType.NESTED_DATA).anyTimes(); + EasyMock.replay(metricFactory, vectorFactory, capabilities); } @Test(expected = AggregatorFactoryNotMergeableException.class) @@ -291,4 +306,31 @@ public void testWithName() throws Exception Assert.assertEquals(factory, factory.withName(targetRound.getName())); Assert.assertEquals("newTest", factory.withName("newTest").getName()); } + + @Test + public void testFactorizeOnUnsupportedComplexColumn() + { + Throwable exception = Assert.assertThrows(UOE.class, () -> targetRound.factorize(metricFactory)); + Assert.assertEquals("Using aggregation type HLLSketchMerge is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + + EasyMock.verify(metricFactory, capabilities); + } + + @Test + public void testFactorizeBufferedOnUnsupportedComplexColumn() + { + Throwable exception = Assert.assertThrows(UOE.class, () -> targetRound.factorizeBuffered(metricFactory)); + Assert.assertEquals("Using aggregation type HLLSketchMerge is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + + EasyMock.verify(metricFactory, capabilities); + } + + @Test + public void testFactorizeVectorOnUnsupportedComplexColumn() + { + Throwable exception = Assert.assertThrows(UOE.class, () -> targetRound.factorizeVector(vectorFactory)); + Assert.assertEquals("Using aggregation type HLLSketchMerge is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + + EasyMock.verify(vectorFactory, capabilities); + } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 2907d6f8bb87..8821e2e9e096 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -24,7 +24,9 @@ import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.guice.DruidInjectorBuilder; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringEncoding; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; @@ -86,6 +88,7 @@ import org.apache.druid.sql.guice.SqlModule; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; +import org.apache.druid.timeline.partition.NumberedShardSpec; import org.joda.time.DateTimeZone; import org.joda.time.Period; import org.junit.Assert; @@ -100,6 +103,10 @@ @SqlTestFrameworkConfig.ComponentSupplier(HllSketchComponentSupplier.class) public class HllSketchSqlAggregatorTest extends BaseCalciteQueryTest { + static { + NullHandling.initializeForTests(); + } + private static final boolean ROUND = true; // For testHllSketchPostAggsGroupBy, testHllSketchPostAggsTimeseries @@ -300,6 +307,15 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index + ).add( + DataSegment.builder() + .dataSource(CalciteTests.WIKIPEDIA_FIRST_LAST) + .interval(Intervals.of("2015-09-12/2015-09-13")) + .version("1") + .shardSpec(new NumberedShardSpec(0, 0)) + .size(0) + .build(), + TestDataBuilder.makeWikipediaIndexWithAggregation(tempDirProducer.newTempFolder()) ); } } @@ -508,6 +524,33 @@ public void testApproxCountDistinctHllSketchIsRounded() ); } + @Test + public void testApproxCountDistinctOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); + Assert.fail("query planning should fail"); + } + catch (DruidException e) { + Assert.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " + + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" + + " approximation and use COUNT(DISTINCT double_first_added) and run the query again.]", + e.getMessage()); + } + } + + @Test + public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT APPROX_COUNT_DISTINCT_DS_HLL(double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); + Assert.fail("query planning should fail"); + } + catch (DruidException e) { + Assert.assertTrue(e.getMessage().contains("Cannot apply 'APPROX_COUNT_DISTINCT_DS_HLL' to arguments of type 'APPROX_COUNT_DISTINCT_DS_HLL(>)'")); + } + } + @Test public void testHllSketchFilteredAggregatorsGroupBy() { diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java index 1d70ff30f251..3cd2f48018d8 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java @@ -20,6 +20,7 @@ package org.apache.druid.query.aggregation.datasketches.theta; import com.google.common.collect.ImmutableList; +import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.Druids; import org.apache.druid.query.aggregation.AggregatorAndSize; @@ -32,10 +33,14 @@ import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.easymock.EasyMock; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; public class SketchAggregatorFactoryTest @@ -46,6 +51,22 @@ public class SketchAggregatorFactoryTest private static final SketchMergeAggregatorFactory AGGREGATOR_32768 = new SketchMergeAggregatorFactory("x", "x", 32768, null, false, null); + private final ColumnSelectorFactory metricFactory = EasyMock.mock(ColumnSelectorFactory.class); + private final VectorColumnSelectorFactory vectorFactory = EasyMock.mock(VectorColumnSelectorFactory.class); + private final ColumnCapabilities capabilities = EasyMock.mock(ColumnCapabilities.class); + + @Before + public void setup() + { + EasyMock.expect(metricFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); + EasyMock.expect(vectorFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); + EasyMock.expect(capabilities.toColumnType()).andReturn(ColumnType.NESTED_DATA).anyTimes(); + EasyMock.expect(capabilities.isArray()).andReturn(false).anyTimes(); + EasyMock.expect(capabilities.is(EasyMock.eq(ValueType.COMPLEX))).andReturn(true).anyTimes(); + EasyMock.expect(capabilities.asTypeString()).andReturn(ColumnType.NESTED_DATA.asTypeString()).anyTimes(); + EasyMock.replay(metricFactory, vectorFactory, capabilities); + } + @Test public void testGuessAggregatorHeapFootprint() { @@ -168,4 +189,36 @@ public void testWithName() Assert.assertEquals(AGGREGATOR_16384, AGGREGATOR_16384.withName("x")); Assert.assertEquals("newTest", AGGREGATOR_16384.withName("newTest").getName()); } + + @Test + public void testFactorizeOnUnsupportedComplexColumn() + { + Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorize(metricFactory)); + Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); + EasyMock.verify(metricFactory, capabilities); + } + + @Test + public void testFactorizeWithSizeOnUnsupportedComplexColumn() + { + Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorizeWithSize(metricFactory)); + Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); + EasyMock.verify(metricFactory, capabilities); + } + + @Test + public void testFactorizeBufferedOnUnsupportedComplexColumn() + { + Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorizeBuffered(metricFactory)); + Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); + EasyMock.verify(metricFactory, capabilities); + } + + @Test + public void testFactorizeVectorOnUnsupportedComplexColumn() + { + Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorizeVector(vectorFactory)); + Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); + EasyMock.verify(vectorFactory, capabilities); + } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 247f924357aa..ec48865c210f 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -23,7 +23,9 @@ import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.guice.DruidInjectorBuilder; +import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.PeriodGranularity; @@ -71,6 +73,7 @@ import org.apache.druid.sql.guice.SqlModule; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; +import org.apache.druid.timeline.partition.NumberedShardSpec; import org.joda.time.DateTimeZone; import org.joda.time.Period; import org.junit.Assert; @@ -158,6 +161,15 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker( .size(0) .build(), index + ).add( + DataSegment.builder() + .dataSource(CalciteTests.WIKIPEDIA_FIRST_LAST) + .interval(Intervals.of("2015-09-12/2015-09-13")) + .version("1") + .shardSpec(new NumberedShardSpec(0, 0)) + .size(0) + .build(), + TestDataBuilder.makeWikipediaIndexWithAggregation(tempDirProducer.newTempFolder()) ); } } @@ -373,6 +385,33 @@ public void testAvgDailyCountDistinctThetaSketch() ); } + @Test + public void testApproxCountDistinctOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); + Assert.fail("query planning should fail"); + } + catch (DruidException e) { + Assert.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " + + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" + + " approximation and use COUNT(DISTINCT double_first_added) and run the query again.]", + e.getMessage()); + } + } + + @Test + public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT APPROX_COUNT_DISTINCT_DS_THETA(double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); + Assert.fail("query planning should fail"); + } + catch (DruidException e) { + Assert.assertTrue(e.getMessage().contains("Cannot apply 'APPROX_COUNT_DISTINCT_DS_THETA' to arguments of type 'APPROX_COUNT_DISTINCT_DS_THETA(>)'")); + } + } + @Test public void testThetaSketchPostAggs() { diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index 5617bc28c960..b3451470b39b 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -103,17 +103,7 @@ public HyperUniquesAggregatorFactory( @Override public Aggregator factorize(ColumnSelectorFactory metricFactory) { - ColumnCapabilities columnCapabilities = metricFactory.getColumnCapabilities(fieldName); - if (columnCapabilities != null) { - final ColumnType type = columnCapabilities.toColumnType(); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { - throw new UOE("Using aggregation type %s is not supported for %s column. " - + "Use a different aggregator type and run the query again.", - getIntermediateType().getComplexTypeName(), - type); - } - } - + validateInputs(metricFactory.getColumnCapabilities(fieldName)); BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); if (selector instanceof NilColumnValueSelector) { return NoopAggregator.instance(); @@ -124,17 +114,7 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) { - ColumnCapabilities columnCapabilities = metricFactory.getColumnCapabilities(fieldName); - if (columnCapabilities != null) { - final ColumnType type = columnCapabilities.toColumnType(); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { - throw new UOE("Using aggregation type %s is not supported for %s column. " - + "Use a different aggregator type and run the query again.", - getIntermediateType().getComplexTypeName(), - type); - } - } - + validateInputs(metricFactory.getColumnCapabilities(fieldName)); BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); if (selector instanceof NilColumnValueSelector) { return NoopBufferAggregator.instance(); @@ -146,8 +126,19 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) { final ColumnCapabilities columnCapabilities = selectorFactory.getColumnCapabilities(fieldName); - if (columnCapabilities != null) { - final ColumnType type = columnCapabilities.toColumnType(); + validateInputs(columnCapabilities); + + if (!Types.is(columnCapabilities, ValueType.COMPLEX)) { + return NoopVectorAggregator.instance(); + } else { + return new HyperUniquesVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); + } + } + + private void validateInputs(@Nullable ColumnCapabilities capabilities) + { + if (capabilities != null) { + final ColumnType type = capabilities.toColumnType(); if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { throw new UOE("Using aggregation type %s is not supported for %s column. " + "Use a different aggregator type and run the query again.", @@ -155,12 +146,6 @@ public VectorAggregator factorizeVector(final VectorColumnSelectorFactory select type); } } - - if (!Types.is(columnCapabilities, ValueType.COMPLEX)) { - return NoopVectorAggregator.instance(); - } else { - return new HyperUniquesVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); - } } @Override diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java index 421a457999d9..5e1f077a16fb 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java @@ -25,15 +25,29 @@ import org.apache.druid.hll.HyperLogLogCollector; import org.apache.druid.hll.VersionZeroHyperLogLogCollector; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentMatchers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; import java.nio.ByteBuffer; import java.util.Comparator; import java.util.Random; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@RunWith(MockitoJUnitRunner.class) public class HyperUniquesAggregatorFactoryTest { static final HyperUniquesAggregatorFactory AGGREGATOR_FACTORY = new HyperUniquesAggregatorFactory( @@ -44,6 +58,18 @@ public class HyperUniquesAggregatorFactoryTest private final HashFunction fn = Hashing.murmur3_128(); + @Mock private ColumnCapabilities capabilities; + @Mock private ColumnSelectorFactory metricFactory; + @Mock private VectorColumnSelectorFactory selectorFactory; + + @Before + public void setup() + { + Mockito.doReturn(ColumnType.NESTED_DATA).when(capabilities).toColumnType(); + Mockito.doReturn(capabilities).when(metricFactory).getColumnCapabilities(ArgumentMatchers.any()); + Mockito.doReturn(capabilities).when(selectorFactory).getColumnCapabilities(ArgumentMatchers.any()); + } + @Test public void testDeserializeV0() { @@ -216,4 +242,25 @@ public void testSerde() throws Exception Assert.assertEquals(factory, factory2); } + + @Test + public void testFactorizeOnUnsupportedComplexColumn() + { + Throwable exception = assertThrows(UOE.class, () -> AGGREGATOR_FACTORY.factorize(metricFactory)); + Assert.assertEquals("Using aggregation type hyperUnique is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + } + + @Test + public void testFactorizeBufferedOnUnsupportedComplexColumn() + { + Throwable exception = assertThrows(UOE.class, () -> AGGREGATOR_FACTORY.factorizeBuffered(metricFactory)); + Assert.assertEquals("Using aggregation type hyperUnique is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + } + + @Test + public void testFactorizeVectorOnUnsupportedComplexColumn() + { + Throwable exception = assertThrows(UOE.class, () -> AGGREGATOR_FACTORY.factorizeVector(selectorFactory)); + Assert.assertEquals("Using aggregation type hyperUnique is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index ec40fb3f871d..c0bd6808edc5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -2674,6 +2674,34 @@ public void testHavingOnApproximateCountDistinct() ); } + @Test + public void testApproxCountDistinctOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); + Assert.fail("query planning should fail"); + } + catch (DruidException e) { + Assert.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " + + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" + + " approximation and use COUNT(DISTINCT double_first_added) and run the query again.]", + e.getMessage()); + } + } + + @Test + public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT APPROX_COUNT_DISTINCT(double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); + Assert.fail("query planning should fail"); + } + catch (DruidException e) { + Assert.assertTrue(e.getMessage().contains( + "Cannot apply 'APPROX_COUNT_DISTINCT' to arguments of type 'APPROX_COUNT_DISTINCT(>)'")); + } + } + @Test public void testHavingOnExactCountDistinct() { From c2213c809fb949910a3f4a0dbba0982b6b4aaf3e Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 4 Jul 2024 19:25:48 +0530 Subject: [PATCH 09/20] Minor cleanup --- .../hll/HllSketchMergeAggregatorFactory.java | 53 ++++++------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index 833a627f12c4..c838130d3dd8 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -111,19 +111,7 @@ protected byte getCacheTypeId() @Override public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory) { - ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(getFieldName()); - if (columnCapabilities != null) { - final ColumnType type = columnCapabilities.toColumnType(); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && - !(ValueType.COMPLEX.equals(type.getType()) && - (Objects.equals(type.getComplexTypeName(), "HLLSketch") || - Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")))) { - throw new UOE("Using aggregation type %s is not supported for %s column. " - + "Use a different aggregator type and run the query again.", - getIntermediateType().getComplexTypeName(), - type); - } - } + validateInputs(columnSelectorFactory.getColumnCapabilities(getFieldName())); final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); return new HllSketchMergeAggregator(selector, getLgK(), TgtHllType.valueOf(getTgtHllType())); @@ -133,19 +121,7 @@ public Aggregator factorize(final ColumnSelectorFactory columnSelectorFactory) @Override public BufferAggregator factorizeBuffered(final ColumnSelectorFactory columnSelectorFactory) { - ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(getFieldName()); - if (columnCapabilities != null) { - final ColumnType type = columnCapabilities.toColumnType(); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && - !(ValueType.COMPLEX.equals(type.getType()) && - (Objects.equals(type.getComplexTypeName(), "HLLSketch") || - Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")))) { - throw new UOE("Using aggregation type %s is not supported for %s column. " - + "Use a different aggregator type and run the query again.", - getIntermediateType().getComplexTypeName(), - type); - } - } + validateInputs(columnSelectorFactory.getColumnCapabilities(getFieldName())); final ColumnValueSelector selector = columnSelectorFactory.makeColumnValueSelector(getFieldName()); return new HllSketchMergeBufferAggregator( @@ -165,9 +141,20 @@ public boolean canVectorize(ColumnInspector columnInspector) @Override public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) { - ColumnCapabilities columnCapabilities = selectorFactory.getColumnCapabilities(getFieldName()); - if (columnCapabilities != null) { - final ColumnType type = columnCapabilities.toColumnType(); + validateInputs(selectorFactory.getColumnCapabilities(getFieldName())); + return new HllSketchMergeVectorAggregator( + selectorFactory, + getFieldName(), + getLgK(), + TgtHllType.valueOf(getTgtHllType()), + getMaxIntermediateSize() + ); + } + + private void validateInputs(@Nullable ColumnCapabilities capabilities) + { + if (capabilities != null) { + final ColumnType type = capabilities.toColumnType(); if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !(ValueType.COMPLEX.equals(type.getType()) && (Objects.equals(type.getComplexTypeName(), "HLLSketch") || @@ -178,14 +165,6 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact type); } } - - return new HllSketchMergeVectorAggregator( - selectorFactory, - getFieldName(), - getLgK(), - TgtHllType.valueOf(getTgtHllType()), - getMaxIntermediateSize() - ); } @Override From 6c8a18ef5df10c4683573d35d2a013f98c8df35f Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 4 Jul 2024 19:37:54 +0530 Subject: [PATCH 10/20] Minor cleanup --- .../datasketches/theta/SketchAggregatorFactory.java | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index a83e1f9a0e71..cdd952eba9e3 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -27,7 +27,6 @@ import org.apache.datasketches.theta.SetOperation; import org.apache.datasketches.theta.Union; import org.apache.datasketches.thetacommon.ThetaUtil; -import org.apache.druid.error.InvalidInput; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.aggregation.AggregateCombiner; @@ -83,10 +82,6 @@ public SketchAggregatorFactory(String name, String fieldName, Integer size, byte public Aggregator factorize(ColumnSelectorFactory metricFactory) { validateInputs(metricFactory.getColumnCapabilities(fieldName)); - ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); - if (capabilities != null && capabilities.isArray()) { - throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); - } BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); return new SketchAggregator(selector, size); } @@ -95,10 +90,6 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) { validateInputs(metricFactory.getColumnCapabilities(fieldName)); - ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); - if (capabilities != null && capabilities.isArray()) { - throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); - } BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); final SketchAggregator aggregator = new SketchAggregator(selector, size); return new AggregatorAndSize(aggregator, aggregator.getInitialSizeBytes()); @@ -109,10 +100,6 @@ public AggregatorAndSize factorizeWithSize(ColumnSelectorFactory metricFactory) public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) { validateInputs(metricFactory.getColumnCapabilities(fieldName)); - ColumnCapabilities capabilities = metricFactory.getColumnCapabilities(fieldName); - if (capabilities != null && capabilities.isArray()) { - throw InvalidInput.exception("ARRAY types are not supported for theta sketch"); - } BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); return new SketchBufferAggregator(selector, size, getMaxIntermediateSizeWithNulls()); } From acb3ad645c2616a4e3cd512fadc61bd42ca219dc Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 4 Jul 2024 20:55:42 +0530 Subject: [PATCH 11/20] Fix failing tests --- .../calcite/CalciteNestedDataQueryTest.java | 29 +++++++++++++++++++ .../druid/sql/calcite/CalciteQueryTest.java | 28 ------------------ 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index e2bc9d45eb27..07b70fe64589 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -7321,6 +7321,35 @@ public void testNvlJsonValueDoubleSometimesMissingRangeFilter() ); } + @Test + public void testApproxCountDistinctOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT COUNT(DISTINCT nester) FROM druid.nested", ImmutableList.of(), ImmutableList.of()); + Assertions.fail("query planning should fail"); + } + catch (DruidException e) { + Assertions.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " + + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" + + " approximation and use COUNT(DISTINCT nester) and run the query again.]", + e.getMessage()); + } + } + + @Test + public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() + { + try { + testQuery("SELECT APPROX_COUNT_DISTINCT(nester) FROM druid.nested", ImmutableList.of(), ImmutableList.of()); + Assertions.fail("query planning should fail"); + } + catch (DruidException e) { + System.out.println("e.getMessage() = " + e.getMessage()); + Assertions.assertTrue(e.getMessage().contains( + "Cannot apply 'APPROX_COUNT_DISTINCT' to arguments of type 'APPROX_COUNT_DISTINCT(>)'")); + } + } + @Test public void testNvlJsonValueDoubleSometimesMissingEqualityFilter() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index c0bd6808edc5..ec40fb3f871d 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -2674,34 +2674,6 @@ public void testHavingOnApproximateCountDistinct() ); } - @Test - public void testApproxCountDistinctOnUnsupportedComplexColumn() - { - try { - testQuery("SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); - Assert.fail("query planning should fail"); - } - catch (DruidException e) { - Assert.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " - + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" - + " approximation and use COUNT(DISTINCT double_first_added) and run the query again.]", - e.getMessage()); - } - } - - @Test - public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() - { - try { - testQuery("SELECT APPROX_COUNT_DISTINCT(double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); - Assert.fail("query planning should fail"); - } - catch (DruidException e) { - Assert.assertTrue(e.getMessage().contains( - "Cannot apply 'APPROX_COUNT_DISTINCT' to arguments of type 'APPROX_COUNT_DISTINCT(>)'")); - } - } - @Test public void testHavingOnExactCountDistinct() { From 32df559d270b0b8d0c6ffaf99f06a76213a0e8ac Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Mon, 8 Jul 2024 19:13:25 +0530 Subject: [PATCH 12/20] Address review comments --- .../hll/HllSketchMergeAggregatorFactory.java | 23 +++++---- ...ketchApproxCountDistinctSqlAggregator.java | 12 +++-- .../hll/sql/HllSketchBaseSqlAggregator.java | 16 ++++--- .../theta/SketchAggregatorFactory.java | 19 ++++---- ...ketchApproxCountDistinctSqlAggregator.java | 10 ++-- .../sql/ThetaSketchBaseSqlAggregator.java | 5 +- .../HllSketchMergeAggregatorFactoryTest.java | 48 ++++++++++--------- .../theta/SketchAggregatorFactoryTest.java | 42 +++++++--------- .../HyperUniquesAggregatorFactory.java | 14 ++++-- .../HyperUniquesAggregatorFactoryTest.java | 35 ++++++-------- ...iltinApproxCountDistinctSqlAggregator.java | 5 +- 11 files changed, 124 insertions(+), 105 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index c838130d3dd8..ee5918910312 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -24,8 +24,8 @@ import org.apache.datasketches.hll.HllSketch; import org.apache.datasketches.hll.TgtHllType; import org.apache.datasketches.hll.Union; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringEncoding; -import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; @@ -41,7 +41,6 @@ import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import javax.annotation.Nullable; -import java.util.Objects; /** * This aggregator factory is for merging existing sketches. @@ -151,18 +150,22 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact ); } + /** + * Validates whether the aggregator supports the input column type. + * @param capabilities + */ private void validateInputs(@Nullable ColumnCapabilities capabilities) { if (capabilities != null) { final ColumnType type = capabilities.toColumnType(); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && - !(ValueType.COMPLEX.equals(type.getType()) && - (Objects.equals(type.getComplexTypeName(), "HLLSketch") || - Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")))) { - throw new UOE("Using aggregation type %s is not supported for %s column. " - + "Use a different aggregator type and run the query again.", - getIntermediateType().getComplexTypeName(), - type); + boolean isUnsupportedComplexType = ValueType.COMPLEX.equals(type.getType()) && + (HllSketchModule.TYPE_NAME.equals(type.getComplexTypeName()) || + HllSketchModule.BUILD_TYPE_NAME.equals(type.getComplexTypeName())); + if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !isUnsupportedComplexType) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Using aggregator [%s] is not supported for complex columns with type [%s].", + getIntermediateType().getComplexTypeName(), type); } } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java index f02594963e3a..1644cb17a241 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java @@ -40,11 +40,15 @@ import java.util.Collections; +/** + * Approximate count distinct aggregator using HLL sketches. + * Supported column types: String, Numeric, HLLSketchMerge, HLLSketchBuild. + */ public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlAggregator implements SqlAggregator { public static final String NAME = "APPROX_COUNT_DISTINCT_DS_HLL"; - private static final SqlSingleOperandTypeChecker COLUMN_ALLOWED_TYPES = OperandTypes.or( + private static final SqlSingleOperandTypeChecker AGGREGATED_COLUMN_TYPE_CHECKER = OperandTypes.or( OperandTypes.STRING, OperandTypes.NUMERIC, RowSignatures.complexTypeChecker(HllSketchMergeAggregatorFactory.TYPE), @@ -57,12 +61,12 @@ public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlA .operandTypeChecker( OperandTypes.or( // APPROX_COUNT_DISTINCT_DS_HLL(column) - OperandTypes.and(COLUMN_ALLOWED_TYPES, OperandTypes.family(SqlTypeFamily.ANY)), + OperandTypes.and(AGGREGATED_COLUMN_TYPE_CHECKER, OperandTypes.family(SqlTypeFamily.ANY)), // APPROX_COUNT_DISTINCT_DS_HLL(column, lgk) OperandTypes.and( OperandTypes.sequence( StringUtils.format("'%s(column, lgk)'", NAME), - COLUMN_ALLOWED_TYPES, + AGGREGATED_COLUMN_TYPE_CHECKER, CastedLiteralOperandTypeCheckers.POSITIVE_INTEGER_LITERAL ), OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.EXACT_NUMERIC) @@ -71,7 +75,7 @@ public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlA OperandTypes.and( OperandTypes.sequence( StringUtils.format("'%s(column, lgk, tgtHllType)'", NAME), - COLUMN_ALLOWED_TYPES, + AGGREGATED_COLUMN_TYPE_CHECKER, CastedLiteralOperandTypeCheckers.POSITIVE_INTEGER_LITERAL, OperandTypes.STRING ), diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index 54f1f008f6fc..9d9a376ed26c 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -31,6 +31,7 @@ import org.apache.druid.query.aggregation.datasketches.hll.HllSketchAggregatorFactory; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchBuildAggregatorFactory; import org.apache.druid.query.aggregation.datasketches.hll.HllSketchMergeAggregatorFactory; +import org.apache.druid.query.aggregation.datasketches.hll.HllSketchModule; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.segment.column.ColumnType; @@ -46,7 +47,6 @@ import javax.annotation.Nullable; import java.util.List; -import java.util.Objects; public abstract class HllSketchBaseSqlAggregator implements SqlAggregator { @@ -119,8 +119,9 @@ public Aggregation toDruidAggregation( .map(type -> type.is(ValueType.COMPLEX) && ( HllSketchMergeAggregatorFactory.TYPE.equals(type) || - Objects.equals(type.getComplexTypeName(), "HLLSketch") || - Objects.equals(type.getComplexTypeName(), "HLLSketchBuild")) + HllSketchModule.TYPE_NAME.equals(type.getComplexTypeName()) || + HllSketchModule.BUILD_TYPE_NAME.equals(type.getComplexTypeName()) + ) ) .orElse(false)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( @@ -161,8 +162,8 @@ public Aggregation toDruidAggregation( if (inputType.is(ValueType.COMPLEX)) { if (HllSketchMergeAggregatorFactory.TYPE.equals(inputType) || - Objects.equals(inputType.getComplexTypeName(), "HLLSketch") || - Objects.equals(inputType.getComplexTypeName(), "HLLSketchBuild")) { + HllSketchModule.TYPE_NAME.equals(inputType.getComplexTypeName()) || + HllSketchModule.BUILD_TYPE_NAME.equals(inputType.getComplexTypeName())) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, dimensionSpec.getOutputName(), @@ -191,7 +192,10 @@ public Aggregation toDruidAggregation( } if (aggregatorFactory == null) { - plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for %s column. You can disable approximation and use COUNT(DISTINCT %s) and run the query again.", columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); + plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) " + + "is not supported for %s column. You can disable approximation and use " + + "COUNT(DISTINCT %s) and run the query again.", + columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); return null; } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index cdd952eba9e3..e85e0e0e8fd1 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -27,7 +27,7 @@ import org.apache.datasketches.theta.SetOperation; import org.apache.datasketches.theta.Union; import org.apache.datasketches.thetacommon.ThetaUtil; -import org.apache.druid.java.util.common.ISE; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.aggregation.AggregateCombiner; import org.apache.druid.query.aggregation.Aggregator; @@ -111,6 +111,10 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact return new SketchVectorAggregator(selectorFactory, fieldName, size, getMaxIntermediateSizeWithNulls()); } + /** + * Validates whether the aggregator supports the input column type. + * @param capabilities + */ private void validateInputs(@Nullable ColumnCapabilities capabilities) { if (capabilities != null) { @@ -119,13 +123,12 @@ private void validateInputs(@Nullable ColumnCapabilities capabilities) SketchModule.MERGE_TYPE.equals(capabilities.toColumnType()) || SketchModule.BUILD_TYPE.equals(capabilities.toColumnType()))) ) { - throw new ISE( - "Invalid input [%s] of type [%s] for [%s] aggregator [%s]", - getFieldName(), - capabilities.asTypeString(), - getIntermediateType(), - getName() - ); + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Unsupported input [%s] of type [%s] for aggregator [%s].", + getFieldName(), + capabilities.asTypeString(), + getIntermediateType()); } } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java index babaf4341ce2..b48249da6695 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java @@ -38,11 +38,15 @@ import java.util.Collections; +/** + * Approximate count distinct aggregator using theta sketches. + * Supported column types: String, Numeric, Theta Sketch. + */ public class ThetaSketchApproxCountDistinctSqlAggregator extends ThetaSketchBaseSqlAggregator implements SqlAggregator { public static final String NAME = "APPROX_COUNT_DISTINCT_DS_THETA"; - private static final SqlSingleOperandTypeChecker COLUMN_ALLOWED_TYPES = OperandTypes.or( + private static final SqlSingleOperandTypeChecker AGGREGATED_COLUMN_TYPE_CHECKER = OperandTypes.or( OperandTypes.STRING, OperandTypes.NUMERIC, RowSignatures.complexTypeChecker(SketchModule.THETA_SKETCH_TYPE) @@ -54,12 +58,12 @@ public class ThetaSketchApproxCountDistinctSqlAggregator extends ThetaSketchBase .operandTypeChecker( OperandTypes.or( // APPROX_COUNT_DISTINCT_DS_THETA(expr) - OperandTypes.and(COLUMN_ALLOWED_TYPES, OperandTypes.family(SqlTypeFamily.ANY)), + OperandTypes.and(AGGREGATED_COLUMN_TYPE_CHECKER, OperandTypes.family(SqlTypeFamily.ANY)), // APPROX_COUNT_DISTINCT_DS_THETA(expr, size) OperandTypes.and( OperandTypes.sequence( StringUtils.format("'%s(expr, size)'", NAME), - COLUMN_ALLOWED_TYPES, + AGGREGATED_COLUMN_TYPE_CHECKER, CastedLiteralOperandTypeCheckers.POSITIVE_INTEGER_LITERAL ), OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.EXACT_NUMERIC) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 38eb677479af..415d90e03742 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -146,7 +146,10 @@ public Aggregation toDruidAggregation( } if (aggregatorFactory == null) { - plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for %s column. You can disable approximation and use COUNT(DISTINCT %s) and run the query again.", columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); + plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) " + + "is not supported for %s column. You can disable approximation and use " + + "COUNT(DISTINCT %s) and run the query again.", + columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); return null; } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java index de6bb933610f..d5ac40ba7394 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java @@ -22,16 +22,17 @@ import com.fasterxml.jackson.databind.ObjectMapper; import nl.jqno.equalsverifier.EqualsVerifier; import org.apache.datasketches.hll.TgtHllType; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringEncoding; -import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.TestColumnSelectorFactory; import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.TestVectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; -import org.easymock.EasyMock; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -51,9 +52,8 @@ public class HllSketchMergeAggregatorFactoryTest private HllSketchMergeAggregatorFactory targetRound; private HllSketchMergeAggregatorFactory targetNoRound; - private final ColumnSelectorFactory metricFactory = EasyMock.mock(ColumnSelectorFactory.class); - private final VectorColumnSelectorFactory vectorFactory = EasyMock.mock(VectorColumnSelectorFactory.class); - private final ColumnCapabilities capabilities = EasyMock.mock(ColumnCapabilities.class); + private ColumnSelectorFactory metricFactory; + private VectorColumnSelectorFactory vectorFactory; @Before public void setUp() @@ -77,10 +77,9 @@ public void setUp() !ROUND ); - EasyMock.expect(metricFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); - EasyMock.expect(vectorFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); - EasyMock.expect(capabilities.toColumnType()).andReturn(ColumnType.NESTED_DATA).anyTimes(); - EasyMock.replay(metricFactory, vectorFactory, capabilities); + final ColumnCapabilitiesImpl columnCapabilities = ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA); + metricFactory = new TestColumnSelectorFactory().addCapabilities(FIELD_NAME, columnCapabilities); + vectorFactory = new TestVectorColumnSelectorFactory().addCapabilities(FIELD_NAME, columnCapabilities); } @Test(expected = AggregatorFactoryNotMergeableException.class) @@ -310,27 +309,32 @@ public void testWithName() throws Exception @Test public void testFactorizeOnUnsupportedComplexColumn() { - Throwable exception = Assert.assertThrows(UOE.class, () -> targetRound.factorize(metricFactory)); - Assert.assertEquals("Using aggregation type HLLSketchMerge is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); - - EasyMock.verify(metricFactory, capabilities); + final ColumnSelectorFactory metricFactory = new TestColumnSelectorFactory() + .addCapabilities( + FIELD_NAME, + ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA) + ); + Throwable exception = Assert.assertThrows(DruidException.class, () -> targetRound.factorize(metricFactory)); + Assert.assertEquals( + "Using aggregator [HLLSketchMerge] is not supported for complex columns with type [COMPLEX].", + exception.getMessage()); } @Test public void testFactorizeBufferedOnUnsupportedComplexColumn() { - Throwable exception = Assert.assertThrows(UOE.class, () -> targetRound.factorizeBuffered(metricFactory)); - Assert.assertEquals("Using aggregation type HLLSketchMerge is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); - - EasyMock.verify(metricFactory, capabilities); + Throwable exception = Assert.assertThrows(DruidException.class, () -> targetRound.factorizeBuffered(metricFactory)); + Assert.assertEquals( + "Using aggregator [HLLSketchMerge] is not supported for complex columns with type [COMPLEX].", + exception.getMessage()); } @Test public void testFactorizeVectorOnUnsupportedComplexColumn() { - Throwable exception = Assert.assertThrows(UOE.class, () -> targetRound.factorizeVector(vectorFactory)); - Assert.assertEquals("Using aggregation type HLLSketchMerge is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); - - EasyMock.verify(vectorFactory, capabilities); + Throwable exception = Assert.assertThrows(DruidException.class, () -> targetRound.factorizeVector(vectorFactory)); + Assert.assertEquals( + "Using aggregator [HLLSketchMerge] is not supported for complex columns with type [COMPLEX].", + exception.getMessage()); } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java index 3cd2f48018d8..23887652a735 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactoryTest.java @@ -20,7 +20,7 @@ package org.apache.druid.query.aggregation.datasketches.theta; import com.google.common.collect.ImmutableList; -import org.apache.druid.java.util.common.ISE; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.Druids; import org.apache.druid.query.aggregation.AggregatorAndSize; @@ -33,10 +33,11 @@ import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.TestColumnSelectorFactory; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.TestVectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.easymock.EasyMock; import org.junit.Assert; @@ -51,20 +52,15 @@ public class SketchAggregatorFactoryTest private static final SketchMergeAggregatorFactory AGGREGATOR_32768 = new SketchMergeAggregatorFactory("x", "x", 32768, null, false, null); - private final ColumnSelectorFactory metricFactory = EasyMock.mock(ColumnSelectorFactory.class); - private final VectorColumnSelectorFactory vectorFactory = EasyMock.mock(VectorColumnSelectorFactory.class); - private final ColumnCapabilities capabilities = EasyMock.mock(ColumnCapabilities.class); + private ColumnSelectorFactory metricFactory; + private VectorColumnSelectorFactory vectorFactory; @Before public void setup() { - EasyMock.expect(metricFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); - EasyMock.expect(vectorFactory.getColumnCapabilities(EasyMock.anyString())).andReturn(capabilities).anyTimes(); - EasyMock.expect(capabilities.toColumnType()).andReturn(ColumnType.NESTED_DATA).anyTimes(); - EasyMock.expect(capabilities.isArray()).andReturn(false).anyTimes(); - EasyMock.expect(capabilities.is(EasyMock.eq(ValueType.COMPLEX))).andReturn(true).anyTimes(); - EasyMock.expect(capabilities.asTypeString()).andReturn(ColumnType.NESTED_DATA.asTypeString()).anyTimes(); - EasyMock.replay(metricFactory, vectorFactory, capabilities); + final ColumnCapabilitiesImpl columnCapabilities = ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA); + metricFactory = new TestColumnSelectorFactory().addCapabilities("x", columnCapabilities); + vectorFactory = new TestVectorColumnSelectorFactory().addCapabilities("x", columnCapabilities); } @Test @@ -193,32 +189,28 @@ public void testWithName() @Test public void testFactorizeOnUnsupportedComplexColumn() { - Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorize(metricFactory)); - Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); - EasyMock.verify(metricFactory, capabilities); + Throwable exception = Assert.assertThrows(DruidException.class, () -> AGGREGATOR_16384.factorize(metricFactory)); + Assert.assertEquals("Unsupported input [x] of type [COMPLEX] for aggregator [COMPLEX].", exception.getMessage()); } @Test public void testFactorizeWithSizeOnUnsupportedComplexColumn() { - Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorizeWithSize(metricFactory)); - Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); - EasyMock.verify(metricFactory, capabilities); + Throwable exception = Assert.assertThrows(DruidException.class, () -> AGGREGATOR_16384.factorizeWithSize(metricFactory)); + Assert.assertEquals("Unsupported input [x] of type [COMPLEX] for aggregator [COMPLEX].", exception.getMessage()); } @Test public void testFactorizeBufferedOnUnsupportedComplexColumn() { - Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorizeBuffered(metricFactory)); - Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); - EasyMock.verify(metricFactory, capabilities); + Throwable exception = Assert.assertThrows(DruidException.class, () -> AGGREGATOR_16384.factorizeBuffered(metricFactory)); + Assert.assertEquals("Unsupported input [x] of type [COMPLEX] for aggregator [COMPLEX].", exception.getMessage()); } @Test public void testFactorizeVectorOnUnsupportedComplexColumn() { - Throwable exception = Assert.assertThrows(ISE.class, () -> AGGREGATOR_16384.factorizeVector(vectorFactory)); - Assert.assertEquals("Invalid input [x] of type [COMPLEX] for [COMPLEX] aggregator [x]", exception.getMessage()); - EasyMock.verify(vectorFactory, capabilities); + Throwable exception = Assert.assertThrows(DruidException.class, () -> AGGREGATOR_16384.factorizeVector(vectorFactory)); + Assert.assertEquals("Unsupported input [x] of type [COMPLEX] for aggregator [COMPLEX].", exception.getMessage()); } } diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index b3451470b39b..0b0f64c6a27b 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -21,9 +21,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.error.DruidException; import org.apache.druid.hll.HyperLogLogCollector; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.UOE; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.query.aggregation.AggregateCombiner; import org.apache.druid.query.aggregation.Aggregator; @@ -135,15 +135,19 @@ public VectorAggregator factorizeVector(final VectorColumnSelectorFactory select } } + /** + * Validates whether the aggregator supports the input column type. + * @param capabilities + */ private void validateInputs(@Nullable ColumnCapabilities capabilities) { if (capabilities != null) { final ColumnType type = capabilities.toColumnType(); if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { - throw new UOE("Using aggregation type %s is not supported for %s column. " - + "Use a different aggregator type and run the query again.", - getIntermediateType().getComplexTypeName(), - type); + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.UNSUPPORTED) + .build("Using aggregator [%s] is not supported for complex columns with type [%s].", + getIntermediateType().getComplexTypeName(), type); } } } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java index 5e1f077a16fb..5d1f512c75ca 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java @@ -22,24 +22,21 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; +import org.apache.druid.error.DruidException; import org.apache.druid.hll.HyperLogLogCollector; import org.apache.druid.hll.VersionZeroHyperLogLogCollector; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.TestColumnSelectorFactory; import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.vector.TestVectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.ArgumentMatchers; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; import java.nio.ByteBuffer; import java.util.Comparator; @@ -47,7 +44,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; -@RunWith(MockitoJUnitRunner.class) public class HyperUniquesAggregatorFactoryTest { static final HyperUniquesAggregatorFactory AGGREGATOR_FACTORY = new HyperUniquesAggregatorFactory( @@ -58,16 +54,15 @@ public class HyperUniquesAggregatorFactoryTest private final HashFunction fn = Hashing.murmur3_128(); - @Mock private ColumnCapabilities capabilities; - @Mock private ColumnSelectorFactory metricFactory; - @Mock private VectorColumnSelectorFactory selectorFactory; + private ColumnSelectorFactory metricFactory; + private VectorColumnSelectorFactory vectorFactory; @Before public void setup() { - Mockito.doReturn(ColumnType.NESTED_DATA).when(capabilities).toColumnType(); - Mockito.doReturn(capabilities).when(metricFactory).getColumnCapabilities(ArgumentMatchers.any()); - Mockito.doReturn(capabilities).when(selectorFactory).getColumnCapabilities(ArgumentMatchers.any()); + final ColumnCapabilitiesImpl columnCapabilities = ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA); + metricFactory = new TestColumnSelectorFactory().addCapabilities("uniques", columnCapabilities); + vectorFactory = new TestVectorColumnSelectorFactory().addCapabilities("uniques", columnCapabilities); } @Test @@ -246,21 +241,21 @@ public void testSerde() throws Exception @Test public void testFactorizeOnUnsupportedComplexColumn() { - Throwable exception = assertThrows(UOE.class, () -> AGGREGATOR_FACTORY.factorize(metricFactory)); - Assert.assertEquals("Using aggregation type hyperUnique is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + Throwable exception = assertThrows(DruidException.class, () -> AGGREGATOR_FACTORY.factorize(metricFactory)); + Assert.assertEquals("Using aggregator [hyperUnique] is not supported for complex columns with type [COMPLEX].", exception.getMessage()); } @Test public void testFactorizeBufferedOnUnsupportedComplexColumn() { - Throwable exception = assertThrows(UOE.class, () -> AGGREGATOR_FACTORY.factorizeBuffered(metricFactory)); - Assert.assertEquals("Using aggregation type hyperUnique is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + Throwable exception = assertThrows(DruidException.class, () -> AGGREGATOR_FACTORY.factorizeBuffered(metricFactory)); + Assert.assertEquals("Using aggregator [hyperUnique] is not supported for complex columns with type [COMPLEX].", exception.getMessage()); } @Test public void testFactorizeVectorOnUnsupportedComplexColumn() { - Throwable exception = assertThrows(UOE.class, () -> AGGREGATOR_FACTORY.factorizeVector(selectorFactory)); - Assert.assertEquals("Using aggregation type hyperUnique is not supported for COMPLEX column. Use a different aggregator type and run the query again.", exception.getMessage()); + Throwable exception = assertThrows(DruidException.class, () -> AGGREGATOR_FACTORY.factorizeVector(vectorFactory)); + Assert.assertEquals("Using aggregator [hyperUnique] is not supported for complex columns with type [COMPLEX].", exception.getMessage()); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index 3bcd04c42522..142807e97529 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -142,7 +142,10 @@ public Aggregation toDruidAggregation( } if (aggregatorFactory == null) { - plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for %s column. You can disable approximation and use COUNT(DISTINCT %s) and run the query again.", arg.getDruidType(), arg.getSimpleExtraction().getColumn()); + plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) " + + "is not supported for %s column. You can disable approximation and use " + + "COUNT(DISTINCT %s) and run the query again.", + arg.getDruidType(), arg.getSimpleExtraction().getColumn()); return null; } From 2a398f671ba4a25e2d56b68f8e59001b88a57385 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 10 Jul 2024 22:34:14 +0530 Subject: [PATCH 13/20] Address review comments --- .../hll/HllSketchMergeAggregatorFactory.java | 19 ++++++++---- .../hll/sql/HllSketchBaseSqlAggregator.java | 29 ++++++++++--------- .../theta/SketchAggregatorFactory.java | 14 +++++---- .../sql/ThetaSketchBaseSqlAggregator.java | 10 ++++--- .../hll/sql/HllSketchSqlAggregatorTest.java | 16 ++++------ .../sql/ThetaSketchSqlAggregatorTest.java | 16 ++++------ .../HyperUniquesAggregatorFactory.java | 7 +++-- ...iltinApproxCountDistinctSqlAggregator.java | 22 +++++++++----- .../calcite/CalciteNestedDataQueryTest.java | 24 +++++++-------- 9 files changed, 85 insertions(+), 72 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index ee5918910312..3e1cc7f022ed 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -158,14 +158,21 @@ private void validateInputs(@Nullable ColumnCapabilities capabilities) { if (capabilities != null) { final ColumnType type = capabilities.toColumnType(); - boolean isUnsupportedComplexType = ValueType.COMPLEX.equals(type.getType()) && - (HllSketchModule.TYPE_NAME.equals(type.getComplexTypeName()) || - HllSketchModule.BUILD_TYPE_NAME.equals(type.getComplexTypeName())); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !isUnsupportedComplexType) { + boolean isSupportedComplexType = ValueType.COMPLEX.equals(type.getType()) && + ( + HllSketchModule.TYPE_NAME.equals(type.getComplexTypeName()) || + HllSketchModule.BUILD_TYPE_NAME.equals(type.getComplexTypeName()) || + HllSketchModule.MERGE_TYPE_NAME.equals(type.getComplexTypeName()) || + type.getComplexTypeName() == null + ); + if (!isSupportedComplexType) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Using aggregator [%s] is not supported for complex columns with type [%s].", - getIntermediateType().getComplexTypeName(), type); + .build( + "Using aggregator [%s] is not supported for complex columns with type [%s].", + getIntermediateType().getComplexTypeName(), + type + ); } } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index 9d9a376ed26c..e3313131e22a 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -116,13 +116,7 @@ public Aggregation toDruidAggregation( if (columnArg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(columnArg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX) && - ( - HllSketchMergeAggregatorFactory.TYPE.equals(type) || - HllSketchModule.TYPE_NAME.equals(type.getComplexTypeName()) || - HllSketchModule.BUILD_TYPE_NAME.equals(type.getComplexTypeName()) - ) - ) + .map(type -> type.is(ValueType.COMPLEX) && validateInputComplexTypeName(type)) .orElse(false)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, @@ -161,9 +155,7 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - if (HllSketchMergeAggregatorFactory.TYPE.equals(inputType) || - HllSketchModule.TYPE_NAME.equals(inputType.getComplexTypeName()) || - HllSketchModule.BUILD_TYPE_NAME.equals(inputType.getComplexTypeName())) { + if (validateInputComplexTypeName(inputType)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, dimensionSpec.getOutputName(), @@ -192,10 +184,12 @@ public Aggregation toDruidAggregation( } if (aggregatorFactory == null) { - plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) " - + "is not supported for %s column. You can disable approximation and use " - + "COUNT(DISTINCT %s) and run the query again.", - columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); + plannerContext.setPlanningError( + "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" + + " %s column. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + columnArg.getDruidType(), + columnArg.getSimpleExtraction().getColumn() + ); return null; } @@ -211,4 +205,11 @@ protected abstract Aggregation toAggregation( boolean finalizeAggregations, AggregatorFactory aggregatorFactory ); + + private boolean validateInputComplexTypeName(ColumnType columnType) + { + return HllSketchMergeAggregatorFactory.TYPE.equals(columnType) || + HllSketchModule.TYPE_NAME.equals(columnType.getComplexTypeName()) || + HllSketchModule.BUILD_TYPE_NAME.equals(columnType.getComplexTypeName()); + } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index e85e0e0e8fd1..c41ce5e6edd2 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -118,17 +118,21 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact private void validateInputs(@Nullable ColumnCapabilities capabilities) { if (capabilities != null) { - if (capabilities.isArray() || (capabilities.is(ValueType.COMPLEX) && !( + boolean isSupportedComplexType = capabilities.is(ValueType.COMPLEX) && !( SketchModule.THETA_SKETCH_TYPE.equals(capabilities.toColumnType()) || SketchModule.MERGE_TYPE.equals(capabilities.toColumnType()) || - SketchModule.BUILD_TYPE.equals(capabilities.toColumnType()))) - ) { + SketchModule.BUILD_TYPE.equals(capabilities.toColumnType()) + ); + + if (capabilities.isArray() || isSupportedComplexType) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Unsupported input [%s] of type [%s] for aggregator [%s].", + .build( + "Unsupported input [%s] of type [%s] for aggregator [%s].", getFieldName(), capabilities.asTypeString(), - getIntermediateType()); + getIntermediateType() + ); } } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 415d90e03742..4433c6459362 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -146,10 +146,12 @@ public Aggregation toDruidAggregation( } if (aggregatorFactory == null) { - plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) " - + "is not supported for %s column. You can disable approximation and use " - + "COUNT(DISTINCT %s) and run the query again.", - columnArg.getDruidType(), columnArg.getSimpleExtraction().getColumn()); + plannerContext.setPlanningError( + "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" + + " %s column. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + columnArg.getDruidType(), + columnArg.getSimpleExtraction().getColumn() + ); return null; } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 8821e2e9e096..a1837b992307 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -527,16 +527,12 @@ public void testApproxCountDistinctHllSketchIsRounded() @Test public void testApproxCountDistinctOnUnsupportedComplexColumn() { - try { - testQuery("SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); - Assert.fail("query planning should fail"); - } - catch (DruidException e) { - Assert.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " - + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" - + " approximation and use COUNT(DISTINCT double_first_added) and run the query again.]", - e.getMessage()); - } + assertQueryIsUnplannable( + "SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", + "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " + + "approximation with COUNT(DISTINCT) is not supported for COMPLEX column. You " + + "can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" + ); } @Test diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index ec48865c210f..f4956b7540a3 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -388,16 +388,12 @@ public void testAvgDailyCountDistinctThetaSketch() @Test public void testApproxCountDistinctOnUnsupportedComplexColumn() { - try { - testQuery("SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); - Assert.fail("query planning should fail"); - } - catch (DruidException e) { - Assert.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " - + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" - + " approximation and use COUNT(DISTINCT double_first_added) and run the query again.]", - e.getMessage()); - } + assertQueryIsUnplannable( + "SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", + "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " + + "approximation with COUNT(DISTINCT) is not supported for COMPLEX column. You " + + "can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" + ); } @Test diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index 0b0f64c6a27b..a01003830780 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -146,8 +146,11 @@ private void validateInputs(@Nullable ColumnCapabilities capabilities) if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.UNSUPPORTED) - .build("Using aggregator [%s] is not supported for complex columns with type [%s].", - getIntermediateType().getComplexTypeName(), type); + .build( + "Using aggregator [%s] is not supported for complex columns with type [%s].", + getIntermediateType().getComplexTypeName(), + type + ); } } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index 142807e97529..a02a24fc2080 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -96,9 +96,7 @@ public Aggregation toDruidAggregation( if (arg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(arg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX) - && (Objects.equals(type.getComplexTypeName(), HyperUniquesAggregatorFactory.TYPE.getComplexTypeName()) || Objects.equals(type.getComplexTypeName(), HyperUniquesAggregatorFactory.PRECOMPUTED_TYPE.getComplexTypeName())) - ) + .map(type -> type.is(ValueType.COMPLEX) && validateInputComplexTypeName(type)) .orElse(false)) { aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true); } else { @@ -122,7 +120,7 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - if ((Objects.equals(inputType.getComplexTypeName(), HyperUniquesAggregatorFactory.TYPE.getComplexTypeName()) || Objects.equals(inputType.getComplexTypeName(), HyperUniquesAggregatorFactory.PRECOMPUTED_TYPE.getComplexTypeName()))) { + if (validateInputComplexTypeName(inputType)) { aggregatorFactory = new HyperUniquesAggregatorFactory( aggregatorName, dimensionSpec.getOutputName(), @@ -142,10 +140,12 @@ public Aggregation toDruidAggregation( } if (aggregatorFactory == null) { - plannerContext.setPlanningError("Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) " - + "is not supported for %s column. You can disable approximation and use " - + "COUNT(DISTINCT %s) and run the query again.", - arg.getDruidType(), arg.getSimpleExtraction().getColumn()); + plannerContext.setPlanningError( + "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" + + " %s column. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + arg.getDruidType(), + arg.getSimpleExtraction().getColumn() + ); return null; } @@ -177,4 +177,10 @@ private static class BuiltinApproxCountDistinctSqlAggFunction extends SqlAggFunc ); } } + + private boolean validateInputComplexTypeName(ColumnType columnType) + { + return Objects.equals(columnType.getComplexTypeName(), HyperUniquesAggregatorFactory.TYPE.getComplexTypeName()) || + Objects.equals(columnType.getComplexTypeName(), HyperUniquesAggregatorFactory.PRECOMPUTED_TYPE.getComplexTypeName()); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 07b70fe64589..3997f948d15b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -7324,16 +7324,12 @@ public void testNvlJsonValueDoubleSometimesMissingRangeFilter() @Test public void testApproxCountDistinctOnUnsupportedComplexColumn() { - try { - testQuery("SELECT COUNT(DISTINCT nester) FROM druid.nested", ImmutableList.of(), ImmutableList.of()); - Assertions.fail("query planning should fail"); - } - catch (DruidException e) { - Assertions.assertEquals("Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling approximation " - + "with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable" - + " approximation and use COUNT(DISTINCT nester) and run the query again.]", - e.getMessage()); - } + assertQueryIsUnplannable( + "SELECT COUNT(DISTINCT nester) FROM druid.nested", + "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " + + "approximation with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable approximation," + + " use COUNT(DISTINCT nester) and rerun the query.]" + ); } @Test @@ -7344,9 +7340,11 @@ public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() Assertions.fail("query planning should fail"); } catch (DruidException e) { - System.out.println("e.getMessage() = " + e.getMessage()); - Assertions.assertTrue(e.getMessage().contains( - "Cannot apply 'APPROX_COUNT_DISTINCT' to arguments of type 'APPROX_COUNT_DISTINCT(>)'")); + Assertions.assertTrue( + e.getMessage().contains( + "Cannot apply 'APPROX_COUNT_DISTINCT' to arguments of type 'APPROX_COUNT_DISTINCT(>)'" + ) + ); } } From 7ce44ed7884270f7695eca384555cd5d628d39d9 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 10 Jul 2024 22:40:51 +0530 Subject: [PATCH 14/20] Minor cleanup --- .../datasketches/theta/SketchAggregatorFactory.java | 4 ++-- .../hll/HllSketchMergeAggregatorFactoryTest.java | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index c41ce5e6edd2..241b4c9b5166 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -118,13 +118,13 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact private void validateInputs(@Nullable ColumnCapabilities capabilities) { if (capabilities != null) { - boolean isSupportedComplexType = capabilities.is(ValueType.COMPLEX) && !( + boolean isUnsupportedComplexType = capabilities.is(ValueType.COMPLEX) && !( SketchModule.THETA_SKETCH_TYPE.equals(capabilities.toColumnType()) || SketchModule.MERGE_TYPE.equals(capabilities.toColumnType()) || SketchModule.BUILD_TYPE.equals(capabilities.toColumnType()) ); - if (capabilities.isArray() || isSupportedComplexType) { + if (capabilities.isArray() || isUnsupportedComplexType) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.UNSUPPORTED) .build( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java index d5ac40ba7394..fcecef62d4aa 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactoryTest.java @@ -317,7 +317,8 @@ public void testFactorizeOnUnsupportedComplexColumn() Throwable exception = Assert.assertThrows(DruidException.class, () -> targetRound.factorize(metricFactory)); Assert.assertEquals( "Using aggregator [HLLSketchMerge] is not supported for complex columns with type [COMPLEX].", - exception.getMessage()); + exception.getMessage() + ); } @Test @@ -326,7 +327,8 @@ public void testFactorizeBufferedOnUnsupportedComplexColumn() Throwable exception = Assert.assertThrows(DruidException.class, () -> targetRound.factorizeBuffered(metricFactory)); Assert.assertEquals( "Using aggregator [HLLSketchMerge] is not supported for complex columns with type [COMPLEX].", - exception.getMessage()); + exception.getMessage() + ); } @Test @@ -335,6 +337,7 @@ public void testFactorizeVectorOnUnsupportedComplexColumn() Throwable exception = Assert.assertThrows(DruidException.class, () -> targetRound.factorizeVector(vectorFactory)); Assert.assertEquals( "Using aggregator [HLLSketchMerge] is not supported for complex columns with type [COMPLEX].", - exception.getMessage()); + exception.getMessage() + ); } } From 9428d1ed8590ecf27834ee92d3616b353318c13c Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 10 Jul 2024 23:43:39 +0530 Subject: [PATCH 15/20] Improve javadoc --- .../datasketches/hll/HllSketchMergeAggregatorFactory.java | 1 + .../datasketches/theta/SketchAggregatorFactory.java | 5 +++++ .../hyperloglog/HyperUniquesAggregatorFactory.java | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java index 3e1cc7f022ed..7d0bb79c71e1 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/HllSketchMergeAggregatorFactory.java @@ -152,6 +152,7 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact /** * Validates whether the aggregator supports the input column type. + * Supported column types are complex types of HLLSketch, HLLSketchBuild, HLLSketchMerge, as well as UNKNOWN_COMPLEX. * @param capabilities */ private void validateInputs(@Nullable ColumnCapabilities capabilities) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java index 241b4c9b5166..b24e382ec0aa 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/SketchAggregatorFactory.java @@ -113,6 +113,11 @@ public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFact /** * Validates whether the aggregator supports the input column type. + * Unsupported column types are: + *
    + *
  • Arrays
  • + *
  • Complex types of thetaSketch, thetaSketchMerge, thetaSketchBuild.
  • + *
* @param capabilities */ private void validateInputs(@Nullable ColumnCapabilities capabilities) diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index a01003830780..7419c001b0c7 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -137,13 +137,14 @@ public VectorAggregator factorizeVector(final VectorColumnSelectorFactory select /** * Validates whether the aggregator supports the input column type. + * Supported column types are complex types of hyperUnique, preComputedHyperUnique, as well as UNKNOWN_COMPLEX. * @param capabilities */ private void validateInputs(@Nullable ColumnCapabilities capabilities) { if (capabilities != null) { final ColumnType type = capabilities.toColumnType(); - if (!ColumnType.UNKNOWN_COMPLEX.equals(type) && !TYPE.equals(type) && !PRECOMPUTED_TYPE.equals(type)) { + if (!(ColumnType.UNKNOWN_COMPLEX.equals(type) || TYPE.equals(type) || PRECOMPUTED_TYPE.equals(type))) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.UNSUPPORTED) .build( From bc183c8c0ee40fc71a3f19c9da6c6ea66483d5ed Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 11 Jul 2024 20:07:36 +0530 Subject: [PATCH 16/20] Address review comments --- .../hll/sql/HllSketchBaseSqlAggregator.java | 6 ++--- .../hll/sql/HllSketchSqlAggregatorTest.java | 18 ++++++++----- .../sql/ThetaSketchSqlAggregatorTest.java | 18 ++++++++----- .../HyperUniquesAggregatorFactory.java | 7 +++-- .../HyperUniquesAggregatorFactoryTest.java | 27 ++++++++++++++++++- ...iltinApproxCountDistinctSqlAggregator.java | 6 ++--- .../calcite/CalciteNestedDataQueryTest.java | 23 ++++++++-------- 7 files changed, 69 insertions(+), 36 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index e3313131e22a..4cd8cd8232cf 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -116,7 +116,7 @@ public Aggregation toDruidAggregation( if (columnArg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(columnArg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX) && validateInputComplexTypeName(type)) + .map(type -> type.is(ValueType.COMPLEX) && validateInputType(type)) .orElse(false)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, @@ -155,7 +155,7 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - if (validateInputComplexTypeName(inputType)) { + if (validateInputType(inputType)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, dimensionSpec.getOutputName(), @@ -206,7 +206,7 @@ protected abstract Aggregation toAggregation( AggregatorFactory aggregatorFactory ); - private boolean validateInputComplexTypeName(ColumnType columnType) + private boolean validateInputType(ColumnType columnType) { return HllSketchMergeAggregatorFactory.TYPE.equals(columnType) || HllSketchModule.TYPE_NAME.equals(columnType.getComplexTypeName()) || diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index a1837b992307..0af6007f71e3 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -538,13 +538,17 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() @Test public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() { - try { - testQuery("SELECT APPROX_COUNT_DISTINCT_DS_HLL(double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); - Assert.fail("query planning should fail"); - } - catch (DruidException e) { - Assert.assertTrue(e.getMessage().contains("Cannot apply 'APPROX_COUNT_DISTINCT_DS_HLL' to arguments of type 'APPROX_COUNT_DISTINCT_DS_HLL(>)'")); - } + DruidException druidException = Assert.assertThrows( + DruidException.class, + () -> testQuery( + "SELECT APPROX_COUNT_DISTINCT_DS_HLL(double_first_added) FROM druid.wikipedia_first_last", + ImmutableList.of(), + ImmutableList.of() + ) + ); + Assert.assertTrue(druidException.getMessage().contains( + "Cannot apply 'APPROX_COUNT_DISTINCT_DS_HLL' to arguments of type 'APPROX_COUNT_DISTINCT_DS_HLL(>)'" + )); } @Test diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index f4956b7540a3..11218cf512a2 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -399,13 +399,17 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() @Test public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() { - try { - testQuery("SELECT APPROX_COUNT_DISTINCT_DS_THETA(double_first_added) FROM druid.wikipedia_first_last", ImmutableList.of(), ImmutableList.of()); - Assert.fail("query planning should fail"); - } - catch (DruidException e) { - Assert.assertTrue(e.getMessage().contains("Cannot apply 'APPROX_COUNT_DISTINCT_DS_THETA' to arguments of type 'APPROX_COUNT_DISTINCT_DS_THETA(>)'")); - } + DruidException druidException = Assert.assertThrows( + DruidException.class, + () -> testQuery( + "SELECT APPROX_COUNT_DISTINCT_DS_THETA(double_first_added) FROM druid.wikipedia_first_last", + ImmutableList.of(), + ImmutableList.of() + ) + ); + Assert.assertTrue(druidException.getMessage().contains( + "Cannot apply 'APPROX_COUNT_DISTINCT_DS_THETA' to arguments of type 'APPROX_COUNT_DISTINCT_DS_THETA(>)'" + )); } @Test diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java index 7419c001b0c7..c90b651fffa3 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactory.java @@ -103,22 +103,22 @@ public HyperUniquesAggregatorFactory( @Override public Aggregator factorize(ColumnSelectorFactory metricFactory) { - validateInputs(metricFactory.getColumnCapabilities(fieldName)); BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); if (selector instanceof NilColumnValueSelector) { return NoopAggregator.instance(); } + validateInputs(metricFactory.getColumnCapabilities(fieldName)); return new HyperUniquesAggregator(selector); } @Override public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) { - validateInputs(metricFactory.getColumnCapabilities(fieldName)); BaseObjectColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); if (selector instanceof NilColumnValueSelector) { return NoopBufferAggregator.instance(); } + validateInputs(metricFactory.getColumnCapabilities(fieldName)); return new HyperUniquesBufferAggregator(selector); } @@ -126,11 +126,10 @@ public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) public VectorAggregator factorizeVector(final VectorColumnSelectorFactory selectorFactory) { final ColumnCapabilities columnCapabilities = selectorFactory.getColumnCapabilities(fieldName); - validateInputs(columnCapabilities); - if (!Types.is(columnCapabilities, ValueType.COMPLEX)) { return NoopVectorAggregator.instance(); } else { + validateInputs(columnCapabilities); return new HyperUniquesVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); } } diff --git a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java index 5d1f512c75ca..c4df70a88de8 100644 --- a/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java +++ b/processing/src/test/java/org/apache/druid/query/aggregation/hyperloglog/HyperUniquesAggregatorFactoryTest.java @@ -22,12 +22,17 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; +import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.DruidException; import org.apache.druid.hll.HyperLogLogCollector; import org.apache.druid.hll.VersionZeroHyperLogLogCollector; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.NoopAggregator; +import org.apache.druid.query.aggregation.NoopBufferAggregator; +import org.apache.druid.query.aggregation.NoopVectorAggregator; import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.NilColumnValueSelector; import org.apache.druid.segment.TestColumnSelectorFactory; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; @@ -46,6 +51,10 @@ public class HyperUniquesAggregatorFactoryTest { + static { + NullHandling.initializeForTests(); + } + static final HyperUniquesAggregatorFactory AGGREGATOR_FACTORY = new HyperUniquesAggregatorFactory( "hyperUnique", "uniques" @@ -61,7 +70,9 @@ public class HyperUniquesAggregatorFactoryTest public void setup() { final ColumnCapabilitiesImpl columnCapabilities = ColumnCapabilitiesImpl.createDefault().setType(ColumnType.NESTED_DATA); - metricFactory = new TestColumnSelectorFactory().addCapabilities("uniques", columnCapabilities); + metricFactory = new TestColumnSelectorFactory() + .addCapabilities("uniques", columnCapabilities) + .addColumnSelector("uniques", null); vectorFactory = new TestVectorColumnSelectorFactory().addCapabilities("uniques", columnCapabilities); } @@ -238,6 +249,20 @@ public void testSerde() throws Exception Assert.assertEquals(factory, factory2); } + @Test + public void testFactorizeOnPrimitiveColumnType() + { + final ColumnCapabilitiesImpl columnCapabilities = ColumnCapabilitiesImpl.createDefault().setType(ColumnType.LONG); + final ColumnSelectorFactory metricFactory = new TestColumnSelectorFactory() + .addCapabilities("uniques", columnCapabilities) + .addColumnSelector("uniques", NilColumnValueSelector.instance()); + final VectorColumnSelectorFactory vectorFactory = new TestVectorColumnSelectorFactory().addCapabilities("uniques", columnCapabilities); + + Assert.assertEquals(NoopAggregator.instance(), AGGREGATOR_FACTORY.factorize(metricFactory)); + Assert.assertEquals(NoopBufferAggregator.instance(), AGGREGATOR_FACTORY.factorizeBuffered(metricFactory)); + Assert.assertEquals(NoopVectorAggregator.instance(), AGGREGATOR_FACTORY.factorizeVector(vectorFactory)); + } + @Test public void testFactorizeOnUnsupportedComplexColumn() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index a02a24fc2080..3440920c3961 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -96,7 +96,7 @@ public Aggregation toDruidAggregation( if (arg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(arg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX) && validateInputComplexTypeName(type)) + .map(type -> type.is(ValueType.COMPLEX) && validateInputType(type)) .orElse(false)) { aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true); } else { @@ -120,7 +120,7 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - if (validateInputComplexTypeName(inputType)) { + if (validateInputType(inputType)) { aggregatorFactory = new HyperUniquesAggregatorFactory( aggregatorName, dimensionSpec.getOutputName(), @@ -178,7 +178,7 @@ private static class BuiltinApproxCountDistinctSqlAggFunction extends SqlAggFunc } } - private boolean validateInputComplexTypeName(ColumnType columnType) + private boolean validateInputType(ColumnType columnType) { return Objects.equals(columnType.getComplexTypeName(), HyperUniquesAggregatorFactory.TYPE.getComplexTypeName()) || Objects.equals(columnType.getComplexTypeName(), HyperUniquesAggregatorFactory.PRECOMPUTED_TYPE.getComplexTypeName()); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 3997f948d15b..44e6f36710a5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -80,6 +80,7 @@ import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; import org.hamcrest.CoreMatchers; +import org.junit.Assert; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -7335,17 +7336,17 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() @Test public void testApproxCountDistinctFunctionOnUnsupportedComplexColumn() { - try { - testQuery("SELECT APPROX_COUNT_DISTINCT(nester) FROM druid.nested", ImmutableList.of(), ImmutableList.of()); - Assertions.fail("query planning should fail"); - } - catch (DruidException e) { - Assertions.assertTrue( - e.getMessage().contains( - "Cannot apply 'APPROX_COUNT_DISTINCT' to arguments of type 'APPROX_COUNT_DISTINCT(>)'" - ) - ); - } + DruidException druidException = Assert.assertThrows( + DruidException.class, + () -> testQuery( + "SELECT APPROX_COUNT_DISTINCT(nester) FROM druid.nested", + ImmutableList.of(), + ImmutableList.of() + ) + ); + Assert.assertTrue(druidException.getMessage().contains( + "Cannot apply 'APPROX_COUNT_DISTINCT' to arguments of type 'APPROX_COUNT_DISTINCT(>)'" + )); } @Test From 76ab0e39efbb67a0edcec6cbd9c98858bdc5c833 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Mon, 15 Jul 2024 14:36:16 +0530 Subject: [PATCH 17/20] Address review comments --- .../datasketches/hll/sql/HllSketchBaseSqlAggregator.java | 2 +- .../datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java | 2 +- .../builtin/BuiltinApproxCountDistinctSqlAggregator.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index 4cd8cd8232cf..f5035b506f19 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -116,7 +116,7 @@ public Aggregation toDruidAggregation( if (columnArg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(columnArg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX) && validateInputType(type)) + .map(this::validateInputType) .orElse(false)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 4433c6459362..9a079b8a477a 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -96,7 +96,7 @@ public Aggregation toDruidAggregation( if (columnArg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(columnArg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX) && ( + .map(type -> ( SketchModule.THETA_SKETCH_TYPE.equals(type) || SketchModule.MERGE_TYPE.equals(type) || SketchModule.BUILD_TYPE.equals(type) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index 3440920c3961..b8e77bbbbb34 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -96,7 +96,7 @@ public Aggregation toDruidAggregation( if (arg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(arg.getDirectColumn()) - .map(type -> type.is(ValueType.COMPLEX) && validateInputType(type)) + .map(this::validateInputType) .orElse(false)) { aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true); } else { From 0aceb728e019321f219b8ec5811ae24b8bcffd96 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 17 Jul 2024 17:51:06 +0530 Subject: [PATCH 18/20] Address review comments --- ...ketchApproxCountDistinctSqlAggregator.java | 2 +- .../hll/sql/HllSketchBaseSqlAggregator.java | 49 ++++++++-------- ...ketchApproxCountDistinctSqlAggregator.java | 2 +- .../sql/ThetaSketchBaseSqlAggregator.java | 56 +++++++++---------- .../hll/sql/HllSketchSqlAggregatorTest.java | 4 +- .../sql/ThetaSketchSqlAggregatorTest.java | 4 +- ...iltinApproxCountDistinctSqlAggregator.java | 35 ++++++------ .../calcite/CalciteNestedDataQueryTest.java | 4 +- 8 files changed, 74 insertions(+), 82 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java index 1644cb17a241..7d303d272740 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchApproxCountDistinctSqlAggregator.java @@ -61,7 +61,7 @@ public class HllSketchApproxCountDistinctSqlAggregator extends HllSketchBaseSqlA .operandTypeChecker( OperandTypes.or( // APPROX_COUNT_DISTINCT_DS_HLL(column) - OperandTypes.and(AGGREGATED_COLUMN_TYPE_CHECKER, OperandTypes.family(SqlTypeFamily.ANY)), + AGGREGATED_COLUMN_TYPE_CHECKER, // APPROX_COUNT_DISTINCT_DS_HLL(column, lgk) OperandTypes.and( OperandTypes.sequence( diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index f5035b506f19..00123f0ca4e4 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -110,13 +110,13 @@ public Aggregation toDruidAggregation( tgtHllType = HllSketchAggregatorFactory.DEFAULT_TGT_HLL_TYPE.name(); } - AggregatorFactory aggregatorFactory = null; + final AggregatorFactory aggregatorFactory; final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name; if (columnArg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(columnArg.getDirectColumn()) - .map(this::validateInputType) + .map(this::isValidComplexInputType) .orElse(false)) { aggregatorFactory = new HllSketchMergeAggregatorFactory( aggregatorName, @@ -155,21 +155,28 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - if (validateInputType(inputType)) { - aggregatorFactory = new HllSketchMergeAggregatorFactory( - aggregatorName, - dimensionSpec.getOutputName(), - logK, - tgtHllType, - - // For HllSketchMergeAggregatorFactory, stringEncoding is only advisory to aid in detection of mismatched - // merges. It does not affect the results of the aggregator. At this point in the code, we do not know what - // the input encoding of the original sketches was, so we set it to the default. - HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING, - finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), - ROUND + if (!isValidComplexInputType(inputType)) { + plannerContext.setPlanningError( + "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" + + " column type [%s]. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + columnArg.getDruidType(), + columnArg.getSimpleExtraction().getColumn() ); + return null; } + aggregatorFactory = new HllSketchMergeAggregatorFactory( + aggregatorName, + dimensionSpec.getOutputName(), + logK, + tgtHllType, + + // For HllSketchMergeAggregatorFactory, stringEncoding is only advisory to aid in detection of mismatched + // merges. It does not affect the results of the aggregator. At this point in the code, we do not know what + // the input encoding of the original sketches was, so we set it to the default. + HllSketchAggregatorFactory.DEFAULT_STRING_ENCODING, + finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), + ROUND + ); } else { aggregatorFactory = new HllSketchBuildAggregatorFactory( aggregatorName, @@ -183,16 +190,6 @@ public Aggregation toDruidAggregation( } } - if (aggregatorFactory == null) { - plannerContext.setPlanningError( - "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" - + " %s column. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", - columnArg.getDruidType(), - columnArg.getSimpleExtraction().getColumn() - ); - return null; - } - return toAggregation( name, finalizeAggregations, @@ -206,7 +203,7 @@ protected abstract Aggregation toAggregation( AggregatorFactory aggregatorFactory ); - private boolean validateInputType(ColumnType columnType) + private boolean isValidComplexInputType(ColumnType columnType) { return HllSketchMergeAggregatorFactory.TYPE.equals(columnType) || HllSketchModule.TYPE_NAME.equals(columnType.getComplexTypeName()) || diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java index b48249da6695..5ecd289c7287 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchApproxCountDistinctSqlAggregator.java @@ -58,7 +58,7 @@ public class ThetaSketchApproxCountDistinctSqlAggregator extends ThetaSketchBase .operandTypeChecker( OperandTypes.or( // APPROX_COUNT_DISTINCT_DS_THETA(expr) - OperandTypes.and(AGGREGATED_COLUMN_TYPE_CHECKER, OperandTypes.family(SqlTypeFamily.ANY)), + AGGREGATED_COLUMN_TYPE_CHECKER, // APPROX_COUNT_DISTINCT_DS_THETA(expr, size) OperandTypes.and( OperandTypes.sequence( diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 9a079b8a477a..41754dc6d657 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -90,7 +90,7 @@ public Aggregation toDruidAggregation( sketchSize = SketchAggregatorFactory.DEFAULT_MAX_SKETCH_SIZE; } - AggregatorFactory aggregatorFactory = null; + final AggregatorFactory aggregatorFactory; final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name; if (columnArg.isDirectColumnAccess() @@ -121,38 +121,36 @@ public Aggregation toDruidAggregation( ); } - if (!inputType.is(ValueType.COMPLEX)) { - final DimensionSpec dimensionSpec; - - if (columnArg.isDirectColumnAccess()) { - dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType); - } else { - String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( - columnArg, - dataType - ); - dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType); - } - - aggregatorFactory = new SketchMergeAggregatorFactory( - aggregatorName, - dimensionSpec.getDimension(), - sketchSize, - finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), - null, - null + if (inputType.is(ValueType.COMPLEX)) { + plannerContext.setPlanningError( + "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" + + " column type [%s]. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + columnArg.getDruidType(), + columnArg.getSimpleExtraction().getColumn() ); + return null; } - } - if (aggregatorFactory == null) { - plannerContext.setPlanningError( - "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" - + " %s column. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", - columnArg.getDruidType(), - columnArg.getSimpleExtraction().getColumn() + final DimensionSpec dimensionSpec; + + if (columnArg.isDirectColumnAccess()) { + dimensionSpec = columnArg.getSimpleExtraction().toDimensionSpec(null, inputType); + } else { + String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( + columnArg, + dataType + ); + dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType); + } + + aggregatorFactory = new SketchMergeAggregatorFactory( + aggregatorName, + dimensionSpec.getDimension(), + sketchSize, + finalizeSketch || SketchQueryContext.isFinalizeOuterSketches(plannerContext), + null, + null ); - return null; } return toAggregation( diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 0af6007f71e3..769761e0dd0a 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -530,8 +530,8 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() assertQueryIsUnplannable( "SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " - + "approximation with COUNT(DISTINCT) is not supported for COMPLEX column. You " - + "can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" + + "approximation with COUNT(DISTINCT) is not supported for column type [COMPLEX]." + + " You can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" ); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 11218cf512a2..94049a3a65ae 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -391,8 +391,8 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() assertQueryIsUnplannable( "SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " - + "approximation with COUNT(DISTINCT) is not supported for COMPLEX column. You " - + "can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" + + "approximation with COUNT(DISTINCT) is not supported for column type [COMPLEX]." + + " You can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index b8e77bbbbb34..04826645689e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -90,13 +90,13 @@ public Aggregation toDruidAggregation( return null; } - AggregatorFactory aggregatorFactory = null; + final AggregatorFactory aggregatorFactory; final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name; if (arg.isDirectColumnAccess() && inputAccessor.getInputRowSignature() .getColumnType(arg.getDirectColumn()) - .map(this::validateInputType) + .map(this::isValidComplexInputType) .orElse(false)) { aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true); } else { @@ -120,14 +120,21 @@ public Aggregation toDruidAggregation( } if (inputType.is(ValueType.COMPLEX)) { - if (validateInputType(inputType)) { - aggregatorFactory = new HyperUniquesAggregatorFactory( - aggregatorName, - dimensionSpec.getOutputName(), - false, - true + if (!isValidComplexInputType(inputType)) { + plannerContext.setPlanningError( + "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" + + " column type [%s]. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + arg.getDruidType(), + arg.getSimpleExtraction().getColumn() ); + return null; } + aggregatorFactory = new HyperUniquesAggregatorFactory( + aggregatorName, + dimensionSpec.getOutputName(), + false, + true + ); } else { aggregatorFactory = new CardinalityAggregatorFactory( aggregatorName, @@ -139,16 +146,6 @@ public Aggregation toDruidAggregation( } } - if (aggregatorFactory == null) { - plannerContext.setPlanningError( - "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" - + " %s column. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", - arg.getDruidType(), - arg.getSimpleExtraction().getColumn() - ); - return null; - } - return Aggregation.create( Collections.singletonList(aggregatorFactory), finalizeAggregations ? new HyperUniqueFinalizingPostAggregator(name, aggregatorFactory.getName()) : null @@ -178,7 +175,7 @@ private static class BuiltinApproxCountDistinctSqlAggFunction extends SqlAggFunc } } - private boolean validateInputType(ColumnType columnType) + private boolean isValidComplexInputType(ColumnType columnType) { return Objects.equals(columnType.getComplexTypeName(), HyperUniquesAggregatorFactory.TYPE.getComplexTypeName()) || Objects.equals(columnType.getComplexTypeName(), HyperUniquesAggregatorFactory.PRECOMPUTED_TYPE.getComplexTypeName()); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 44e6f36710a5..d56995cd90f4 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -7328,8 +7328,8 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() assertQueryIsUnplannable( "SELECT COUNT(DISTINCT nester) FROM druid.nested", "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " - + "approximation with COUNT(DISTINCT) is not supported for COMPLEX column. You can disable approximation," - + " use COUNT(DISTINCT nester) and rerun the query.]" + + "approximation with COUNT(DISTINCT) is not supported for column type [COMPLEX]. You can disable " + + "approximation, use COUNT(DISTINCT nester) and rerun the query.]" ); } From a0b630923bdd694b6d22736abe8b9b375cedd14a Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 17 Jul 2024 19:04:07 +0530 Subject: [PATCH 19/20] Address review comments --- .../datasketches/hll/sql/HllSketchBaseSqlAggregator.java | 5 +++-- .../datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java | 5 +++-- .../datasketches/hll/sql/HllSketchSqlAggregatorTest.java | 2 +- .../datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java | 2 +- .../builtin/BuiltinApproxCountDistinctSqlAggregator.java | 5 +++-- .../apache/druid/sql/calcite/CalciteNestedDataQueryTest.java | 4 ++-- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java index 00123f0ca4e4..15221c0f6f81 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchBaseSqlAggregator.java @@ -41,6 +41,7 @@ import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.planner.Calcites; +import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.InputAccessor; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; @@ -158,9 +159,9 @@ public Aggregation toDruidAggregation( if (!isValidComplexInputType(inputType)) { plannerContext.setPlanningError( "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" - + " column type [%s]. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + + " column type [%s]. You can disable approximation by setting [%s: false] in the query context.", columnArg.getDruidType(), - columnArg.getSimpleExtraction().getColumn() + PlannerConfig.CTX_KEY_USE_APPROXIMATE_COUNT_DISTINCT ); return null; } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java index 41754dc6d657..1f45f31496ab 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchBaseSqlAggregator.java @@ -39,6 +39,7 @@ import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.planner.Calcites; +import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.InputAccessor; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; @@ -124,9 +125,9 @@ public Aggregation toDruidAggregation( if (inputType.is(ValueType.COMPLEX)) { plannerContext.setPlanningError( "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" - + " column type [%s]. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + + " column type [%s]. You can disable approximation by setting [%s: false] in the query context.", columnArg.getDruidType(), - columnArg.getSimpleExtraction().getColumn() + PlannerConfig.CTX_KEY_USE_APPROXIMATE_COUNT_DISTINCT ); return null; } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java index 769761e0dd0a..edb7dc5a11f4 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java @@ -531,7 +531,7 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() "SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " + "approximation with COUNT(DISTINCT) is not supported for column type [COMPLEX]." - + " You can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" + + " You can disable approximation by setting [useApproximateCountDistinct: false] in the query context." ); } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java index 94049a3a65ae..7afd2710ccd0 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java @@ -392,7 +392,7 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() "SELECT COUNT(distinct double_first_added) FROM druid.wikipedia_first_last", "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " + "approximation with COUNT(DISTINCT) is not supported for column type [COMPLEX]." - + " You can disable approximation, use COUNT(DISTINCT double_first_added) and rerun the query.]" + + " You can disable approximation by setting [useApproximateCountDistinct: false] in the query context." ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java index 04826645689e..c756aa64cc31 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/BuiltinApproxCountDistinctSqlAggregator.java @@ -46,6 +46,7 @@ import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.planner.Calcites; +import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.InputAccessor; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; @@ -123,9 +124,9 @@ public Aggregation toDruidAggregation( if (!isValidComplexInputType(inputType)) { plannerContext.setPlanningError( "Using APPROX_COUNT_DISTINCT() or enabling approximation with COUNT(DISTINCT) is not supported for" - + " column type [%s]. You can disable approximation, use COUNT(DISTINCT %s) and rerun the query.", + + " column type [%s]. You can disable approximation by setting [%s: false] in the query context.", arg.getDruidType(), - arg.getSimpleExtraction().getColumn() + PlannerConfig.CTX_KEY_USE_APPROXIMATE_COUNT_DISTINCT ); return null; } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index d56995cd90f4..2731f0079880 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -7328,8 +7328,8 @@ public void testApproxCountDistinctOnUnsupportedComplexColumn() assertQueryIsUnplannable( "SELECT COUNT(DISTINCT nester) FROM druid.nested", "Query could not be planned. A possible reason is [Using APPROX_COUNT_DISTINCT() or enabling " - + "approximation with COUNT(DISTINCT) is not supported for column type [COMPLEX]. You can disable " - + "approximation, use COUNT(DISTINCT nester) and rerun the query.]" + + "approximation with COUNT(DISTINCT) is not supported for column type [COMPLEX]. " + + "You can disable approximation by setting [useApproximateCountDistinct: false] in the query context." ); } From 18e0de8725c754d0a206a611ee9ae8f80e22271f Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 18 Jul 2024 00:01:15 +0530 Subject: [PATCH 20/20] Trigger Build