From f0de4b94e698781fbf27062eb13d5b318790c509 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Wed, 13 Mar 2024 07:18:14 +0530 Subject: [PATCH 1/4] mv_filter_only to allow non literal expr --- .../apache/druid/math/expr/ApplyFunction.java | 38 ++++++++++ .../org/apache/druid/math/expr/Parser.java | 1 + .../MultiValueStringOperatorConversions.java | 30 ++++---- .../CalciteMultiValueStringQueryTest.java | 70 +++++++++++++++++++ 4 files changed, 122 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index 6403f2b28c0c..1b2c8e80b2a5 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -476,6 +476,44 @@ private Stream filter(T[] array, LambdaExpr expr, SettableLambdaBinding b } } + /** + * Extended version of {@link FilterFunction} to return a null expr if filtered result turns out to be empty + */ + class FilterOnlyFunction extends FilterFunction + { + static final String NAME = "filter_only"; + + @Override + public String name() + { + return NAME; + } + + @Override + public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) + { + Expr arrayExpr = argsExpr.get(0); + ExprEval arrayEval = arrayExpr.eval(bindings); + + Object[] array = arrayEval.asArray(); + if (array == null) { + return ExprEval.of(null); + } + + SettableLambdaBinding lambdaBinding = new SettableLambdaBinding(arrayEval.elementType(), lambdaExpr, bindings); + Object[] filtered = filter(arrayEval.asArray(), lambdaExpr, lambdaBinding).toArray(); + if (filtered.length == 0) { + return ExprEval.of(null); + } + return ExprEval.ofArray(arrayEval.asArrayType(), filtered); + } + + private Stream filter(T[] array, LambdaExpr expr, SettableLambdaBinding binding) + { + return Arrays.stream(array).filter(s -> expr.eval(binding.withBinding(expr.getIdentifier(), s)).asBoolean()); + } + } + /** * Base class for family of {@link ApplyFunction} which evaluate elements elements of a single array input against * a {@link LambdaExpr} to evaluate to a final 'truthy' value diff --git a/processing/src/main/java/org/apache/druid/math/expr/Parser.java b/processing/src/main/java/org/apache/druid/math/expr/Parser.java index e92dd130b7ce..bbc0be908be9 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/processing/src/main/java/org/apache/druid/math/expr/Parser.java @@ -556,6 +556,7 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List literals = Sets.newHashSetWithExpectedSize(lit.length); - for (Object o : lit) { - literals.add(Evals.asString(o)); - } - final DruidExpression.ExpressionGenerator builder = (args) -> { final StringBuilder expressionBuilder; if (isAllowList()) { - expressionBuilder = new StringBuilder("filter((x) -> array_contains("); + expressionBuilder = new StringBuilder("filter_only((x) -> array_contains("); } else { - expressionBuilder = new StringBuilder("filter((x) -> !array_contains("); + expressionBuilder = new StringBuilder("filter_only((x) -> !array_contains("); } expressionBuilder.append(args.get(1).getExpression()) @@ -370,7 +356,17 @@ public DruidExpression toDruidExpression( return expressionBuilder.toString(); }; - if (druidExpressions.get(0).isSimpleExtraction()) { + Expr expr = plannerContext.parseExpression(druidExpressions.get(1).getExpression()); + if (druidExpressions.get(0).isSimpleExtraction() && expr.isLiteral()) { + Object[] lit = expr.eval(InputBindings.nilBindings()).asArray(); + if (lit == null || lit.length == 0) { + return null; + } + HashSet literals = Sets.newHashSetWithExpectedSize(lit.length); + for (Object o : lit) { + literals.add(Evals.asString(o)); + } + DruidExpression druidExpression = DruidExpression.ofVirtualColumn( Calcites.getColumnTypeForRelDataType(rexNode.getType()), builder, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java index a93a02a543d2..6a9882a735aa 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java @@ -1333,6 +1333,41 @@ public void testMultiValueListFilter() ); } + @Test + public void testMultiValueListFilterNonLiteral() + { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + + testQuery( + "SELECT MV_FILTER_ONLY(dim3, ARRAY[dim2]) FROM druid.numfoo", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new ExpressionVirtualColumn( + "v0", + "filter_only((x) -> array_contains(array(\"dim2\"), x), \"dim3\")", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .columns("v0") + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{NullHandling.defaultStringValue()} + ) + ); + } + @Test public void testMultiValueListFilterDeny() { @@ -1388,6 +1423,41 @@ public void testMultiValueListFilterDeny() ); } + @Test + public void testMultiValueListFilterDenyNonLiteral() + { + // Cannot vectorize due to usage of expressions. + cannotVectorize(); + + testQuery( + "SELECT MV_FILTER_NONE(dim3, ARRAY[dim2]) FROM druid.numfoo", + ImmutableList.of( + newScanQueryBuilder() + .dataSource(CalciteTests.DATASOURCE3) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new ExpressionVirtualColumn( + "v0", + "filter_only((x) -> !array_contains(array(\"dim2\"), x), \"dim3\")", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ) + ) + .columns("v0") + .context(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"[\"b\",\"c\"]"}, + new Object[]{"d"}, + new Object[]{""}, + new Object[]{NullHandling.defaultStringValue()}, + new Object[]{NullHandling.defaultStringValue()} + ) + ); + } + @Test public void testMultiValueListFilterComposed() { From 59ced9754c77c1ba2bfd38a031e100dc56b9cc43 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Thu, 14 Mar 2024 08:12:42 +0530 Subject: [PATCH 2/4] update existing filter function to return null --- .../apache/druid/math/expr/ApplyFunction.java | 44 +++---------------- .../org/apache/druid/math/expr/Parser.java | 1 - .../MultiValueStringOperatorConversions.java | 4 +- .../CalciteMultiValueStringQueryTest.java | 4 +- 4 files changed, 9 insertions(+), 44 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java b/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java index 1b2c8e80b2a5..e2b1537fb0ab 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ApplyFunction.java @@ -442,11 +442,15 @@ public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBin Object[] array = arrayEval.asArray(); if (array == null) { - return ExprEval.of(null); + return ExprEval.ofArray(arrayEval.asArrayType(), null); } SettableLambdaBinding lambdaBinding = new SettableLambdaBinding(arrayEval.elementType(), lambdaExpr, bindings); Object[] filtered = filter(arrayEval.asArray(), lambdaExpr, lambdaBinding).toArray(); + // return null array expr if nothing is left in filtered + if (filtered.length == 0) { + return ExprEval.ofArray(arrayEval.asArrayType(), null); + } return ExprEval.ofArray(arrayEval.asArrayType(), filtered); } @@ -476,44 +480,6 @@ private Stream filter(T[] array, LambdaExpr expr, SettableLambdaBinding b } } - /** - * Extended version of {@link FilterFunction} to return a null expr if filtered result turns out to be empty - */ - class FilterOnlyFunction extends FilterFunction - { - static final String NAME = "filter_only"; - - @Override - public String name() - { - return NAME; - } - - @Override - public ExprEval apply(LambdaExpr lambdaExpr, List argsExpr, Expr.ObjectBinding bindings) - { - Expr arrayExpr = argsExpr.get(0); - ExprEval arrayEval = arrayExpr.eval(bindings); - - Object[] array = arrayEval.asArray(); - if (array == null) { - return ExprEval.of(null); - } - - SettableLambdaBinding lambdaBinding = new SettableLambdaBinding(arrayEval.elementType(), lambdaExpr, bindings); - Object[] filtered = filter(arrayEval.asArray(), lambdaExpr, lambdaBinding).toArray(); - if (filtered.length == 0) { - return ExprEval.of(null); - } - return ExprEval.ofArray(arrayEval.asArrayType(), filtered); - } - - private Stream filter(T[] array, LambdaExpr expr, SettableLambdaBinding binding) - { - return Arrays.stream(array).filter(s -> expr.eval(binding.withBinding(expr.getIdentifier(), s)).asBoolean()); - } - } - /** * Base class for family of {@link ApplyFunction} which evaluate elements elements of a single array input against * a {@link LambdaExpr} to evaluate to a final 'truthy' value diff --git a/processing/src/main/java/org/apache/druid/math/expr/Parser.java b/processing/src/main/java/org/apache/druid/math/expr/Parser.java index bbc0be908be9..e92dd130b7ce 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/Parser.java +++ b/processing/src/main/java/org/apache/druid/math/expr/Parser.java @@ -556,7 +556,6 @@ private static ApplyFunctionExpr liftApplyLambda(ApplyFunctionExpr expr, List { final StringBuilder expressionBuilder; if (isAllowList()) { - expressionBuilder = new StringBuilder("filter_only((x) -> array_contains("); + expressionBuilder = new StringBuilder("filter((x) -> array_contains("); } else { - expressionBuilder = new StringBuilder("filter_only((x) -> !array_contains("); + expressionBuilder = new StringBuilder("filter((x) -> !array_contains("); } expressionBuilder.append(args.get(1).getExpression()) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java index 6a9882a735aa..4a7250c09284 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteMultiValueStringQueryTest.java @@ -1348,7 +1348,7 @@ public void testMultiValueListFilterNonLiteral() .virtualColumns( new ExpressionVirtualColumn( "v0", - "filter_only((x) -> array_contains(array(\"dim2\"), x), \"dim3\")", + "filter((x) -> array_contains(array(\"dim2\"), x), \"dim3\")", ColumnType.STRING, TestExprMacroTable.INSTANCE ) @@ -1438,7 +1438,7 @@ public void testMultiValueListFilterDenyNonLiteral() .virtualColumns( new ExpressionVirtualColumn( "v0", - "filter_only((x) -> !array_contains(array(\"dim2\"), x), \"dim3\")", + "filter((x) -> !array_contains(array(\"dim2\"), x), \"dim3\")", ColumnType.STRING, TestExprMacroTable.INSTANCE ) From 47ae2e508bd49305ed7849c4008bae0d44304be1 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Wed, 20 Mar 2024 16:17:32 +0530 Subject: [PATCH 3/4] Trigger Build From e113f5dce0d52201f3bbcfcb81add3765f8b94c8 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Wed, 20 Mar 2024 19:38:27 +0530 Subject: [PATCH 4/4] coverage --- .../java/org/apache/druid/math/expr/ApplyFunctionTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/processing/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java b/processing/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java index 272d219bd5b7..2eb0aadf0bd8 100644 --- a/processing/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java +++ b/processing/src/test/java/org/apache/druid/math/expr/ApplyFunctionTest.java @@ -93,6 +93,10 @@ public void testFilter() assertExpr("filter((x) -> x > 2, [1, 2, 3, 4, 5])", new Long[] {3L, 4L, 5L}); assertExpr("filter((x) -> x > 2, b)", new Long[] {3L, 4L, 5L}); + + String dummyNull = null; + assertExpr("filter((x) -> array_contains([], x), ['a', 'b'])", dummyNull); + assertExpr("filter((x) -> array_contains([], x), null)", dummyNull); } @Test