diff --git a/docs/source/user-guide/latest/compatibility.md b/docs/source/user-guide/latest/compatibility.md index b81c6c869b..1ed805513b 100644 --- a/docs/source/user-guide/latest/compatibility.md +++ b/docs/source/user-guide/latest/compatibility.md @@ -60,8 +60,6 @@ the [Comet Supported Expressions Guide](expressions.md) for more information on ### Array Expressions -- **ArrayRemove**: Returns null when the element to remove is null, instead of removing null elements from the array. - [#3173](https://github.com/apache/datafusion-comet/issues/3173) - **ArraysOverlap**: Inconsistent behavior when arrays contain NULL values. [#3645](https://github.com/apache/datafusion-comet/issues/3645), [#2036](https://github.com/apache/datafusion-comet/issues/2036) diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index e269ae10c0..16c0823475 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -243,7 +243,7 @@ Comet supports using the following aggregate functions within window contexts wi | ArrayJoin | No | | | ArrayMax | Yes | | | ArrayMin | Yes | | -| ArrayRemove | No | Returns null when element is null instead of removing null elements ([#3173](https://github.com/apache/datafusion-comet/issues/3173)) | +| ArrayRemove | Yes | | | ArrayRepeat | No | | | ArrayUnion | No | Behaves differently than spark. Comet sorts the input arrays before performing the union, while Spark preserves the order of the first array and appends unique elements from the second. 
| | ArraysOverlap | No | | diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index ccfb4f4bf8..f107d5b309 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -35,12 +35,6 @@ object CometArrayRemove with CometExprShim with ArraysBase { - override def getSupportLevel(expr: ArrayRemove): SupportLevel = - Incompatible( - Some( - "Returns null when element is null instead of removing null elements" + - " (https://github.com/apache/datafusion-comet/issues/3173)")) - override def convert( expr: ArrayRemove, inputs: Seq[Attribute], @@ -55,34 +49,7 @@ object CometArrayRemove val arrayExprProto = exprToProto(expr.left, inputs, binding) val keyExprProto = exprToProto(expr.right, inputs, binding) - val arrayRemoveScalarExpr = - scalarFunctionExprToProto("array_remove_all", arrayExprProto, keyExprProto) - - val isNotNullExpr = createUnaryExpr( - expr, - expr.right, - inputs, - binding, - (builder, unaryExpr) => builder.setIsNotNull(unaryExpr)) - - val nullLiteralProto = exprToProto(Literal(null, expr.right.dataType), Seq.empty) - - if (arrayRemoveScalarExpr.isDefined && isNotNullExpr.isDefined && nullLiteralProto.isDefined) { - val caseWhenExpr = ExprOuterClass.CaseWhen - .newBuilder() - .addWhen(isNotNullExpr.get) - .addThen(arrayRemoveScalarExpr.get) - .setElseExpr(nullLiteralProto.get) - .build() - Some( - ExprOuterClass.Expr - .newBuilder() - .setCaseWhen(caseWhenExpr) - .build()) - } else { - withInfo(expr, expr.children: _*) - None - } + scalarFunctionExprToProto("array_remove_all", arrayExprProto, keyExprProto) } } diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_remove.sql b/spark/src/test/resources/sql-tests/expressions/array/array_remove.sql index aead1fa44d..a5b20c756f 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/array_remove.sql +++ 
b/spark/src/test/resources/sql-tests/expressions/array/array_remove.sql @@ -24,15 +24,15 @@ CREATE TABLE test_array_remove(arr array<int>, val int) USING parquet statement INSERT INTO test_array_remove VALUES (array(1, 2, 3, 2), 2), (array(1, 2, 3), 4), (array(), 1), (NULL, 1), (array(1, NULL, 3), NULL) -query spark_answer_only +query SELECT array_remove(arr, val) FROM test_array_remove -- column + literal -query spark_answer_only +query SELECT array_remove(arr, 2) FROM test_array_remove -- literal + column -query spark_answer_only +query SELECT array_remove(array(1, 2, 3, 2), val) FROM test_array_remove -- literal + literal diff --git a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala index fb5531a573..bb519492db 100644 --- a/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometArrayExpressionSuite.scala @@ -137,7 +137,7 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelp sql("SELECT array(struct(_1, _2)) as a, struct(_1, _2) as b FROM t1") .createOrReplaceTempView("t2") val expectedFallbackReason = - "is not fully compatible with Spark" + "data type not supported" checkSparkAnswerAndFallbackReason( sql("SELECT array_remove(a, b) FROM t2"), expectedFallbackReason)