From 79dc1f46b4f74115bd32ed3a2bc55c357c55ce6c Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Wed, 17 Aug 2022 13:47:16 -0700 Subject: [PATCH 01/13] scalar functionality --- datafusion/common/src/scalar.rs | 9 ++ .../core/src/datasource/listing/helpers.rs | 1 + datafusion/core/src/physical_plan/planner.rs | 4 + datafusion/expr/src/expr.rs | 14 ++ datafusion/expr/src/expr_rewriter.rs | 1 + datafusion/expr/src/expr_schema.rs | 3 +- datafusion/expr/src/expr_visitor.rs | 1 + datafusion/expr/src/utils.rs | 1 + .../optimizer/src/common_subexpr_eliminate.rs | 3 + .../optimizer/src/simplify_expressions.rs | 1 + .../physical-expr/src/expressions/is_true.rs | 140 ++++++++++++++++++ .../physical-expr/src/expressions/mod.rs | 2 + datafusion/physical-expr/src/planner.rs | 6 + datafusion/sql/src/planner.rs | 4 + datafusion/sql/src/utils.rs | 3 + 15 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 datafusion/physical-expr/src/expressions/is_true.rs diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index dff97d2f981f..b4cab2dfd83d 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -726,6 +726,15 @@ impl ScalarValue { } } + /// whether this value is true or not. + pub fn is_true(&self) -> bool { + println!("{:?}", &self); + match self { + ScalarValue::Boolean(v) => v.eq(&Some(true)), + _ => false, + } + } + /// whether this value is null or not. pub fn is_null(&self) -> bool { match self { diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 6c018eda3e76..2ac1d0c6908b 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -82,6 +82,7 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> { | Expr::Alias(_, _) | Expr::ScalarVariable(_, _) | Expr::Not(_) + | Expr::IsTrue(_) | Expr::IsNotNull(_) | Expr::IsNull(_) | Expr::Negative(_) diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 46e09ff27445..941e3a4ebb30 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -148,6 +148,10 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { let expr = create_physical_name(expr, false)?; Ok(format!("{} IS NULL", expr)) } + Expr::IsTrue(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{} IS TRUE", expr)) + } Expr::IsNotNull(expr) => { let expr = create_physical_name(expr, false)?; Ok(format!("{} IS NOT NULL", expr)) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index ba6f7a96c29d..390921b94d5d 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -102,6 +102,8 @@ pub enum Expr { }, /// Negation of an expression. The expression's type must be a boolean to make sense. Not(Box), + /// Whether an expression is true. This expression is never null. + IsTrue(Box), /// Whether an expression is not Null. This expression is never null. IsNotNull(Box), /// Whether an expression is Null. This expression is never null. @@ -333,6 +335,7 @@ impl Expr { Expr::GroupingSet(..) => "GroupingSet", Expr::InList { .. } => "InList", Expr::InSubquery { .. } => "InSubquery", + Expr::IsTrue(..) => "IsTrue", Expr::IsNotNull(..) => "IsNotNull", Expr::IsNull(..) => "IsNull", Expr::Literal(..) => "Literal", @@ -433,6 +436,12 @@ impl Expr { Expr::IsNull(Box::new(self)) } + /// Return `IsTrue(Box(self)) + #[allow(clippy::wrong_self_convention)] + pub fn is_true(self) -> Expr { + Expr::IsTrue(Box::new(self)) + } + /// Return `IsNotNull(Box(self)) #[allow(clippy::wrong_self_convention)] pub fn is_not_null(self) -> Expr { @@ -547,6 +556,7 @@ impl fmt::Debug for Expr { Expr::Not(expr) => write!(f, "NOT {:?}", expr), Expr::Negative(expr) => write!(f, "(- {:?})", expr), Expr::IsNull(expr) => write!(f, "{:?} IS NULL", expr), + Expr::IsTrue(expr) => write!(f, "{:?} IS TRUE", expr), Expr::IsNotNull(expr) => write!(f, "{:?} IS NOT NULL", expr), Expr::Exists { subquery, @@ -795,6 +805,10 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { let expr = create_name(expr, input_schema)?; Ok(format!("{} IS NULL", expr)) } + Expr::IsTrue(expr) => { + let expr = create_name(expr, input_schema)?; + Ok(format!("{} IS TRUE", expr)) + } Expr::IsNotNull(expr) => { let expr = create_name(expr, input_schema)?; Ok(format!("{} IS NOT NULL", expr)) diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index e8cf049dde6f..9cee88714385 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -128,6 +128,7 @@ impl ExprRewritable for Expr { right: rewrite_boxed(right, rewriter)?, }, Expr::Not(expr) => Expr::Not(rewrite_boxed(expr, rewriter)?), + Expr::IsTrue(expr) => Expr::IsTrue(rewrite_boxed(expr, rewriter)?), Expr::IsNotNull(expr) => Expr::IsNotNull(rewrite_boxed(expr, rewriter)?), Expr::IsNull(expr) => Expr::IsNull(rewrite_boxed(expr, rewriter)?), Expr::Negative(expr) => Expr::Negative(rewrite_boxed(expr, rewriter)?), diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index bbb414655c26..a5d8b74410e7 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -104,6 +104,7 @@ impl ExprSchemable for Expr { | Expr::InSubquery { .. } | Expr::Between { .. } | Expr::InList { .. } + | Expr::IsTrue(_) | Expr::IsNotNull(_) => Ok(DataType::Boolean), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).data_type().clone()) @@ -183,7 +184,7 @@ impl ExprSchemable for Expr { | Expr::WindowFunction { .. } | Expr::AggregateFunction { .. } | Expr::AggregateUDF { .. } => Ok(true), - Expr::IsNull(_) | Expr::IsNotNull(_) | Expr::Exists { .. } => Ok(false), + Expr::IsNull(_) | Expr::IsTrue(_) | Expr::IsNotNull(_) | Expr::Exists { .. } => Ok(false), Expr::InSubquery { expr, .. } => expr.nullable(input_schema), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).is_nullable()) diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs index 162db60a03c9..d9a0af3f8b30 100644 --- a/datafusion/expr/src/expr_visitor.rs +++ b/datafusion/expr/src/expr_visitor.rs @@ -96,6 +96,7 @@ impl ExprVisitable for Expr { let visitor = match self { Expr::Alias(expr, _) | Expr::Not(expr) + | Expr::IsTrue(expr) | Expr::IsNotNull(expr) | Expr::IsNull(expr) | Expr::Negative(expr) diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 25ca8d6e36d6..a8ebe45f9bd7 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -80,6 +80,7 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> { | Expr::Literal(_) | Expr::BinaryExpr { .. } | Expr::Not(_) + | Expr::IsTrue(_) | Expr::IsNotNull(_) | Expr::IsNull(_) | Expr::Negative(_) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 8627b404dce8..007b1eb2bf47 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -408,6 +408,9 @@ impl ExprIdentifierVisitor<'_> { Expr::Not(_) => { desc.push_str("Not-"); } + Expr::IsTrue(_) => { + desc.push_str("IsTrue-"); + } Expr::IsNotNull(_) => { desc.push_str("IsNotNull-"); } diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 8bb829024f39..60cb83a03398 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -515,6 +515,7 @@ impl<'a> ConstEvaluator<'a> { Expr::Literal(_) | Expr::BinaryExpr { .. } | Expr::Not(_) + | Expr::IsTrue(_) | Expr::IsNotNull(_) | Expr::IsNull(_) | Expr::Negative(_) diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs new file mode 100644 index 000000000000..4184732acd49 --- /dev/null +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! IS TRUE expression + +use std::{any::Any, sync::Arc}; + +use crate::PhysicalExpr; +use arrow::compute; // ? +use arrow::{ + array::BooleanArray, // ? + datatypes::{DataType, Schema}, + record_batch::RecordBatch, +}; +use datafusion_common::Result; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; + +/// IS TRUE expression +#[derive(Debug)] +pub struct IsTrueExpr { + /// The input expression + arg: Arc, +} + +impl IsTrueExpr { + /// Create new not expression + pub fn new(arg: Arc) -> Self { + Self { arg } + } + + /// Get the input expression + pub fn arg(&self) -> &Arc { + &self.arg + } +} + +impl std::fmt::Display for IsTrueExpr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} IS TRUE", self.arg) + } +} + +impl PhysicalExpr for IsTrueExpr { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _input_schema: &Schema) -> Result { + Ok(DataType::Boolean) + } + + fn nullable(&self, _input_schema: &Schema) -> Result { + Ok(false) + } + + fn evaluate(&self, batch: &RecordBatch) -> Result { + let arg = self.arg.evaluate(batch)?; + println!("{:?}", arg); + match arg { + /* + ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new( + compute::is_not_null(array.as_ref())?, + ))), + */ + ColumnarValue::Array(array) => { + println!("{:?}", array); + println!("{}", array.len()); + // let array_slice = array.slice(1,1); + // let my_slice: &dyn arrow::array::Array = &*array_slice; + // let my_array = ColumnarValue::Array(array).into_array(3); + // let my_data = my_array.data(); + Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))) // TODO + // Ok(ColumnarValue::Array(Arc::new(compute::is_not_null(array.as_ref())?))) + } + ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( + ScalarValue::Boolean(Some(scalar.is_true())), + )), + } + } +} + +//pub fn eq_true(arr: &BooleanArray) -> Result { +// +//} + +/// Create an IS TRUE expression +pub fn is_true(arg: Arc) -> Result> { + Ok(Arc::new(IsTrueExpr::new(arg))) +} + +/* +#[cfg(test)] +mod tests { + use super::*; + use crate::expressions::col; + use arrow::{ + array::{BooleanArray, StringArray}, + datatypes::*, + record_batch::RecordBatch, + }; + use std::sync::Arc; + + #[test] + fn is_not_null_op() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]); + let a = StringArray::from(vec![Some("foo"), None]); + let expr = is_not_null(col("a", &schema)?).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; + + // expression: "a is not null" + let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); + let result = result + .as_any() + .downcast_ref::() + .expect("failed to downcast to BooleanArray"); + + let expected = &BooleanArray::from(vec![true, false]); + + assert_eq!(expected, result); + + Ok(()) + } +} +*/ diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 00bf6aafade4..cb3b57694666 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -27,6 +27,7 @@ mod delta; mod get_indexed_field; mod in_list; mod is_not_null; +mod is_true; mod is_null; mod literal; mod negative; @@ -76,6 +77,7 @@ pub use column::{col, Column}; pub use datetime::DateTimeIntervalExpr; pub use get_indexed_field::GetIndexedFieldExpr; pub use in_list::{in_list, InListExpr}; +pub use is_true::{is_true, IsTrueExpr}; pub use is_not_null::{is_not_null, IsNotNullExpr}; pub use is_null::{is_null, IsNullExpr}; pub use literal::{lit, Literal}; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 44fe0595ac58..03801e09692c 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -195,6 +195,12 @@ pub fn create_physical_expr( input_schema, execution_props, )?), + Expr::IsTrue(expr) => expressions::is_true(create_physical_expr( + expr, + input_dfschema, + input_schema, + execution_props, + )?), Expr::IsNotNull(expr) => expressions::is_not_null(create_physical_expr( expr, input_dfschema, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 28c82f80246f..8c71bb81bb68 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -1848,6 +1848,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.sql_expr_to_logical_expr(*expr, schema, ctes)?, ))), + SQLExpr::IsTrue(expr) => Ok(Expr::IsTrue(Box::new( + self.sql_expr_to_logical_expr(*expr, schema, ctes)?, + ))), + SQLExpr::IsNotNull(expr) => Ok(Expr::IsNotNull(Box::new( self.sql_expr_to_logical_expr(*expr, schema, ctes)?, ))), diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 81ea34de187b..1e94bab0ece8 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -280,6 +280,9 @@ where nested_expr, replacement_fn, )?))), + Expr::IsTrue(nested_expr) => Ok(Expr::IsTrue(Box::new( + clone_with_replacement(nested_expr, replacement_fn)?, + ))), Expr::IsNotNull(nested_expr) => Ok(Expr::IsNotNull(Box::new( clone_with_replacement(nested_expr, replacement_fn)?, ))), From 4ced79f86135a4914cf68c58c288527c79760809 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Thu, 18 Aug 2022 10:54:08 -0700 Subject: [PATCH 02/13] IsTrue full functionality --- datafusion/common/src/scalar.rs | 4 +- .../physical-expr/src/expressions/is_true.rs | 75 +++++-------------- datafusion/proto/proto/datafusion.proto | 7 ++ datafusion/proto/src/from_proto.rs | 3 + datafusion/proto/src/to_proto.rs | 8 ++ 5 files changed, 40 insertions(+), 57 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index b4cab2dfd83d..b7d5a1ee791b 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -728,10 +728,10 @@ impl ScalarValue { /// whether this value is true or not. pub fn is_true(&self) -> bool { - println!("{:?}", &self); match self { ScalarValue::Boolean(v) => v.eq(&Some(true)), - _ => false, + ScalarValue::Null => false, + other => panic!("Cannot apply 'IS TRUE' to arguments of type '{} IS TRUE'. Supported form(s): ' IS TRUE'", other) } } diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index 4184732acd49..cfd3e21e1a70 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -20,9 +20,8 @@ use std::{any::Any, sync::Arc}; use crate::PhysicalExpr; -use arrow::compute; // ? use arrow::{ - array::BooleanArray, // ? + array::{Array, BooleanArray}, datatypes::{DataType, Schema}, record_batch::RecordBatch, }; @@ -71,22 +70,27 @@ impl PhysicalExpr for IsTrueExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { let arg = self.arg.evaluate(batch)?; - println!("{:?}", arg); match arg { - /* - ColumnarValue::Array(array) => Ok(ColumnarValue::Array(Arc::new( - compute::is_not_null(array.as_ref())?, - ))), - */ ColumnarValue::Array(array) => { - println!("{:?}", array); - println!("{}", array.len()); - // let array_slice = array.slice(1,1); - // let my_slice: &dyn arrow::array::Array = &*array_slice; - // let my_array = ColumnarValue::Array(array).into_array(3); - // let my_data = my_array.data(); - Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))) // TODO - // Ok(ColumnarValue::Array(Arc::new(compute::is_not_null(array.as_ref())?))) + let array_len = array.len(); + let my_array = ColumnarValue::Array(array).into_array(array_len); + let true_array = BooleanArray::from(vec![Some(true)]); + let false_array = BooleanArray::from(vec![Some(false)]); + let null_array = BooleanArray::from(vec![None]); + let mut result_vec = vec![]; + for i in 0..array_len { + let current = (*my_array).slice(i,1); + if (*current).eq(&true_array) { + result_vec.push(Some(true)); + } else if (*current).eq(&false_array) || (*current).eq(&null_array) { + result_vec.push(Some(false)); + } else { + panic!("Cannot apply 'IS TRUE' to arguments of type '<{:?}> IS TRUE'. Supported form(s): ' IS TRUE'", current.data_type()) + } + } + + let return_array = BooleanArray::from(result_vec); + Ok(ColumnarValue::Array(Arc::new(return_array))) } ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( ScalarValue::Boolean(Some(scalar.is_true())), @@ -95,46 +99,7 @@ impl PhysicalExpr for IsTrueExpr { } } -//pub fn eq_true(arr: &BooleanArray) -> Result { -// -//} - /// Create an IS TRUE expression pub fn is_true(arg: Arc) -> Result> { Ok(Arc::new(IsTrueExpr::new(arg))) } - -/* -#[cfg(test)] -mod tests { - use super::*; - use crate::expressions::col; - use arrow::{ - array::{BooleanArray, StringArray}, - datatypes::*, - record_batch::RecordBatch, - }; - use std::sync::Arc; - - #[test] - fn is_not_null_op() -> Result<()> { - let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]); - let a = StringArray::from(vec![Some("foo"), None]); - let expr = is_not_null(col("a", &schema)?).unwrap(); - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; - - // expression: "a is not null" - let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); - let result = result - .as_any() - .downcast_ref::() - .expect("failed to downcast to BooleanArray"); - - let expected = &BooleanArray::from(vec![true, false]); - - assert_eq!(expected, result); - - Ok(()) - } -} -*/ diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 7b08e4f40456..82f4f9741d74 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -312,6 +312,9 @@ message LogicalExprNode { CubeNode cube = 23; RollupNode rollup = 24; + + // boolean operations + IsTrue is_true = 25; } } @@ -346,6 +349,10 @@ message IsNotNull { LogicalExprNode expr = 1; } +message IsTrue { + LogicalExprNode expr = 1; +} + message Not { LogicalExprNode expr = 1; } diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 524b03bd6933..8836726a4289 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -905,6 +905,9 @@ pub fn parse_expr( ExprType::IsNotNullExpr(is_not_null) => Ok(Expr::IsNotNull(Box::new( parse_required_expr(&is_not_null.expr, registry, "expr")?, ))), + ExprType::IsTrueExpr(is_true) => Ok(Expr::IsTrue(Box::new( + parse_required_expr(&is_true.expr, registry, "expr")?, + ))), ExprType::NotExpr(not) => Ok(Expr::Not(Box::new(parse_required_expr( ¬.expr, registry, "expr", )?))), diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index 045b97a3188d..16778f46b055 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -616,6 +616,14 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { expr_type: Some(ExprType::IsNotNullExpr(expr)), } } + Expr::IsTrue(expr) => { + let expr = Box::new(protobuf::IsTrue { + expr: Some(Box::new(expr.as_ref().try_into()?)), + }); + Self { + expr_type: Some(ExprType::IsTrueExpr(expr)), + } + } Expr::Between { expr, negated, From 71c0001686fee28a47b4ab2a97053765ba59b254 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Thu, 18 Aug 2022 12:33:42 -0700 Subject: [PATCH 03/13] IS FALSE functionality --- datafusion/common/src/scalar.rs | 9 ++ .../core/src/datasource/listing/helpers.rs | 1 + datafusion/core/src/physical_plan/planner.rs | 4 + datafusion/expr/src/expr.rs | 14 +++ datafusion/expr/src/expr_rewriter.rs | 1 + datafusion/expr/src/expr_schema.rs | 3 +- datafusion/expr/src/expr_visitor.rs | 1 + datafusion/expr/src/utils.rs | 1 + .../optimizer/src/common_subexpr_eliminate.rs | 3 + .../optimizer/src/simplify_expressions.rs | 1 + .../physical-expr/src/expressions/is_false.rs | 105 ++++++++++++++++++ .../physical-expr/src/expressions/mod.rs | 2 + datafusion/physical-expr/src/planner.rs | 6 + datafusion/proto/proto/datafusion.proto | 5 + datafusion/proto/src/from_proto.rs | 3 + datafusion/proto/src/to_proto.rs | 8 ++ datafusion/sql/src/planner.rs | 4 + datafusion/sql/src/utils.rs | 3 + 18 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 datafusion/physical-expr/src/expressions/is_false.rs diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index b7d5a1ee791b..44ed03849cf1 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -735,6 +735,15 @@ impl ScalarValue { } } + /// whether this value is false or not. + pub fn is_false(&self) -> bool { + match self { + ScalarValue::Boolean(v) => v.eq(&Some(false)), + ScalarValue::Null => false, + other => panic!("Cannot apply 'IS FALSE' to arguments of type '{} IS FALSE'. Supported form(s): ' IS FALSE'", other) + } + } + /// whether this value is null or not. pub fn is_null(&self) -> bool { match self { diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 2ac1d0c6908b..7f36e2a5dd84 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -83,6 +83,7 @@ impl ExpressionVisitor for ApplicabilityVisitor<'_> { | Expr::ScalarVariable(_, _) | Expr::Not(_) | Expr::IsTrue(_) + | Expr::IsFalse(_) | Expr::IsNotNull(_) | Expr::IsNull(_) | Expr::Negative(_) diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 941e3a4ebb30..9431094ad092 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -152,6 +152,10 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { let expr = create_physical_name(expr, false)?; Ok(format!("{} IS TRUE", expr)) } + Expr::IsFalse(expr) => { + let expr = create_physical_name(expr, false)?; + Ok(format!("{} IS FALSE", expr)) + } Expr::IsNotNull(expr) => { let expr = create_physical_name(expr, false)?; Ok(format!("{} IS NOT NULL", expr)) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 390921b94d5d..48990b2c2c3e 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -104,6 +104,8 @@ pub enum Expr { Not(Box), /// Whether an expression is true. This expression is never null. IsTrue(Box), + /// Whether an expression is false. This expression is never null. + IsFalse(Box), /// Whether an expression is not Null. This expression is never null. IsNotNull(Box), /// Whether an expression is Null. This expression is never null. @@ -336,6 +338,7 @@ impl Expr { Expr::InList { .. } => "InList", Expr::InSubquery { .. } => "InSubquery", Expr::IsTrue(..) => "IsTrue", + Expr::IsFalse(..) => "IsFalse", Expr::IsNotNull(..) => "IsNotNull", Expr::IsNull(..) => "IsNull", Expr::Literal(..) => "Literal", @@ -442,6 +445,12 @@ impl Expr { Expr::IsTrue(Box::new(self)) } + /// Return `IsFalse(Box(self)) + #[allow(clippy::wrong_self_convention)] + pub fn is_false(self) -> Expr { + Expr::IsFalse(Box::new(self)) + } + /// Return `IsNotNull(Box(self)) #[allow(clippy::wrong_self_convention)] pub fn is_not_null(self) -> Expr { @@ -557,6 +566,7 @@ impl fmt::Debug for Expr { Expr::Negative(expr) => write!(f, "(- {:?})", expr), Expr::IsNull(expr) => write!(f, "{:?} IS NULL", expr), Expr::IsTrue(expr) => write!(f, "{:?} IS TRUE", expr), + Expr::IsFalse(expr) => write!(f, "{:?} IS FALSE", expr), Expr::IsNotNull(expr) => write!(f, "{:?} IS NOT NULL", expr), Expr::Exists { subquery, @@ -809,6 +819,10 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { let expr = create_name(expr, input_schema)?; Ok(format!("{} IS TRUE", expr)) } + Expr::IsFalse(expr) => { + let expr = create_name(expr, input_schema)?; + Ok(format!("{} IS FALSE", expr)) + } Expr::IsNotNull(expr) => { let expr = create_name(expr, input_schema)?; Ok(format!("{} IS NOT NULL", expr)) diff --git a/datafusion/expr/src/expr_rewriter.rs b/datafusion/expr/src/expr_rewriter.rs index 9cee88714385..73a450d74801 100644 --- a/datafusion/expr/src/expr_rewriter.rs +++ b/datafusion/expr/src/expr_rewriter.rs @@ -129,6 +129,7 @@ impl ExprRewritable for Expr { }, Expr::Not(expr) => Expr::Not(rewrite_boxed(expr, rewriter)?), Expr::IsTrue(expr) => Expr::IsTrue(rewrite_boxed(expr, rewriter)?), + Expr::IsFalse(expr) => Expr::IsFalse(rewrite_boxed(expr, rewriter)?), Expr::IsNotNull(expr) => Expr::IsNotNull(rewrite_boxed(expr, rewriter)?), Expr::IsNull(expr) => Expr::IsNull(rewrite_boxed(expr, rewriter)?), Expr::Negative(expr) => Expr::Negative(rewrite_boxed(expr, rewriter)?), diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index a5d8b74410e7..98c5624630bc 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -105,6 +105,7 @@ impl ExprSchemable for Expr { | Expr::Between { .. } | Expr::InList { .. } | Expr::IsTrue(_) + | Expr::IsFalse(_) | Expr::IsNotNull(_) => Ok(DataType::Boolean), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).data_type().clone()) @@ -184,7 +185,7 @@ impl ExprSchemable for Expr { | Expr::WindowFunction { .. } | Expr::AggregateFunction { .. } | Expr::AggregateUDF { .. } => Ok(true), - Expr::IsNull(_) | Expr::IsTrue(_) | Expr::IsNotNull(_) | Expr::Exists { .. } => Ok(false), + Expr::IsNull(_) | Expr::IsNotNull(_) | Expr::IsTrue(_) | Expr::IsFalse(_) | Expr::Exists { .. } => Ok(false), Expr::InSubquery { expr, .. } => expr.nullable(input_schema), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).is_nullable()) diff --git a/datafusion/expr/src/expr_visitor.rs b/datafusion/expr/src/expr_visitor.rs index d9a0af3f8b30..febdc81fe02d 100644 --- a/datafusion/expr/src/expr_visitor.rs +++ b/datafusion/expr/src/expr_visitor.rs @@ -97,6 +97,7 @@ impl ExprVisitable for Expr { Expr::Alias(expr, _) | Expr::Not(expr) | Expr::IsTrue(expr) + | Expr::IsFalse(expr) | Expr::IsNotNull(expr) | Expr::IsNull(expr) | Expr::Negative(expr) diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index a8ebe45f9bd7..c2ba4cd648b3 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -81,6 +81,7 @@ impl ExpressionVisitor for ColumnNameVisitor<'_> { | Expr::BinaryExpr { .. } | Expr::Not(_) | Expr::IsTrue(_) + | Expr::IsFalse(_) | Expr::IsNotNull(_) | Expr::IsNull(_) | Expr::Negative(_) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 007b1eb2bf47..a8a2702ca4ab 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -411,6 +411,9 @@ impl ExprIdentifierVisitor<'_> { Expr::IsTrue(_) => { desc.push_str("IsTrue-"); } + Expr::IsFalse(_) => { + desc.push_str("IsFalse-"); + } Expr::IsNotNull(_) => { desc.push_str("IsNotNull-"); } diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 60cb83a03398..25e77994e4c4 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -516,6 +516,7 @@ impl<'a> ConstEvaluator<'a> { | Expr::BinaryExpr { .. } | Expr::Not(_) | Expr::IsTrue(_) + | Expr::IsFalse(_) | Expr::IsNotNull(_) | Expr::IsNull(_) | Expr::Negative(_) diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs new file mode 100644 index 000000000000..2596941e2398 --- /dev/null +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! IS FALSE expression + +use std::{any::Any, sync::Arc}; + +use crate::PhysicalExpr; +use arrow::{ + array::{Array, BooleanArray}, + datatypes::{DataType, Schema}, + record_batch::RecordBatch, +}; +use datafusion_common::Result; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; + +/// IS FALSE expression +#[derive(Debug)] +pub struct IsFalseExpr { + /// The input expression + arg: Arc, +} + +impl IsFalseExpr { + /// Create new not expression + pub fn new(arg: Arc) -> Self { + Self { arg } + } + + /// Get the input expression + pub fn arg(&self) -> &Arc { + &self.arg + } +} + +impl std::fmt::Display for IsFalseExpr { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} IS FALSE", self.arg) + } +} + +impl PhysicalExpr for IsFalseExpr { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _input_schema: &Schema) -> Result { + Ok(DataType::Boolean) + } + + fn nullable(&self, _input_schema: &Schema) -> Result { + Ok(false) + } + + fn evaluate(&self, batch: &RecordBatch) -> Result { + let arg = self.arg.evaluate(batch)?; + match arg { + ColumnarValue::Array(array) => { + let array_len = array.len(); + let my_array = ColumnarValue::Array(array).into_array(array_len); + let true_array = BooleanArray::from(vec![Some(true)]); + let false_array = BooleanArray::from(vec![Some(false)]); + let null_array = BooleanArray::from(vec![None]); + let mut result_vec = vec![]; + for i in 0..array_len { + let current = (*my_array).slice(i,1); + if (*current).eq(&false_array) { + result_vec.push(Some(true)); + } else if (*current).eq(&true_array) || (*current).eq(&null_array) { + result_vec.push(Some(false)); + } else { + panic!("Cannot apply 'IS FALSE' to arguments of type '<{:?}> IS FALSE'. Supported form(s): ' IS FALSE'", current.data_type()) + } + } + + let return_array = BooleanArray::from(result_vec); + Ok(ColumnarValue::Array(Arc::new(return_array))) + } + ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( + ScalarValue::Boolean(Some(scalar.is_false())), + )), + } + } +} + +/// Create an IS FALSE expression +pub fn is_false(arg: Arc) -> Result> { + Ok(Arc::new(IsFalseExpr::new(arg))) +} diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index cb3b57694666..ddccc12f8acb 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -28,6 +28,7 @@ mod get_indexed_field; mod in_list; mod is_not_null; mod is_true; +mod is_false; mod is_null; mod literal; mod negative; @@ -78,6 +79,7 @@ pub use datetime::DateTimeIntervalExpr; pub use get_indexed_field::GetIndexedFieldExpr; pub use in_list::{in_list, InListExpr}; pub use is_true::{is_true, IsTrueExpr}; +pub use is_false::{is_false, IsFalseExpr}; pub use is_not_null::{is_not_null, IsNotNullExpr}; pub use is_null::{is_null, IsNullExpr}; pub use literal::{lit, Literal}; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 03801e09692c..6dd8efd98aa9 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -201,6 +201,12 @@ pub fn create_physical_expr( input_schema, execution_props, )?), + Expr::IsFalse(expr) => expressions::is_false(create_physical_expr( + expr, + input_dfschema, + input_schema, + execution_props, + )?), Expr::IsNotNull(expr) => expressions::is_not_null(create_physical_expr( expr, input_dfschema, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 82f4f9741d74..cff9f91f0e23 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -315,6 +315,7 @@ message LogicalExprNode { // boolean operations IsTrue is_true = 25; + IsFalse is_false = 26; } } @@ -353,6 +354,10 @@ message IsTrue { LogicalExprNode expr = 1; } +message IsFalse { + LogicalExprNode expr = 1; +} + message Not { LogicalExprNode expr = 1; } diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 8836726a4289..1a162767f9d8 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -908,6 +908,9 @@ pub fn parse_expr( ExprType::IsTrueExpr(is_true) => Ok(Expr::IsTrue(Box::new( parse_required_expr(&is_true.expr, registry, "expr")?, ))), + ExprType::IsFalseExpr(is_false) => Ok(Expr::IsFalse(Box::new( + parse_required_expr(&is_false.expr, registry, "expr")?, + ))), ExprType::NotExpr(not) => Ok(Expr::Not(Box::new(parse_required_expr( ¬.expr, registry, "expr", )?))), diff --git a/datafusion/proto/src/to_proto.rs b/datafusion/proto/src/to_proto.rs index 16778f46b055..987a6fd375d7 100644 --- a/datafusion/proto/src/to_proto.rs +++ b/datafusion/proto/src/to_proto.rs @@ -624,6 +624,14 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { expr_type: Some(ExprType::IsTrueExpr(expr)), } } + Expr::IsFalse(expr) => { + let expr = Box::new(protobuf::IsFalse { + expr: Some(Box::new(expr.as_ref().try_into()?)), + }); + Self { + expr_type: Some(ExprType::IsFalseExpr(expr)), + } + } Expr::Between { expr, negated, diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 8c71bb81bb68..9adebac3e224 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -1852,6 +1852,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.sql_expr_to_logical_expr(*expr, schema, ctes)?, ))), + SQLExpr::IsFalse(expr) => Ok(Expr::IsFalse(Box::new( + self.sql_expr_to_logical_expr(*expr, schema, ctes)?, + ))), + SQLExpr::IsNotNull(expr) => Ok(Expr::IsNotNull(Box::new( self.sql_expr_to_logical_expr(*expr, schema, ctes)?, ))), diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 1e94bab0ece8..01064ea2cb4d 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -283,6 +283,9 @@ where Expr::IsTrue(nested_expr) => Ok(Expr::IsTrue(Box::new( clone_with_replacement(nested_expr, replacement_fn)?, ))), + Expr::IsFalse(nested_expr) => Ok(Expr::IsFalse(Box::new( + clone_with_replacement(nested_expr, replacement_fn)?, + ))), Expr::IsNotNull(nested_expr) => Ok(Expr::IsNotNull(Box::new( clone_with_replacement(nested_expr, replacement_fn)?, ))), From 20cfe360a414e1da411dc2c282a3b3432a191293 Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Thu, 18 Aug 2022 13:28:45 -0700 Subject: [PATCH 04/13] Update datafusion/proto/proto/datafusion.proto Co-authored-by: Andy Grove --- datafusion/proto/proto/datafusion.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index cff9f91f0e23..887b3977db0b 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -314,8 +314,8 @@ message LogicalExprNode { RollupNode rollup = 24; // boolean operations - IsTrue is_true = 25; - IsFalse is_false = 26; + IsTrue is_true_expr = 25; + IsFalse is_false_expr = 26; } } From 632700d9bdc156668ab0e06a590187f9baf9d292 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Thu, 18 Aug 2022 13:42:35 -0700 Subject: [PATCH 05/13] formatting --- datafusion/expr/src/expr_schema.rs | 6 +++++- datafusion/physical-expr/src/expressions/is_false.rs | 2 +- datafusion/physical-expr/src/expressions/is_true.rs | 2 +- datafusion/physical-expr/src/expressions/mod.rs | 6 +++--- datafusion/proto/src/from_proto.rs | 8 +++++--- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 98c5624630bc..7306b0b2b1f9 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -185,7 +185,11 @@ impl ExprSchemable for Expr { | Expr::WindowFunction { .. } | Expr::AggregateFunction { .. } | Expr::AggregateUDF { .. } => Ok(true), - Expr::IsNull(_) | Expr::IsNotNull(_) | Expr::IsTrue(_) | Expr::IsFalse(_) | Expr::Exists { .. } => Ok(false), + Expr::IsNull(_) + | Expr::IsNotNull(_) + | Expr::IsTrue(_) + | Expr::IsFalse(_) + | Expr::Exists { .. } => Ok(false), Expr::InSubquery { expr, .. } => expr.nullable(input_schema), Expr::ScalarSubquery(subquery) => { Ok(subquery.subquery.schema().field(0).is_nullable()) diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs index 2596941e2398..33d4409c514f 100644 --- a/datafusion/physical-expr/src/expressions/is_false.rs +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -79,7 +79,7 @@ impl PhysicalExpr for IsFalseExpr { let null_array = BooleanArray::from(vec![None]); let mut result_vec = vec![]; for i in 0..array_len { - let current = (*my_array).slice(i,1); + let current = (*my_array).slice(i, 1); if (*current).eq(&false_array) { result_vec.push(Some(true)); } else if (*current).eq(&true_array) || (*current).eq(&null_array) { diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index cfd3e21e1a70..012740be7a71 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -79,7 +79,7 @@ impl PhysicalExpr for IsTrueExpr { let null_array = BooleanArray::from(vec![None]); let mut result_vec = vec![]; for i in 0..array_len { - let current = (*my_array).slice(i,1); + let current = (*my_array).slice(i, 1); if (*current).eq(&true_array) { result_vec.push(Some(true)); } else if (*current).eq(&false_array) || (*current).eq(&null_array) { diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index ddccc12f8acb..60279f46eb24 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -26,10 +26,10 @@ mod datetime; mod delta; mod get_indexed_field; mod in_list; -mod is_not_null; -mod is_true; mod is_false; +mod is_not_null; mod is_null; +mod is_true; mod literal; mod negative; mod not; @@ -78,10 +78,10 @@ pub use column::{col, Column}; pub use datetime::DateTimeIntervalExpr; pub use get_indexed_field::GetIndexedFieldExpr; pub use in_list::{in_list, InListExpr}; -pub use is_true::{is_true, IsTrueExpr}; pub use is_false::{is_false, IsFalseExpr}; pub use is_not_null::{is_not_null, IsNotNullExpr}; pub use is_null::{is_null, IsNullExpr}; +pub use is_true::{is_true, IsTrueExpr}; pub use literal::{lit, Literal}; pub use negative::{negative, NegativeExpr}; pub use not::{not, NotExpr}; diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 1a162767f9d8..76f7846bc6a7 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -905,9 +905,11 @@ pub fn parse_expr( ExprType::IsNotNullExpr(is_not_null) => Ok(Expr::IsNotNull(Box::new( parse_required_expr(&is_not_null.expr, registry, "expr")?, ))), - ExprType::IsTrueExpr(is_true) => Ok(Expr::IsTrue(Box::new( - parse_required_expr(&is_true.expr, registry, "expr")?, - ))), + ExprType::IsTrueExpr(is_true) => Ok(Expr::IsTrue(Box::new(parse_required_expr( + &is_true.expr, + registry, + "expr", + )?,))), ExprType::IsFalseExpr(is_false) => Ok(Expr::IsFalse(Box::new( parse_required_expr(&is_false.expr, registry, "expr")?, ))), From 15e42be49c5eb347d8afec2b3e1c46e36105fc56 Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Thu, 18 Aug 2022 13:43:22 -0700 Subject: [PATCH 06/13] Update datafusion/common/src/scalar.rs Co-authored-by: Andy Grove --- datafusion/common/src/scalar.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 44ed03849cf1..e94ca00bbb84 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -729,7 +729,7 @@ impl ScalarValue { /// whether this value is true or not. pub fn is_true(&self) -> bool { match self { - ScalarValue::Boolean(v) => v.eq(&Some(true)), + ScalarValue::Boolean(Some(true)) => true, ScalarValue::Null => false, other => panic!("Cannot apply 'IS TRUE' to arguments of type '{} IS TRUE'. Supported form(s): ' IS TRUE'", other) } From 450f776e6bacbc90c9c16d74e7a31952e80e49e3 Mon Sep 17 00:00:00 2001 From: Sarah Yurick <53962159+sarahyurick@users.noreply.github.com> Date: Thu, 18 Aug 2022 13:43:45 -0700 Subject: [PATCH 07/13] Update datafusion/common/src/scalar.rs Co-authored-by: Andy Grove --- datafusion/common/src/scalar.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index e94ca00bbb84..2018aba49e60 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -738,7 +738,7 @@ impl ScalarValue { /// whether this value is false or not. pub fn is_false(&self) -> bool { match self { - ScalarValue::Boolean(v) => v.eq(&Some(false)), + ScalarValue::Boolean(Some(false)) => true, ScalarValue::Null => false, other => panic!("Cannot apply 'IS FALSE' to arguments of type '{} IS FALSE'. Supported form(s): ' IS FALSE'", other) } From 11b5559e7a20172d0b7e9ab4a9c6f09dae039b55 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Thu, 18 Aug 2022 14:35:21 -0700 Subject: [PATCH 08/13] err instead of panic --- datafusion/common/src/scalar.rs | 18 ------------------ .../physical-expr/src/expressions/is_false.rs | 13 +++++++++---- .../physical-expr/src/expressions/is_true.rs | 13 +++++++++---- datafusion/proto/src/from_proto.rs | 2 +- 4 files changed, 19 insertions(+), 27 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 2018aba49e60..dff97d2f981f 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -726,24 +726,6 @@ impl ScalarValue { } } - /// whether this value is true or not. - pub fn is_true(&self) -> bool { - match self { - ScalarValue::Boolean(Some(true)) => true, - ScalarValue::Null => false, - other => panic!("Cannot apply 'IS TRUE' to arguments of type '{} IS TRUE'. Supported form(s): ' IS TRUE'", other) - } - } - - /// whether this value is false or not. - pub fn is_false(&self) -> bool { - match self { - ScalarValue::Boolean(Some(false)) => true, - ScalarValue::Null => false, - other => panic!("Cannot apply 'IS FALSE' to arguments of type '{} IS FALSE'. Supported form(s): ' IS FALSE'", other) - } - } - /// whether this value is null or not. pub fn is_null(&self) -> bool { match self { diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs index 33d4409c514f..63f125d2cb9e 100644 --- a/datafusion/physical-expr/src/expressions/is_false.rs +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -25,6 +25,7 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; +use datafusion_common::DataFusionError; use datafusion_common::Result; use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; @@ -85,16 +86,20 @@ impl PhysicalExpr for IsFalseExpr { } else if (*current).eq(&true_array) || (*current).eq(&null_array) { result_vec.push(Some(false)); } else { - panic!("Cannot apply 'IS FALSE' to arguments of type '<{:?}> IS FALSE'. Supported form(s): ' IS FALSE'", current.data_type()) + return Err(DataFusionError::Execution("Cannot apply 'IS FALSE' to argument type. Supported form(s): ' IS FALSE'".to_string())) } } let return_array = BooleanArray::from(result_vec); Ok(ColumnarValue::Array(Arc::new(return_array))) } - ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( - ScalarValue::Boolean(Some(scalar.is_false())), - )), + ColumnarValue::Scalar(scalar) => { + match scalar { + ScalarValue::Boolean(Some(false)) => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))), + ScalarValue::Null => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)))), + _ => return Err(DataFusionError::Execution("Cannot apply 'IS FALSE' to argument type. Supported form(s): ' IS FALSE'".to_string())) + } + } } } } diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index 012740be7a71..a22551f440ae 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -25,6 +25,7 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; +use datafusion_common::DataFusionError; use datafusion_common::Result; use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; @@ -85,16 +86,20 @@ impl PhysicalExpr for IsTrueExpr { } else if (*current).eq(&false_array) || (*current).eq(&null_array) { result_vec.push(Some(false)); } else { - panic!("Cannot apply 'IS TRUE' to arguments of type '<{:?}> IS TRUE'. Supported form(s): ' IS TRUE'", current.data_type()) + return Err(DataFusionError::Execution("Cannot apply 'IS TRUE' to argument type. Supported form(s): ' IS TRUE'".to_string())) } } let return_array = BooleanArray::from(result_vec); Ok(ColumnarValue::Array(Arc::new(return_array))) } - ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( - ScalarValue::Boolean(Some(scalar.is_true())), - )), + ColumnarValue::Scalar(scalar) => { + match scalar { + ScalarValue::Boolean(Some(true)) => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))), + ScalarValue::Null => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)))), + _ => return Err(DataFusionError::Execution("Cannot apply 'IS TRUE' to argument type. Supported form(s): ' IS TRUE'".to_string())) + } + } } } } diff --git a/datafusion/proto/src/from_proto.rs b/datafusion/proto/src/from_proto.rs index 76f7846bc6a7..ca03308ef1a7 100644 --- a/datafusion/proto/src/from_proto.rs +++ b/datafusion/proto/src/from_proto.rs @@ -909,7 +909,7 @@ pub fn parse_expr( &is_true.expr, registry, "expr", - )?,))), + )?))), ExprType::IsFalseExpr(is_false) => Ok(Expr::IsFalse(Box::new( parse_required_expr(&is_false.expr, registry, "expr")?, ))), From 70acf4b3c2563f10d3345d21bce6914dd65166ba Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Thu, 18 Aug 2022 16:26:52 -0700 Subject: [PATCH 09/13] format error --- datafusion/physical-expr/src/expressions/is_false.rs | 4 ++-- datafusion/physical-expr/src/expressions/is_true.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs index 63f125d2cb9e..855ca80b911a 100644 --- a/datafusion/physical-expr/src/expressions/is_false.rs +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -86,7 +86,7 @@ impl PhysicalExpr for IsFalseExpr { } else if (*current).eq(&true_array) || (*current).eq(&null_array) { result_vec.push(Some(false)); } else { - return Err(DataFusionError::Execution("Cannot apply 'IS FALSE' to argument type. Supported form(s): ' IS FALSE'".to_string())) + return Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{:?}> IS FALSE'. Supported form(s): ' IS FALSE'", current.data_type()))) } } @@ -97,7 +97,7 @@ impl PhysicalExpr for IsFalseExpr { match scalar { ScalarValue::Boolean(Some(false)) => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))), ScalarValue::Null => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)))), - _ => return Err(DataFusionError::Execution("Cannot apply 'IS FALSE' to argument type. Supported form(s): ' IS FALSE'".to_string())) + e => return Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{}> IS FALSE'. Supported form(s): ' IS FALSE'", e.get_datatype()))) } } } diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index a22551f440ae..8a2f80773dbc 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -86,7 +86,7 @@ impl PhysicalExpr for IsTrueExpr { } else if (*current).eq(&false_array) || (*current).eq(&null_array) { result_vec.push(Some(false)); } else { - return Err(DataFusionError::Execution("Cannot apply 'IS TRUE' to argument type. Supported form(s): ' IS TRUE'".to_string())) + return Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{:?}> IS TRUE'. Supported form(s): ' IS TRUE'", current.data_type()))) } } @@ -97,7 +97,7 @@ impl PhysicalExpr for IsTrueExpr { match scalar { ScalarValue::Boolean(Some(true)) => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))), ScalarValue::Null => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)))), - _ => return Err(DataFusionError::Execution("Cannot apply 'IS TRUE' to argument type. Supported form(s): ' IS TRUE'".to_string())) + e => return Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{}> IS TRUE'. Supported form(s): ' IS TRUE'", e.get_datatype()))) } } } From affa48d56c439e4cf8d2cdae7ba5528ab42bf882 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Thu, 18 Aug 2022 16:37:07 -0700 Subject: [PATCH 10/13] tests --- .../physical-expr/src/expressions/is_false.rs | 33 +++++++++++++++++++ .../physical-expr/src/expressions/is_true.rs | 33 +++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs index 855ca80b911a..6644bc1c8df7 100644 --- a/datafusion/physical-expr/src/expressions/is_false.rs +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -108,3 +108,36 @@ impl PhysicalExpr for IsFalseExpr { pub fn is_false(arg: Arc) -> Result> { Ok(Arc::new(IsFalseExpr::new(arg))) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::expressions::col; + use arrow::{ + array::{BooleanArray, StringArray}, + datatypes::*, + record_batch::RecordBatch, + }; + use std::sync::Arc; + + #[test] + fn is_false_op() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]); + let a = BooleanArray::from(vec![Some(true), Some(false), None]); + let expr = is_false(col("a", &schema)?).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; + + // expression: "a is false" + let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); + let result = result + .as_any() + .downcast_ref::() + .expect("failed to downcast to BooleanArray"); + + let expected = &BooleanArray::from(vec![false, true, false]); + + assert_eq!(expected, result); + + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index 8a2f80773dbc..a83a934ef408 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -108,3 +108,36 @@ impl PhysicalExpr for IsTrueExpr { pub fn is_true(arg: Arc) -> Result> { Ok(Arc::new(IsTrueExpr::new(arg))) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::expressions::col; + use arrow::{ + array::{BooleanArray, StringArray}, + datatypes::*, + record_batch::RecordBatch, + }; + use std::sync::Arc; + + #[test] + fn is_true_op() -> Result<()> { + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, true)]); + let a = BooleanArray::from(vec![Some(true), Some(false), None]); + let expr = is_true(col("a", &schema)?).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a)])?; + + // expression: "a is true" + let result = expr.evaluate(&batch)?.into_array(batch.num_rows()); + let result = result + .as_any() + .downcast_ref::() + .expect("failed to downcast to BooleanArray"); + + let expected = &BooleanArray::from(vec![true, false, false]); + + assert_eq!(expected, result); + + Ok(()) + } +} From d408b2fb6ac49aa219f317b3870ce9c7aeaca9c0 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Fri, 19 Aug 2022 09:36:42 -0700 Subject: [PATCH 11/13] update is_true and is_false --- datafusion/common/src/scalar.rs | 18 ++++++++++++++++++ .../physical-expr/src/expressions/is_false.rs | 12 ++++-------- .../physical-expr/src/expressions/is_true.rs | 12 ++++-------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index dff97d2f981f..5bbc98a1e393 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -762,6 +762,24 @@ impl ScalarValue { } } + /// whether this value is true or not. + pub fn is_true(&self) -> Result { + match self { + ScalarValue::Boolean(Some(true)) => Ok(true), + ScalarValue::Null => Ok(false), + e => Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{}> IS TRUE'. Supported form(s): ' IS TRUE'", e.get_datatype()))) + } + } + + /// whether this value is false or not. + pub fn is_false(&self) -> Result { + match self { + ScalarValue::Boolean(Some(false)) => Ok(true), + ScalarValue::Null => Ok(false), + e => Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{}> IS FALSE'. Supported form(s): ' IS FALSE'", e.get_datatype()))) + } + } + /// Converts a scalar value into an 1-row array. pub fn to_array(&self) -> ArrayRef { self.to_array_of_size(1) diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs index 6644bc1c8df7..36e96416947a 100644 --- a/datafusion/physical-expr/src/expressions/is_false.rs +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -93,13 +93,9 @@ impl PhysicalExpr for IsFalseExpr { let return_array = BooleanArray::from(result_vec); Ok(ColumnarValue::Array(Arc::new(return_array))) } - ColumnarValue::Scalar(scalar) => { - match scalar { - ScalarValue::Boolean(Some(false)) => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))), - ScalarValue::Null => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)))), - e => return Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{}> IS FALSE'. Supported form(s): ' IS FALSE'", e.get_datatype()))) - } - } + ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( + ScalarValue::Boolean(Some(scalar.is_false().unwrap())), + )), } } } @@ -114,7 +110,7 @@ mod tests { use super::*; use crate::expressions::col; use arrow::{ - array::{BooleanArray, StringArray}, + array::BooleanArray, datatypes::*, record_batch::RecordBatch, }; diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index a83a934ef408..5e18b67f0fdd 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -93,13 +93,9 @@ impl PhysicalExpr for IsTrueExpr { let return_array = BooleanArray::from(result_vec); Ok(ColumnarValue::Array(Arc::new(return_array))) } - ColumnarValue::Scalar(scalar) => { - match scalar { - ScalarValue::Boolean(Some(true)) => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))), - ScalarValue::Null => Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)))), - e => return Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{}> IS TRUE'. Supported form(s): ' IS TRUE'", e.get_datatype()))) - } - } + ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( + ScalarValue::Boolean(Some(scalar.is_true().unwrap())), + )), } } } @@ -114,7 +110,7 @@ mod tests { use super::*; use crate::expressions::col; use arrow::{ - array::{BooleanArray, StringArray}, + array::BooleanArray, datatypes::*, record_batch::RecordBatch, }; From 6b3ba4971f0fa55e92557fd2b56f4d6e7b426bf7 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Fri, 19 Aug 2022 13:24:35 -0700 Subject: [PATCH 12/13] update with andy's suggestions --- datafusion/common/src/scalar.rs | 4 +-- .../physical-expr/src/expressions/is_false.rs | 30 +++++++------------ .../physical-expr/src/expressions/is_true.rs | 30 +++++++------------ 3 files changed, 24 insertions(+), 40 deletions(-) diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index 5bbc98a1e393..c236623abfe9 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -765,7 +765,7 @@ impl ScalarValue { /// whether this value is true or not. pub fn is_true(&self) -> Result { match self { - ScalarValue::Boolean(Some(true)) => Ok(true), + ScalarValue::Boolean(Some(v)) => Ok(*v), ScalarValue::Null => Ok(false), e => Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{}> IS TRUE'. Supported form(s): ' IS TRUE'", e.get_datatype()))) } @@ -774,7 +774,7 @@ impl ScalarValue { /// whether this value is false or not. pub fn is_false(&self) -> Result { match self { - ScalarValue::Boolean(Some(false)) => Ok(true), + ScalarValue::Boolean(Some(v)) => Ok(!v), ScalarValue::Null => Ok(false), e => Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{}> IS FALSE'. Supported form(s): ' IS FALSE'", e.get_datatype()))) } diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs index 36e96416947a..a498b199b12e 100644 --- a/datafusion/physical-expr/src/expressions/is_false.rs +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -21,7 +21,7 @@ use std::{any::Any, sync::Arc}; use crate::PhysicalExpr; use arrow::{ - array::{Array, BooleanArray}, + array::{Array, BooleanArray, BooleanBuilder}, datatypes::{DataType, Schema}, record_batch::RecordBatch, }; @@ -73,28 +73,20 @@ impl PhysicalExpr for IsFalseExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => { - let array_len = array.len(); - let my_array = ColumnarValue::Array(array).into_array(array_len); - let true_array = BooleanArray::from(vec![Some(true)]); - let false_array = BooleanArray::from(vec![Some(false)]); - let null_array = BooleanArray::from(vec![None]); - let mut result_vec = vec![]; - for i in 0..array_len { - let current = (*my_array).slice(i, 1); - if (*current).eq(&false_array) { - result_vec.push(Some(true)); - } else if (*current).eq(&true_array) || (*current).eq(&null_array) { - result_vec.push(Some(false)); - } else { - return Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{:?}> IS FALSE'. Supported form(s): ' IS FALSE'", current.data_type()))) + match array.as_any().downcast_ref::() { + Some(bool_array) => { + let array_len = array.len(); + let mut result_builder = BooleanBuilder::new(array_len); + for i in 0..array_len { + result_builder.append_value(!bool_array.is_null(i) && !bool_array.value(i)); + } + Ok(ColumnarValue::Array(Arc::new(result_builder.finish()))) } + _ => Err(DataFusionError::Execution(format!("Cannot apply 'IS FALSE' to arguments of type '<{:?}> IS FALSE'. Supported form(s): ' IS FALSE'", array.data_type()))) } - - let return_array = BooleanArray::from(result_vec); - Ok(ColumnarValue::Array(Arc::new(return_array))) } ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( - ScalarValue::Boolean(Some(scalar.is_false().unwrap())), + ScalarValue::Boolean(Some(scalar.is_false()?)), )), } } diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index 5e18b67f0fdd..0d6c85c4d435 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -21,7 +21,7 @@ use std::{any::Any, sync::Arc}; use crate::PhysicalExpr; use arrow::{ - array::{Array, BooleanArray}, + array::{Array, BooleanArray, BooleanBuilder}, datatypes::{DataType, Schema}, record_batch::RecordBatch, }; @@ -73,28 +73,20 @@ impl PhysicalExpr for IsTrueExpr { let arg = self.arg.evaluate(batch)?; match arg { ColumnarValue::Array(array) => { - let array_len = array.len(); - let my_array = ColumnarValue::Array(array).into_array(array_len); - let true_array = BooleanArray::from(vec![Some(true)]); - let false_array = BooleanArray::from(vec![Some(false)]); - let null_array = BooleanArray::from(vec![None]); - let mut result_vec = vec![]; - for i in 0..array_len { - let current = (*my_array).slice(i, 1); - if (*current).eq(&true_array) { - result_vec.push(Some(true)); - } else if (*current).eq(&false_array) || (*current).eq(&null_array) { - result_vec.push(Some(false)); - } else { - return Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{:?}> IS TRUE'. Supported form(s): ' IS TRUE'", current.data_type()))) + match array.as_any().downcast_ref::() { + Some(bool_array) => { + let array_len = array.len(); + let mut result_builder = BooleanBuilder::new(array_len); + for i in 0..array_len { + result_builder.append_value(!bool_array.is_null(i) && bool_array.value(i)); + } + Ok(ColumnarValue::Array(Arc::new(result_builder.finish()))) } + _ => Err(DataFusionError::Execution(format!("Cannot apply 'IS TRUE' to arguments of type '<{:?}> IS TRUE'. Supported form(s): ' IS TRUE'", array.data_type()))) } - - let return_array = BooleanArray::from(result_vec); - Ok(ColumnarValue::Array(Arc::new(return_array))) } ColumnarValue::Scalar(scalar) => Ok(ColumnarValue::Scalar( - ScalarValue::Boolean(Some(scalar.is_true().unwrap())), + ScalarValue::Boolean(Some(scalar.is_true()?)), )), } } From 46bf619de599fbc06c476765369d0f832257f56a Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Fri, 19 Aug 2022 14:11:41 -0700 Subject: [PATCH 13/13] rust format fix --- datafusion/physical-expr/src/expressions/is_false.rs | 6 +----- datafusion/physical-expr/src/expressions/is_true.rs | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/is_false.rs b/datafusion/physical-expr/src/expressions/is_false.rs index a498b199b12e..740127767356 100644 --- a/datafusion/physical-expr/src/expressions/is_false.rs +++ b/datafusion/physical-expr/src/expressions/is_false.rs @@ -101,11 +101,7 @@ pub fn is_false(arg: Arc) -> Result> { mod tests { use super::*; use crate::expressions::col; - use arrow::{ - array::BooleanArray, - datatypes::*, - record_batch::RecordBatch, - }; + use arrow::{array::BooleanArray, datatypes::*, record_batch::RecordBatch}; use std::sync::Arc; #[test] diff --git a/datafusion/physical-expr/src/expressions/is_true.rs b/datafusion/physical-expr/src/expressions/is_true.rs index 0d6c85c4d435..a5d08a0202db 100644 --- a/datafusion/physical-expr/src/expressions/is_true.rs +++ b/datafusion/physical-expr/src/expressions/is_true.rs @@ -101,11 +101,7 @@ pub fn is_true(arg: Arc) -> Result> { mod tests { use super::*; use crate::expressions::col; - use arrow::{ - array::BooleanArray, - datatypes::*, - record_batch::RecordBatch, - }; + use arrow::{array::BooleanArray, datatypes::*, record_batch::RecordBatch}; use std::sync::Arc; #[test]