From fda9a6c2c092d10eca114144e49a710fbda4d68b Mon Sep 17 00:00:00 2001 From: acking-you Date: Wed, 3 Jul 2024 16:12:18 +0800 Subject: [PATCH 1/3] [draft] add shot circuit in BinaryExpr --- .../physical-expr/src/expressions/binary.rs | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index d19279c20d10b..5f1d3c5e62e35 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -258,8 +258,47 @@ impl PhysicalExpr for BinaryExpr { use arrow::compute::kernels::numeric::*; let lhs = self.left.evaluate(batch)?; - let rhs = self.right.evaluate(batch)?; let left_data_type = lhs.data_type(); + + if left_data_type == DataType::Boolean && self.op == Operator::And { + match &lhs { + ColumnarValue::Array(array) => { + if let Ok(array) = as_boolean_array(&array) { + if array.true_count() == 0 { + return Ok(lhs); + } + } + } + ColumnarValue::Scalar(scalar) => { + if let ScalarValue::Boolean(Some(value)) = scalar { + if !value { + return Ok(lhs); + } + } + } + } + } + + if left_data_type == DataType::Boolean && self.op == Operator::Or { + match &lhs { + ColumnarValue::Array(array) => { + if let Ok(array) = as_boolean_array(&array) { + if array.true_count() == array.len() { + return Ok(lhs); + } + } + } + ColumnarValue::Scalar(scalar) => { + if let ScalarValue::Boolean(Some(value)) = scalar { + if *value { + return Ok(lhs); + } + } + } + } + } + + let rhs = self.right.evaluate(batch)?; let right_data_type = rhs.data_type(); let schema = batch.schema(); From 9510783b99db95aec95a25611301b5c29ad92323 Mon Sep 17 00:00:00 2001 From: acking-you Date: Wed, 3 Jul 2024 17:08:00 +0800 Subject: [PATCH 2/3] refactor: add check_short_circuit function --- .../physical-expr/src/expressions/binary.rs | 72 +++++++++++-------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 5f1d3c5e62e35..c08d1f9d61d0a 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -257,48 +257,58 @@ impl PhysicalExpr for BinaryExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { use arrow::compute::kernels::numeric::*; - let lhs = self.left.evaluate(batch)?; - let left_data_type = lhs.data_type(); - - if left_data_type == DataType::Boolean && self.op == Operator::And { - match &lhs { - ColumnarValue::Array(array) => { - if let Ok(array) = as_boolean_array(&array) { - if array.true_count() == 0 { - return Ok(lhs); + #[inline] + fn check_short_circuit(arg: &ColumnarValue, op: &Operator) -> bool { + let data_type = arg.data_type(); + if data_type == DataType::Boolean { + if *op == Operator::And { + match arg { + ColumnarValue::Array(array) => { + if let Ok(array) = as_boolean_array(&array) { + if array.true_count() == 0 { + return true; + } + } } - } - } - ColumnarValue::Scalar(scalar) => { - if let ScalarValue::Boolean(Some(value)) = scalar { - if !value { - return Ok(lhs); + ColumnarValue::Scalar(scalar) => { + if let ScalarValue::Boolean(Some(value)) = scalar { + if !value { + return true; + } + } } } - } - } - } - - if left_data_type == DataType::Boolean && self.op == Operator::Or { - match &lhs { - ColumnarValue::Array(array) => { - if let Ok(array) = as_boolean_array(&array) { - if array.true_count() == array.len() { - return Ok(lhs); + } else if *op == Operator::Or { + match arg { + ColumnarValue::Array(array) => { + if let Ok(array) = as_boolean_array(&array) { + if array.true_count() == array.len() { + return true; + } + } } - } - } - ColumnarValue::Scalar(scalar) => { - if let ScalarValue::Boolean(Some(value)) = scalar { - if *value { - return Ok(lhs); + ColumnarValue::Scalar(scalar) => { + if let ScalarValue::Boolean(Some(value)) = scalar { + if *value { + return true; + } + } } } } } + false + } + + let lhs = self.left.evaluate(batch)?; + + // Optimize for short-circuiting `Operator::And` or `Operator::Or` operations and return early. + if check_short_circuit(&lhs, &self.op) { + return Ok(lhs); } let rhs = self.right.evaluate(batch)?; + let left_data_type = lhs.data_type(); let right_data_type = rhs.data_type(); let schema = batch.schema(); From 6d92321fef69e88aa142fafd854245ecc374bef3 Mon Sep 17 00:00:00 2001 From: acking-you Date: Thu, 4 Jul 2024 16:33:18 +0800 Subject: [PATCH 3/3] refactor: change if condition to match --- .../physical-expr/src/expressions/binary.rs | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index c08d1f9d61d0a..c4f4f49b84bda 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -257,47 +257,41 @@ impl PhysicalExpr for BinaryExpr { fn evaluate(&self, batch: &RecordBatch) -> Result { use arrow::compute::kernels::numeric::*; - #[inline] fn check_short_circuit(arg: &ColumnarValue, op: &Operator) -> bool { let data_type = arg.data_type(); - if data_type == DataType::Boolean { - if *op == Operator::And { + match (data_type, op) { + (DataType::Boolean, Operator::And) => { match arg { ColumnarValue::Array(array) => { if let Ok(array) = as_boolean_array(&array) { - if array.true_count() == 0 { - return true; - } + return array.true_count() == 0; } } ColumnarValue::Scalar(scalar) => { if let ScalarValue::Boolean(Some(value)) = scalar { - if !value { - return true; - } + return !value; } } } - } else if *op == Operator::Or { + false + } + (DataType::Boolean, Operator::Or) => { match arg { ColumnarValue::Array(array) => { if let Ok(array) = as_boolean_array(&array) { - if array.true_count() == array.len() { - return true; - } + return array.true_count() == array.len(); } } ColumnarValue::Scalar(scalar) => { if let ScalarValue::Boolean(Some(value)) = scalar { - if *value { - return true; - } + return *value; } } } + false } + _ => false, } - false } let lhs = self.left.evaluate(batch)?;