diff --git a/datafusion-expr/src/accumulator.rs b/datafusion-expr/src/accumulator.rs index 599bd363fb615..d59764957ef59 100644 --- a/datafusion-expr/src/accumulator.rs +++ b/datafusion-expr/src/accumulator.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Accumulator module contains the trait definition for aggregation function's accumulators. + use arrow::array::ArrayRef; use datafusion_common::{Result, ScalarValue}; use std::fmt::Debug; diff --git a/datafusion-expr/src/aggregate_function.rs b/datafusion-expr/src/aggregate_function.rs index 8f12e88bf1a2a..dd8efea88460e 100644 --- a/datafusion-expr/src/aggregate_function.rs +++ b/datafusion-expr/src/aggregate_function.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Aggregate function module contains all built-in aggregate functions definitions + use datafusion_common::{DataFusionError, Result}; use std::{fmt, str::FromStr}; diff --git a/datafusion-expr/src/built_in_function.rs b/datafusion-expr/src/built_in_function.rs index 0d5ee9792ecb6..87626825c3e00 100644 --- a/datafusion-expr/src/built_in_function.rs +++ b/datafusion-expr/src/built_in_function.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Built-in functions +//! Built-in functions module contains all the built-in functions definitions. use crate::Volatility; use datafusion_common::{DataFusionError, Result}; diff --git a/datafusion-expr/src/columnar_value.rs b/datafusion-expr/src/columnar_value.rs index 5e6959d751f8e..4867c0e746b37 100644 --- a/datafusion-expr/src/columnar_value.rs +++ b/datafusion-expr/src/columnar_value.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Columnar value module contains a set of types that represent a columnar value. + use arrow::array::ArrayRef; use arrow::array::NullArray; use arrow::datatypes::DataType; diff --git a/datafusion-expr/src/expr.rs b/datafusion-expr/src/expr.rs index f26f1dfa97465..d3cbf7059ba48 100644 --- a/datafusion-expr/src/expr.rs +++ b/datafusion-expr/src/expr.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Expr module contains core type definition for `Expr`. + use crate::aggregate_function; use crate::built_in_function; use crate::expr_fn::binary_expr; @@ -696,3 +698,28 @@ fn create_name(e: &Expr, input_schema: &DFSchema) -> Result { )), } } + +#[cfg(test)] +mod test { + use crate::expr_fn::col; + use crate::lit; + + #[test] + fn test_not() { + assert_eq!(lit(1).not(), !lit(1)); + } + + #[test] + fn test_partial_ord() { + // Test validates that partial ord is defined for Expr using hashes, not + // intended to exhaustively test all possibilities + let exp1 = col("a") + lit(1); + let exp2 = col("a") + lit(2); + let exp3 = !(col("a") + lit(2)); + + assert!(exp1 < exp2); + assert!(exp2 > exp1); + assert!(exp2 > exp3); + assert!(exp3 < exp2); + } +} diff --git a/datafusion-expr/src/expr_fn.rs b/datafusion-expr/src/expr_fn.rs index 469a82d0ff241..d39269cca036d 100644 --- a/datafusion-expr/src/expr_fn.rs +++ b/datafusion-expr/src/expr_fn.rs @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -use crate::{Expr, Operator}; +//! Expr fn module contains the functional definitions for expressions. + +use crate::{aggregate_function, built_in_function, lit, Expr, Operator}; /// Create a column expression based on a qualified or unqualified column name pub fn col(ident: &str) -> Expr { Expr::Column(ident.into()) } -/// return a new expression l r +/// Return a new expression l r pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr { Expr::BinaryExpr { left: Box::new(l), @@ -30,3 +32,389 @@ pub fn binary_expr(l: Expr, op: Operator, r: Expr) -> Expr { right: Box::new(r), } } + +/// Return a new expression with a logical AND +pub fn and(left: Expr, right: Expr) -> Expr { + Expr::BinaryExpr { + left: Box::new(left), + op: Operator::And, + right: Box::new(right), + } +} + +/// Return a new expression with a logical OR +pub fn or(left: Expr, right: Expr) -> Expr { + Expr::BinaryExpr { + left: Box::new(left), + op: Operator::Or, + right: Box::new(right), + } +} + +/// Create an expression to represent the min() aggregate function +pub fn min(expr: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::Min, + distinct: false, + args: vec![expr], + } +} + +/// Create an expression to represent the max() aggregate function +pub fn max(expr: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::Max, + distinct: false, + args: vec![expr], + } +} + +/// Create an expression to represent the sum() aggregate function +pub fn sum(expr: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::Sum, + distinct: false, + args: vec![expr], + } +} + +/// Create an expression to represent the avg() aggregate function +pub fn avg(expr: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::Avg, + distinct: false, + args: vec![expr], + } +} + +/// Create an expression to represent the count() aggregate function +pub fn count(expr: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::Count, + distinct: false, + args: vec![expr], + } +} + +/// Create an expression to represent the count(distinct) aggregate function +pub fn count_distinct(expr: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::Count, + distinct: true, + args: vec![expr], + } +} + +/// Create an in_list expression +pub fn in_list(expr: Expr, list: Vec, negated: bool) -> Expr { + Expr::InList { + expr: Box::new(expr), + list, + negated, + } +} + +/// Concatenates the text representations of all the arguments. NULL arguments are ignored. +pub fn concat(args: &[Expr]) -> Expr { + Expr::ScalarFunction { + fun: built_in_function::BuiltinScalarFunction::Concat, + args: args.to_vec(), + } +} + +/// Concatenates all but the first argument, with separators. +/// The first argument is used as the separator string, and should not be NULL. +/// Other NULL arguments are ignored. +pub fn concat_ws(sep: impl Into, values: &[Expr]) -> Expr { + let mut args = vec![lit(sep.into())]; + args.extend_from_slice(values); + Expr::ScalarFunction { + fun: built_in_function::BuiltinScalarFunction::ConcatWithSeparator, + args, + } +} + +/// Returns a random value in the range 0.0 <= x < 1.0 +pub fn random() -> Expr { + Expr::ScalarFunction { + fun: built_in_function::BuiltinScalarFunction::Random, + args: vec![], + } +} + +/// Returns the approximate number of distinct input values. +/// This function provides an approximation of count(DISTINCT x). +/// Zero is returned if all input values are null. +/// This function should produce a standard error of 0.81%, +/// which is the standard deviation of the (approximately normal) +/// error distribution over all possible sets. +/// It does not guarantee an upper bound on the error for any specific input set. +pub fn approx_distinct(expr: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::ApproxDistinct, + distinct: false, + args: vec![expr], + } +} + +/// Calculate an approximation of the specified `percentile` for `expr`. +pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr { + Expr::AggregateFunction { + fun: aggregate_function::AggregateFunction::ApproxPercentileCont, + distinct: false, + args: vec![expr, percentile], + } +} + +// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many +// varying arity functions +/// Create an convenience function representing a unary scalar function +macro_rules! unary_scalar_expr { + ($ENUM:ident, $FUNC:ident) => { + #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ] + pub fn $FUNC(e: Expr) -> Expr { + Expr::ScalarFunction { + fun: built_in_function::BuiltinScalarFunction::$ENUM, + args: vec![e], + } + } + }; +} + +macro_rules! scalar_expr { + ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { + #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ] + pub fn $FUNC($($arg: Expr),*) -> Expr { + Expr::ScalarFunction { + fun: built_in_function::BuiltinScalarFunction::$ENUM, + args: vec![$($arg),*], + } + } + }; +} + +macro_rules! nary_scalar_expr { + ($ENUM:ident, $FUNC:ident) => { + #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ] + pub fn $FUNC(args: Vec) -> Expr { + Expr::ScalarFunction { + fun: built_in_function::BuiltinScalarFunction::$ENUM, + args, + } + } + }; +} + +// generate methods for creating the supported unary/binary expressions + +// math functions +unary_scalar_expr!(Sqrt, sqrt); +unary_scalar_expr!(Sin, sin); +unary_scalar_expr!(Cos, cos); +unary_scalar_expr!(Tan, tan); +unary_scalar_expr!(Asin, asin); +unary_scalar_expr!(Acos, acos); +unary_scalar_expr!(Atan, atan); +unary_scalar_expr!(Floor, floor); +unary_scalar_expr!(Ceil, ceil); +unary_scalar_expr!(Now, now); +unary_scalar_expr!(Round, round); +unary_scalar_expr!(Trunc, trunc); +unary_scalar_expr!(Abs, abs); +unary_scalar_expr!(Signum, signum); +unary_scalar_expr!(Exp, exp); +unary_scalar_expr!(Log2, log2); +unary_scalar_expr!(Log10, log10); +unary_scalar_expr!(Ln, ln); + +// string functions +scalar_expr!(Ascii, ascii, string); +scalar_expr!(BitLength, bit_length, string); +nary_scalar_expr!(Btrim, btrim); +scalar_expr!(CharacterLength, character_length, string); +scalar_expr!(CharacterLength, length, string); +scalar_expr!(Chr, chr, string); +scalar_expr!(Digest, digest, string, algorithm); +scalar_expr!(InitCap, initcap, string); +scalar_expr!(Left, left, string, count); +scalar_expr!(Lower, lower, string); +nary_scalar_expr!(Lpad, lpad); +scalar_expr!(Ltrim, ltrim, string); +scalar_expr!(MD5, md5, string); +scalar_expr!(OctetLength, octet_length, string); +nary_scalar_expr!(RegexpMatch, regexp_match); +nary_scalar_expr!(RegexpReplace, regexp_replace); +scalar_expr!(Replace, replace, string, from, to); +scalar_expr!(Repeat, repeat, string, count); +scalar_expr!(Reverse, reverse, string); +scalar_expr!(Right, right, string, count); +nary_scalar_expr!(Rpad, rpad); +scalar_expr!(Rtrim, rtrim, string); +scalar_expr!(SHA224, sha224, string); +scalar_expr!(SHA256, sha256, string); +scalar_expr!(SHA384, sha384, string); +scalar_expr!(SHA512, sha512, string); +scalar_expr!(SplitPart, split_part, expr, delimiter, index); +scalar_expr!(StartsWith, starts_with, string, characters); +scalar_expr!(Strpos, strpos, string, substring); +scalar_expr!(Substr, substr, string, position); +scalar_expr!(ToHex, to_hex, string); +scalar_expr!(Translate, translate, string, from, to); +scalar_expr!(Trim, trim, string); +scalar_expr!(Upper, upper, string); + +// date functions +scalar_expr!(DatePart, date_part, part, date); +scalar_expr!(DateTrunc, date_trunc, part, date); + +/// Returns an array of fixed size with each argument on it. +pub fn array(args: Vec) -> Expr { + Expr::ScalarFunction { + fun: built_in_function::BuiltinScalarFunction::Array, + args, + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn filter_is_null_and_is_not_null() { + let col_null = col("col1"); + let col_not_null = col("col2"); + assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL"); + assert_eq!( + format!("{:?}", col_not_null.is_not_null()), + "#col2 IS NOT NULL" + ); + } + + macro_rules! test_unary_scalar_expr { + ($ENUM:ident, $FUNC:ident) => {{ + if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) { + let name = built_in_function::BuiltinScalarFunction::$ENUM; + assert_eq!(name, fun); + assert_eq!(1, args.len()); + } else { + assert!(false, "unexpected"); + } + }}; + } + + macro_rules! test_scalar_expr { + ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { + let expected = vec![$(stringify!($arg)),*]; + let result = $FUNC( + $( + col(stringify!($arg.to_string())) + ),* + ); + if let Expr::ScalarFunction { fun, args } = result { + let name = built_in_function::BuiltinScalarFunction::$ENUM; + assert_eq!(name, fun); + assert_eq!(expected.len(), args.len()); + } else { + assert!(false, "unexpected: {:?}", result); + } + }; + } + + macro_rules! test_nary_scalar_expr { + ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { + let expected = vec![$(stringify!($arg)),*]; + let result = $FUNC( + vec![ + $( + col(stringify!($arg.to_string())) + ),* + ] + ); + if let Expr::ScalarFunction { fun, args } = result { + let name = built_in_function::BuiltinScalarFunction::$ENUM; + assert_eq!(name, fun); + assert_eq!(expected.len(), args.len()); + } else { + assert!(false, "unexpected: {:?}", result); + } + }; + } + + #[test] + fn scalar_function_definitions() { + test_unary_scalar_expr!(Sqrt, sqrt); + test_unary_scalar_expr!(Sin, sin); + test_unary_scalar_expr!(Cos, cos); + test_unary_scalar_expr!(Tan, tan); + test_unary_scalar_expr!(Asin, asin); + test_unary_scalar_expr!(Acos, acos); + test_unary_scalar_expr!(Atan, atan); + test_unary_scalar_expr!(Floor, floor); + test_unary_scalar_expr!(Ceil, ceil); + test_unary_scalar_expr!(Now, now); + test_unary_scalar_expr!(Round, round); + test_unary_scalar_expr!(Trunc, trunc); + test_unary_scalar_expr!(Abs, abs); + test_unary_scalar_expr!(Signum, signum); + test_unary_scalar_expr!(Exp, exp); + test_unary_scalar_expr!(Log2, log2); + test_unary_scalar_expr!(Log10, log10); + test_unary_scalar_expr!(Ln, ln); + + test_scalar_expr!(Ascii, ascii, input); + test_scalar_expr!(BitLength, bit_length, string); + test_nary_scalar_expr!(Btrim, btrim, string); + test_nary_scalar_expr!(Btrim, btrim, string, characters); + test_scalar_expr!(CharacterLength, character_length, string); + test_scalar_expr!(CharacterLength, length, string); + test_scalar_expr!(Chr, chr, string); + test_scalar_expr!(Digest, digest, string, algorithm); + test_scalar_expr!(InitCap, initcap, string); + test_scalar_expr!(Left, left, string, count); + test_scalar_expr!(Lower, lower, string); + test_nary_scalar_expr!(Lpad, lpad, string, count); + test_nary_scalar_expr!(Lpad, lpad, string, count, characters); + test_scalar_expr!(Ltrim, ltrim, string); + test_scalar_expr!(MD5, md5, string); + test_scalar_expr!(OctetLength, octet_length, string); + test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern); + test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags); + test_nary_scalar_expr!( + RegexpReplace, + regexp_replace, + string, + pattern, + replacement + ); + test_nary_scalar_expr!( + RegexpReplace, + regexp_replace, + string, + pattern, + replacement, + flags + ); + test_scalar_expr!(Replace, replace, string, from, to); + test_scalar_expr!(Repeat, repeat, string, count); + test_scalar_expr!(Reverse, reverse, string); + test_scalar_expr!(Right, right, string, count); + test_nary_scalar_expr!(Rpad, rpad, string, count); + test_nary_scalar_expr!(Rpad, rpad, string, count, characters); + test_scalar_expr!(Rtrim, rtrim, string); + test_scalar_expr!(SHA224, sha224, string); + test_scalar_expr!(SHA256, sha256, string); + test_scalar_expr!(SHA384, sha384, string); + test_scalar_expr!(SHA512, sha512, string); + test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); + test_scalar_expr!(StartsWith, starts_with, string, characters); + test_scalar_expr!(Strpos, strpos, string, substring); + test_scalar_expr!(Substr, substr, string, position); + test_scalar_expr!(ToHex, to_hex, string); + test_scalar_expr!(Translate, translate, string, from, to); + test_scalar_expr!(Trim, trim, string); + test_scalar_expr!(Upper, upper, string); + + test_scalar_expr!(DatePart, date_part, part, date); + test_scalar_expr!(DateTrunc, date_trunc, part, date); + } +} diff --git a/datafusion-expr/src/function.rs b/datafusion-expr/src/function.rs index 2bacd6ae6227d..3689ff759a8be 100644 --- a/datafusion-expr/src/function.rs +++ b/datafusion-expr/src/function.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Function module contains typing and signature for built-in and user defined functions. + use crate::Accumulator; use crate::ColumnarValue; use arrow::datatypes::DataType; diff --git a/datafusion-expr/src/literal.rs b/datafusion-expr/src/literal.rs index 02c75af695737..08646b808644b 100644 --- a/datafusion-expr/src/literal.rs +++ b/datafusion-expr/src/literal.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Literal module contains foundational types that are used to represent literals in DataFusion. + use crate::Expr; use datafusion_common::ScalarValue; diff --git a/datafusion-expr/src/operator.rs b/datafusion-expr/src/operator.rs index a1cad76cdd976..0d3f17776c7cf 100644 --- a/datafusion-expr/src/operator.rs +++ b/datafusion-expr/src/operator.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Operator module contains foundational types that are used to represent operators in DataFusion. + use crate::expr_fn::binary_expr; use crate::Expr; use std::fmt; @@ -138,3 +140,32 @@ impl ops::Rem for Expr { binary_expr(self, Operator::Modulo, rhs) } } + +#[cfg(test)] +mod tests { + use crate::lit; + + #[test] + fn test_operators() { + assert_eq!( + format!("{:?}", lit(1u32) + lit(2u32)), + "UInt32(1) + UInt32(2)" + ); + assert_eq!( + format!("{:?}", lit(1u32) - lit(2u32)), + "UInt32(1) - UInt32(2)" + ); + assert_eq!( + format!("{:?}", lit(1u32) * lit(2u32)), + "UInt32(1) * UInt32(2)" + ); + assert_eq!( + format!("{:?}", lit(1u32) / lit(2u32)), + "UInt32(1) / UInt32(2)" + ); + assert_eq!( + format!("{:?}", lit(1u32) % lit(2u32)), + "UInt32(1) % UInt32(2)" + ); + } +} diff --git a/datafusion-expr/src/signature.rs b/datafusion-expr/src/signature.rs index 5c27f422c105d..b347448f7e777 100644 --- a/datafusion-expr/src/signature.rs +++ b/datafusion-expr/src/signature.rs @@ -15,16 +15,24 @@ // specific language governing permissions and limitations // under the License. +//! Signature module contains foundational types that are used to represent signatures, types, +//! and return types of functions in DataFusion. + use arrow::datatypes::DataType; ///A function's volatility, which defines the functions eligibility for certain optimizations #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub enum Volatility { - /// Immutable - An immutable function will always return the same output when given the same input. An example of this is [BuiltinScalarFunction::Cos]. + /// Immutable - An immutable function will always return the same output when given the same + /// input. An example of this is [BuiltinScalarFunction::Cos]. Immutable, - /// Stable - A stable function may return different values given the same input accross different queries but must return the same value for a given input within a query. An example of this is [BuiltinScalarFunction::Now]. + /// Stable - A stable function may return different values given the same input across different + /// queries but must return the same value for a given input within a query. An example of + /// this is [BuiltinScalarFunction::Now]. Stable, - /// Volatile - A volatile function may change the return value from evaluation to evaluation. Mutiple invocations of a volatile function may return different results when used in the same query. An example of this is [BuiltinScalarFunction::Random]. + /// Volatile - A volatile function may change the return value from evaluation to evaluation. + /// Multiple invocations of a volatile function may return different results when used in the + /// same query. An example of this is [BuiltinScalarFunction::Random]. Volatile, } @@ -92,7 +100,7 @@ impl Signature { volatility, } } - /// exact - Creates a signture which must match the types in exact_types in order. + /// exact - Creates a signature which must match the types in exact_types in order. pub fn exact(exact_types: Vec, volatility: Volatility) -> Self { Signature { type_signature: TypeSignature::Exact(exact_types), diff --git a/datafusion-expr/src/udaf.rs b/datafusion-expr/src/udaf.rs index a39d58b622f39..8c15da40f2238 100644 --- a/datafusion-expr/src/udaf.rs +++ b/datafusion-expr/src/udaf.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! This module contains functions and structs supporting user-defined aggregate functions. +//! Udaf module contains functions and structs supporting user-defined aggregate functions. use crate::Expr; use crate::{ diff --git a/datafusion-expr/src/udf.rs b/datafusion-expr/src/udf.rs index 79a17a4a2b4bd..4d60b2994c6b8 100644 --- a/datafusion-expr/src/udf.rs +++ b/datafusion-expr/src/udf.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! UDF support +//! Udf module contains foundational types that are used to represent UDFs in DataFusion. use crate::{Expr, ReturnTypeFunction, ScalarFunctionImplementation, Signature}; use std::fmt; diff --git a/datafusion-expr/src/window_frame.rs b/datafusion-expr/src/window_frame.rs index ba65a5088b616..a0d6ed0ebdebc 100644 --- a/datafusion-expr/src/window_frame.rs +++ b/datafusion-expr/src/window_frame.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Window frame +//! Window frame module //! //! The frame-spec determines which output rows are read by an aggregate window function. The frame-spec consists of four parts: //! - A frame type - either ROWS, RANGE or GROUPS, diff --git a/datafusion-expr/src/window_function.rs b/datafusion-expr/src/window_function.rs index 59523d6540b20..bccf653b0ca7c 100644 --- a/datafusion-expr/src/window_function.rs +++ b/datafusion-expr/src/window_function.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +//! Window function module contains foundational types that are used to represent window functions +//! in DataFusion. + use crate::aggregate_function::AggregateFunction; use datafusion_common::{DataFusionError, Result}; use std::{fmt, str::FromStr}; diff --git a/datafusion/src/logical_plan/expr.rs b/datafusion/src/logical_plan/expr.rs index de052983f7709..ba853ee4aa747 100644 --- a/datafusion/src/logical_plan/expr.rs +++ b/datafusion/src/logical_plan/expr.rs @@ -19,15 +19,17 @@ //! such as `col = 5` or `SUM(col)`. See examples on the [`Expr`] struct. pub use super::Operator; -use crate::error::{DataFusionError, Result}; +use crate::error::Result; use crate::logical_plan::ExprSchemable; use crate::logical_plan::{DFField, DFSchema}; use crate::physical_plan::udaf::AggregateUDF; -use crate::physical_plan::{aggregates, functions, udf::ScalarUDF}; +use crate::physical_plan::udf::ScalarUDF; use arrow::datatypes::DataType; +use datafusion_common::DataFusionError; pub use datafusion_common::{Column, ExprSchema}; -pub use datafusion_expr::expr_fn::col; +pub use datafusion_expr::expr_fn::*; use datafusion_expr::AccumulatorFunctionImplementation; +use datafusion_expr::BuiltinScalarFunction; pub use datafusion_expr::Expr; use datafusion_expr::StateTypeFunction; pub use datafusion_expr::{lit, lit_timestamp_nano, Literal}; @@ -64,9 +66,7 @@ impl CaseBuilder { pub fn end(&self) -> Result { self.build() } -} -impl CaseBuilder { fn build(&self) -> Result { // collect all "then" expressions let mut then_expr = self.then_expr.clone(); @@ -127,15 +127,6 @@ pub fn when(when: Expr, then: Expr) -> CaseBuilder { } } -/// return a new expression with a logical AND -pub fn and(left: Expr, right: Expr) -> Expr { - Expr::BinaryExpr { - left: Box::new(left), - op: Operator::And, - right: Box::new(right), - } -} - /// Combines an array of filter expressions into a single filter expression /// consisting of the input filter expressions joined with logical AND. /// Returns None if the filters array is empty. @@ -150,15 +141,6 @@ pub fn combine_filters(filters: &[Expr]) -> Option { Some(combined_filter) } -/// return a new expression with a logical OR -pub fn or(left: Expr, right: Expr) -> Expr { - Expr::BinaryExpr { - left: Box::new(left), - op: Operator::Or, - right: Box::new(right), - } -} - /// Convert an expression into Column expression if it's already provided as input plan. /// /// For example, it rewrites: @@ -200,230 +182,6 @@ pub fn unalias(expr: Expr) -> Expr { } } -/// Create an expression to represent the min() aggregate function -pub fn min(expr: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::Min, - distinct: false, - args: vec![expr], - } -} - -/// Create an expression to represent the max() aggregate function -pub fn max(expr: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::Max, - distinct: false, - args: vec![expr], - } -} - -/// Create an expression to represent the sum() aggregate function -pub fn sum(expr: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::Sum, - distinct: false, - args: vec![expr], - } -} - -/// Create an expression to represent the avg() aggregate function -pub fn avg(expr: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::Avg, - distinct: false, - args: vec![expr], - } -} - -/// Create an expression to represent the count() aggregate function -pub fn count(expr: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::Count, - distinct: false, - args: vec![expr], - } -} - -/// Create an expression to represent the count(distinct) aggregate function -pub fn count_distinct(expr: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::Count, - distinct: true, - args: vec![expr], - } -} - -/// Create an in_list expression -pub fn in_list(expr: Expr, list: Vec, negated: bool) -> Expr { - Expr::InList { - expr: Box::new(expr), - list, - negated, - } -} - -/// Concatenates the text representations of all the arguments. NULL arguments are ignored. -pub fn concat(args: &[Expr]) -> Expr { - Expr::ScalarFunction { - fun: functions::BuiltinScalarFunction::Concat, - args: args.to_vec(), - } -} - -/// Concatenates all but the first argument, with separators. -/// The first argument is used as the separator string, and should not be NULL. -/// Other NULL arguments are ignored. -pub fn concat_ws(sep: impl Into, values: &[Expr]) -> Expr { - let mut args = vec![lit(sep.into())]; - args.extend_from_slice(values); - Expr::ScalarFunction { - fun: functions::BuiltinScalarFunction::ConcatWithSeparator, - args, - } -} - -/// Returns a random value in the range 0.0 <= x < 1.0 -pub fn random() -> Expr { - Expr::ScalarFunction { - fun: functions::BuiltinScalarFunction::Random, - args: vec![], - } -} - -/// Returns the approximate number of distinct input values. -/// This function provides an approximation of count(DISTINCT x). -/// Zero is returned if all input values are null. -/// This function should produce a standard error of 0.81%, -/// which is the standard deviation of the (approximately normal) -/// error distribution over all possible sets. -/// It does not guarantee an upper bound on the error for any specific input set. -pub fn approx_distinct(expr: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::ApproxDistinct, - distinct: false, - args: vec![expr], - } -} - -/// Calculate an approximation of the specified `percentile` for `expr`. -pub fn approx_percentile_cont(expr: Expr, percentile: Expr) -> Expr { - Expr::AggregateFunction { - fun: aggregates::AggregateFunction::ApproxPercentileCont, - distinct: false, - args: vec![expr, percentile], - } -} - -// TODO(kszucs): this seems buggy, unary_scalar_expr! is used for many -// varying arity functions -/// Create an convenience function representing a unary scalar function -macro_rules! unary_scalar_expr { - ($ENUM:ident, $FUNC:ident) => { - #[doc = concat!("Unary scalar function definition for ", stringify!($FUNC) ) ] - pub fn $FUNC(e: Expr) -> Expr { - Expr::ScalarFunction { - fun: functions::BuiltinScalarFunction::$ENUM, - args: vec![e], - } - } - }; -} - -macro_rules! scalar_expr { - ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { - #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ] - pub fn $FUNC($($arg: Expr),*) -> Expr { - Expr::ScalarFunction { - fun: functions::BuiltinScalarFunction::$ENUM, - args: vec![$($arg),*], - } - } - }; -} - -macro_rules! nary_scalar_expr { - ($ENUM:ident, $FUNC:ident) => { - #[doc = concat!("Scalar function definition for ", stringify!($FUNC) ) ] - pub fn $FUNC(args: Vec) -> Expr { - Expr::ScalarFunction { - fun: functions::BuiltinScalarFunction::$ENUM, - args, - } - } - }; -} - -// generate methods for creating the supported unary/binary expressions - -// math functions -unary_scalar_expr!(Sqrt, sqrt); -unary_scalar_expr!(Sin, sin); -unary_scalar_expr!(Cos, cos); -unary_scalar_expr!(Tan, tan); -unary_scalar_expr!(Asin, asin); -unary_scalar_expr!(Acos, acos); -unary_scalar_expr!(Atan, atan); -unary_scalar_expr!(Floor, floor); -unary_scalar_expr!(Ceil, ceil); -unary_scalar_expr!(Now, now); -unary_scalar_expr!(Round, round); -unary_scalar_expr!(Trunc, trunc); -unary_scalar_expr!(Abs, abs); -unary_scalar_expr!(Signum, signum); -unary_scalar_expr!(Exp, exp); -unary_scalar_expr!(Log2, log2); -unary_scalar_expr!(Log10, log10); -unary_scalar_expr!(Ln, ln); - -// string functions -scalar_expr!(Ascii, ascii, string); -scalar_expr!(BitLength, bit_length, string); -nary_scalar_expr!(Btrim, btrim); -scalar_expr!(CharacterLength, character_length, string); -scalar_expr!(CharacterLength, length, string); -scalar_expr!(Chr, chr, string); -scalar_expr!(Digest, digest, string, algorithm); -scalar_expr!(InitCap, initcap, string); -scalar_expr!(Left, left, string, count); -scalar_expr!(Lower, lower, string); -nary_scalar_expr!(Lpad, lpad); -scalar_expr!(Ltrim, ltrim, string); -scalar_expr!(MD5, md5, string); -scalar_expr!(OctetLength, octet_length, string); -nary_scalar_expr!(RegexpMatch, regexp_match); -nary_scalar_expr!(RegexpReplace, regexp_replace); -scalar_expr!(Replace, replace, string, from, to); -scalar_expr!(Repeat, repeat, string, count); -scalar_expr!(Reverse, reverse, string); -scalar_expr!(Right, right, string, count); -nary_scalar_expr!(Rpad, rpad); -scalar_expr!(Rtrim, rtrim, string); -scalar_expr!(SHA224, sha224, string); -scalar_expr!(SHA256, sha256, string); -scalar_expr!(SHA384, sha384, string); -scalar_expr!(SHA512, sha512, string); -scalar_expr!(SplitPart, split_part, expr, delimiter, index); -scalar_expr!(StartsWith, starts_with, string, characters); -scalar_expr!(Strpos, strpos, string, substring); -scalar_expr!(Substr, substr, string, position); -scalar_expr!(ToHex, to_hex, string); -scalar_expr!(Translate, translate, string, from, to); -scalar_expr!(Trim, trim, string); -scalar_expr!(Upper, upper, string); - -// date functions -scalar_expr!(DatePart, date_part, part, date); -scalar_expr!(DateTrunc, date_trunc, part, date); - -/// returns an array of fixed size with each argument on it. -pub fn array(args: Vec) -> Expr { - Expr::ScalarFunction { - fun: functions::BuiltinScalarFunction::Array, - args, - } -} - /// Creates a new UDF with a specific signature and specific return type. /// This is a helper function to create a new UDF. /// The function `create_udf` returns a subset of all possible `ScalarFunction`: @@ -483,7 +241,7 @@ pub fn exprlist_to_fields<'a>( /// let expr = call_fn("sin", vec![col("x")]).unwrap().lt(lit(0.2)); /// ``` pub fn call_fn(name: impl AsRef, args: Vec) -> Result { - match name.as_ref().parse::() { + match name.as_ref().parse::() { Ok(fun) => Ok(Expr::ScalarFunction { fun, args }), Err(e) => Err(e), } @@ -511,76 +269,10 @@ mod tests { assert!(maybe_expr.is_err()); } - #[test] - fn filter_is_null_and_is_not_null() { - let col_null = col("col1"); - let col_not_null = col("col2"); - assert_eq!(format!("{:?}", col_null.is_null()), "#col1 IS NULL"); - assert_eq!( - format!("{:?}", col_not_null.is_not_null()), - "#col2 IS NOT NULL" - ); - } - - #[test] - fn test_not() { - assert_eq!(lit(1).not(), !lit(1)); - } - - macro_rules! test_unary_scalar_expr { - ($ENUM:ident, $FUNC:ident) => {{ - if let Expr::ScalarFunction { fun, args } = $FUNC(col("tableA.a")) { - let name = functions::BuiltinScalarFunction::$ENUM; - assert_eq!(name, fun); - assert_eq!(1, args.len()); - } else { - assert!(false, "unexpected"); - } - }}; - } - - macro_rules! test_scalar_expr { - ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { - let expected = vec![$(stringify!($arg)),*]; - let result = $FUNC( - $( - col(stringify!($arg.to_string())) - ),* - ); - if let Expr::ScalarFunction { fun, args } = result { - let name = functions::BuiltinScalarFunction::$ENUM; - assert_eq!(name, fun); - assert_eq!(expected.len(), args.len()); - } else { - assert!(false, "unexpected: {:?}", result); - } - }; - } - - macro_rules! test_nary_scalar_expr { - ($ENUM:ident, $FUNC:ident, $($arg:ident),*) => { - let expected = vec![$(stringify!($arg)),*]; - let result = $FUNC( - vec![ - $( - col(stringify!($arg.to_string())) - ),* - ] - ); - if let Expr::ScalarFunction { fun, args } = result { - let name = functions::BuiltinScalarFunction::$ENUM; - assert_eq!(name, fun); - assert_eq!(expected.len(), args.len()); - } else { - assert!(false, "unexpected: {:?}", result); - } - }; - } - #[test] fn digest_function_definitions() { if let Expr::ScalarFunction { fun, args } = digest(col("tableA.a"), lit("md5")) { - let name = functions::BuiltinScalarFunction::Digest; + let name = BuiltinScalarFunction::Digest; assert_eq!(name, fun); assert_eq!(2, args.len()); } else { @@ -588,98 +280,6 @@ mod tests { } } - #[test] - fn scalar_function_definitions() { - test_unary_scalar_expr!(Sqrt, sqrt); - test_unary_scalar_expr!(Sin, sin); - test_unary_scalar_expr!(Cos, cos); - test_unary_scalar_expr!(Tan, tan); - test_unary_scalar_expr!(Asin, asin); - test_unary_scalar_expr!(Acos, acos); - test_unary_scalar_expr!(Atan, atan); - test_unary_scalar_expr!(Floor, floor); - test_unary_scalar_expr!(Ceil, ceil); - test_unary_scalar_expr!(Now, now); - test_unary_scalar_expr!(Round, round); - test_unary_scalar_expr!(Trunc, trunc); - test_unary_scalar_expr!(Abs, abs); - test_unary_scalar_expr!(Signum, signum); - test_unary_scalar_expr!(Exp, exp); - test_unary_scalar_expr!(Log2, log2); - test_unary_scalar_expr!(Log10, log10); - test_unary_scalar_expr!(Ln, ln); - - test_scalar_expr!(Ascii, ascii, input); - test_scalar_expr!(BitLength, bit_length, string); - test_nary_scalar_expr!(Btrim, btrim, string); - test_nary_scalar_expr!(Btrim, btrim, string, characters); - test_scalar_expr!(CharacterLength, character_length, string); - test_scalar_expr!(CharacterLength, length, string); - test_scalar_expr!(Chr, chr, string); - test_scalar_expr!(Digest, digest, string, algorithm); - test_scalar_expr!(InitCap, initcap, string); - test_scalar_expr!(Left, left, string, count); - test_scalar_expr!(Lower, lower, string); - test_nary_scalar_expr!(Lpad, lpad, string, count); - test_nary_scalar_expr!(Lpad, lpad, string, count, characters); - test_scalar_expr!(Ltrim, ltrim, string); - test_scalar_expr!(MD5, md5, string); - test_scalar_expr!(OctetLength, octet_length, string); - test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern); - test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags); - test_nary_scalar_expr!( - RegexpReplace, - regexp_replace, - string, - pattern, - replacement - ); - test_nary_scalar_expr!( - RegexpReplace, - regexp_replace, - string, - pattern, - replacement, - flags - ); - test_scalar_expr!(Replace, replace, string, from, to); - test_scalar_expr!(Repeat, repeat, string, count); - test_scalar_expr!(Reverse, reverse, string); - test_scalar_expr!(Right, right, string, count); - test_nary_scalar_expr!(Rpad, rpad, string, count); - test_nary_scalar_expr!(Rpad, rpad, string, count, characters); - test_scalar_expr!(Rtrim, rtrim, string); - test_scalar_expr!(SHA224, sha224, string); - test_scalar_expr!(SHA256, sha256, string); - test_scalar_expr!(SHA384, sha384, string); - test_scalar_expr!(SHA512, sha512, string); - test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); - test_scalar_expr!(StartsWith, starts_with, string, characters); - test_scalar_expr!(Strpos, strpos, string, substring); - test_scalar_expr!(Substr, substr, string, position); - test_scalar_expr!(ToHex, to_hex, string); - test_scalar_expr!(Translate, translate, string, from, to); - test_scalar_expr!(Trim, trim, string); - test_scalar_expr!(Upper, upper, string); - - test_scalar_expr!(DatePart, date_part, part, date); - test_scalar_expr!(DateTrunc, date_trunc, part, date); - } - - #[test] - fn test_partial_ord() { - // Test validates that partial ord is defined for Expr using hashes, not - // intended to exhaustively test all possibilities - let exp1 = col("a") + lit(1); - let exp2 = col("a") + lit(2); - let exp3 = !(col("a") + lit(2)); - - assert!(exp1 < exp2); - assert!(exp2 > exp1); - assert!(exp2 > exp3); - assert!(exp3 < exp2); - } - #[test] fn combine_zero_filters() { let result = combine_filters(&[]); diff --git a/datafusion/src/logical_plan/operators.rs b/datafusion/src/logical_plan/operators.rs index 2f129284fa711..132f8a89f0639 100644 --- a/datafusion/src/logical_plan/operators.rs +++ b/datafusion/src/logical_plan/operators.rs @@ -16,32 +16,3 @@ // under the License. pub use datafusion_expr::Operator; - -#[cfg(test)] -mod tests { - use crate::prelude::lit; - - #[test] - fn test_operators() { - assert_eq!( - format!("{:?}", lit(1u32) + lit(2u32)), - "UInt32(1) + UInt32(2)" - ); - assert_eq!( - format!("{:?}", lit(1u32) - lit(2u32)), - "UInt32(1) - UInt32(2)" - ); - assert_eq!( - format!("{:?}", lit(1u32) * lit(2u32)), - "UInt32(1) * UInt32(2)" - ); - assert_eq!( - format!("{:?}", lit(1u32) / lit(2u32)), - "UInt32(1) / UInt32(2)" - ); - assert_eq!( - format!("{:?}", lit(1u32) % lit(2u32)), - "UInt32(1) % UInt32(2)" - ); - } -}