Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 51 additions & 48 deletions rust/datafusion/src/logical_plan/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,13 +318,13 @@ impl Expr {
///
/// This function errors when it is impossible to cast the
/// expression to the target [arrow::datatypes::DataType].
pub fn cast_to(&self, cast_to_type: &DataType, schema: &DFSchema) -> Result<Expr> {
pub fn cast_to(self, cast_to_type: &DataType, schema: &DFSchema) -> Result<Expr> {
let this_type = self.get_type(schema)?;
if this_type == *cast_to_type {
Ok(self.clone())
Ok(self)
} else if can_cast_types(&this_type, cast_to_type) {
Ok(Expr::Cast {
expr: Box::new(self.clone()),
expr: Box::new(self),
data_type: cast_to_type.clone(),
})
} else {
Expand All @@ -335,75 +335,78 @@ impl Expr {
}
}

/// Equal
pub fn eq(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::Eq, other)
/// Return `self == other`
pub fn eq(self, other: Expr) -> Expr {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change is to have all these functions take self rather than &self.

Given they almost always are used like let expr = lit("foo").eq(lit("bar")) self is not reused.

Furthermore, the clone() is actually doing a deep clone, so in the above example lit("foo") gets copied twice.

binary_expr(self, Operator::Eq, other)
}

/// Not equal
pub fn not_eq(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::NotEq, other)
/// Return `self != other`
pub fn not_eq(self, other: Expr) -> Expr {
binary_expr(self, Operator::NotEq, other)
}

/// Greater than
pub fn gt(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::Gt, other)
/// Return `self > other`
pub fn gt(self, other: Expr) -> Expr {
binary_expr(self, Operator::Gt, other)
}

/// Greater than or equal to
pub fn gt_eq(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::GtEq, other)
/// Return `self >= other`
pub fn gt_eq(self, other: Expr) -> Expr {
binary_expr(self, Operator::GtEq, other)
}

/// Less than
pub fn lt(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::Lt, other)
/// Return `self < other`
pub fn lt(self, other: Expr) -> Expr {
binary_expr(self, Operator::Lt, other)
}

/// Less than or equal to
pub fn lt_eq(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::LtEq, other)
/// Return `self <= other`
pub fn lt_eq(self, other: Expr) -> Expr {
binary_expr(self, Operator::LtEq, other)
}

/// And
pub fn and(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::And, other)
/// Return `self && other`
pub fn and(self, other: Expr) -> Expr {
binary_expr(self, Operator::And, other)
}

/// Or
pub fn or(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::Or, other)
/// Return `self || other`
pub fn or(self, other: Expr) -> Expr {
binary_expr(self, Operator::Or, other)
}

/// Not
pub fn not(&self) -> Expr {
Expr::Not(Box::new(self.clone()))
/// Return `!self`
#[allow(clippy::should_implement_trait)]
pub fn not(self) -> Expr {
Expr::Not(Box::new(self))
}

/// Calculate the modulus of two expressions
pub fn modulus(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::Modulus, other)
/// Calculate the modulus of two expressions.
/// Return `self % other`
pub fn modulus(self, other: Expr) -> Expr {
binary_expr(self, Operator::Modulus, other)
}

/// like (string) another expression
pub fn like(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::Like, other)
/// Return `self LIKE other`
pub fn like(self, other: Expr) -> Expr {
binary_expr(self, Operator::Like, other)
}

/// not like another expression
pub fn not_like(&self, other: Expr) -> Expr {
binary_expr(self.clone(), Operator::NotLike, other)
/// Return `self NOT LIKE other`
pub fn not_like(self, other: Expr) -> Expr {
binary_expr(self, Operator::NotLike, other)
}

/// Alias
pub fn alias(&self, name: &str) -> Expr {
Expr::Alias(Box::new(self.clone()), name.to_owned())
/// Return `self AS name` alias expression
pub fn alias(self, name: &str) -> Expr {
Expr::Alias(Box::new(self), name.to_owned())
}

/// InList
pub fn in_list(&self, list: Vec<Expr>, negated: bool) -> Expr {
/// Return `self IN <list>` if `negated` is false, otherwise
/// return `self NOT IN <list>`.a
pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
Expr::InList {
expr: Box::new(self.clone()),
expr: Box::new(self),
list,
negated,
}
Expand All @@ -415,9 +418,9 @@ impl Expr {
/// # use datafusion::logical_plan::col;
/// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
/// ```
pub fn sort(&self, asc: bool, nulls_first: bool) -> Expr {
pub fn sort(self, asc: bool, nulls_first: bool) -> Expr {
Expr::Sort {
expr: Box::new(self.clone()),
expr: Box::new(self),
asc,
nulls_first,
}
Expand Down Expand Up @@ -752,7 +755,7 @@ pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
}
}

/// Whether it can be represented as a literal expression
/// Trait for converting a type to a [`Literal`] literal expression.
pub trait Literal {
/// convert the value to a Literal expression
fn lit(&self) -> Expr;
Expand Down
8 changes: 4 additions & 4 deletions rust/datafusion/src/physical_plan/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ pub struct RowGroupPredicateBuilder {
}

impl RowGroupPredicateBuilder {
/// Try to create a new instance of PredicateExpressionBuilder.
/// Try to create a new instance of PredicateExpressionBuilder.
/// This will translate the filter expression into a statistics predicate expression
/// (for example (column / 2) = 4 becomes (column_min / 2) <= 4 && 4 <= (column_max / 2)),
/// then convert it to a DataFusion PhysicalExpression and cache it for later use by build_row_group_predicate.
Expand Down Expand Up @@ -340,11 +340,11 @@ impl RowGroupPredicateBuilder {
})
}

/// Generate a predicate function used to filter row group metadata.
/// Generate a predicate function used to filter row group metadata.
/// This function takes a list of all row groups as parameter,
/// so that DataFusion's physical expressions can be re-used by
/// generating a RecordBatch, containing statistics arrays,
/// on which the physical predicate expression is executed to generate a row group filter array.
/// on which the physical predicate expression is executed to generate a row group filter array.
/// The generated filter array is then used in the returned closure to filter row groups.
pub fn build_row_group_predicate(
&self,
Expand Down Expand Up @@ -611,7 +611,7 @@ fn build_predicate_expression(
let max_column_expr = expr_builder.max_column_expr()?;
min_column_expr
.lt_eq(expr_builder.scalar_expr().clone())
.and(expr_builder.scalar_expr().lt_eq(max_column_expr))
.and(expr_builder.scalar_expr().clone().lt_eq(max_column_expr))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the only change where the caller had to do an extra clone (and in this case it was actually needed as scalar_expr() is used twice. In other locations the expr was ignored and this can be consumed without issue

}
Operator::Gt => {
// column > literal => (min, max) > literal => max > literal
Expand Down