Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions datafusion/core/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -582,12 +582,21 @@ impl DataFrame {
Ok(DataFrame::new(self.session_state, plan))
}

/// Join this DataFrame with another DataFrame using the specified columns as join keys.
/// Join this `DataFrame` with another `DataFrame` using explicitly specified
/// columns and an optional filter expression.
///
/// Filter expression expected to contain non-equality predicates that can not be pushed
/// down to any of join inputs.
/// In case of outer join, filter applied to only matched rows.
/// See [`join_on`](Self::join_on) for a more concise way to specify the
/// join condition. Since DataFusion will automatically identify and
/// optimize equality predicates there is no performance difference between
/// this function and `join_on`
///
/// `left_cols` and `right_cols` are used to form "equijoin" predicates (see
/// example below), which are then combined with the optional `filter`
/// expression.
///
/// Note that in case of outer join, the `filter` is applied to only matched rows.
///
/// # Example
/// ```
/// # use datafusion::prelude::*;
/// # use datafusion::error::Result;
Expand All @@ -600,11 +609,14 @@ impl DataFrame {
/// col("a").alias("a2"),
/// col("b").alias("b2"),
/// col("c").alias("c2")])?;
/// // Perform the equivalent of `left INNER JOIN right ON (a = a2 AND b = b2)`
/// // finding all pairs of rows from `left` and `right` where `a = a2` and `b = b2`.
/// let join = left.join(right, JoinType::Inner, &["a", "b"], &["a2", "b2"], None)?;
/// let batches = join.collect().await?;
/// # Ok(())
/// # }
/// ```
///
pub fn join(
self,
right: DataFrame,
Expand All @@ -624,10 +636,13 @@ impl DataFrame {
Ok(DataFrame::new(self.session_state, plan))
}

/// Join this DataFrame with another DataFrame using the specified expressions.
/// Join this `DataFrame` with another `DataFrame` using the specified
/// expressions.
///
/// Simply a thin wrapper over [`join`](Self::join) where the join keys are not provided,
/// and the provided expressions are AND'ed together to form the filter expression.
/// Note that DataFusion automatically optimizes joins, including
/// identifying and optimizing equality predicates.
///
/// # Example
///
/// ```
/// # use datafusion::prelude::*;
Expand All @@ -646,6 +661,10 @@ impl DataFrame {
/// col("b").alias("b2"),
/// col("c").alias("c2"),
/// ])?;
///
/// // Perform the equivalent of `left INNER JOIN right ON (a != a2 AND b != b2)`
/// // finding all pairs of rows from `left` and `right` where
/// // where `a != a2` and `b != b2`.
/// let join_on = left.join_on(
/// right,
/// JoinType::Inner,
Expand Down
44 changes: 33 additions & 11 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -611,11 +611,19 @@ impl LogicalPlanBuilder {
})))
}

/// Apply a join with on constraint.
/// Apply a join to `right` using explicitly specified columns and an
/// optional filter expression.
///
/// Filter expression expected to contain non-equality predicates that can not be pushed
/// down to any of join inputs.
/// In case of outer join, filter applied to only matched rows.
/// See [`join_on`](Self::join_on) for a more concise way to specify the
/// join condition. Since DataFusion will automatically identify and
/// optimize equality predicates there is no performance difference between
/// this function and `join_on`
///
/// `left_cols` and `right_cols` are used to form "equijoin" predicates (see
/// example below), which are then combined with the optional `filter`
/// expression.
///
/// Note that in case of outer join, the `filter` is applied to only matched rows.
pub fn join(
self,
right: LogicalPlan,
Expand All @@ -626,11 +634,12 @@ impl LogicalPlanBuilder {
self.join_detailed(right, join_type, join_keys, filter, false)
}

/// Apply a join with on constraint.
/// Apply a join with using the specified expressions.
///
/// Note that DataFusion automatically optimizes joins, including
/// identifying and optimizing equality predicates.
///
/// The `ExtractEquijoinPredicate` optimizer pass has the ability to split join predicates into
/// equijoin predicates and (other) filter predicates. Therefore, if you prefer not to manually split the
/// join predicates, it is recommended to use the `join_on` method instead of the `join` method.
/// # Example
///
/// ```
/// # use datafusion_expr::{Expr, col, LogicalPlanBuilder,
Expand All @@ -650,8 +659,15 @@ impl LogicalPlanBuilder {
///
/// let right_plan = LogicalPlanBuilder::scan("right", right_table, None)?.build()?;
///
/// let exprs = vec![col("left.a").eq(col("right.a")), col("left.b").not_eq(col("right.b"))];
/// // Form the expression `(left.a != right.a)` AND `(left.b != right.b)`
/// let exprs = vec![
/// col("left.a").eq(col("right.a")),
/// col("left.b").not_eq(col("right.b"))
/// ];
///
/// // Perform the equivalent of `left INNER JOIN right ON (a != a2 AND b != b2)`
/// // finding all pairs of rows from `left` and `right` where
/// // where `a = a2` and `b != b2`.
/// let plan = LogicalPlanBuilder::scan("left", left_table, None)?
/// .join_on(right_plan, JoinType::Inner, exprs)?
/// .build()?;
Expand Down Expand Up @@ -688,8 +704,14 @@ impl LogicalPlanBuilder {
)
}

/// Apply a join with on constraint and specified null equality
/// If null_equals_null is true then null == null, else null != null
/// Apply a join with on constraint and specified null equality.
///
/// The behavior is the same as [`join`](Self::join) except that it allows
/// specifying the null equality behavior.
///
/// If `null_equals_null=true`, rows where both join keys are `null` will be
/// emitted. Otherwise rows where either or both join keys are `null` will be
/// omitted.
pub fn join_detailed(
self,
right: LogicalPlan,
Expand Down