Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion be/src/vec/exec/join/vnested_loop_join_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ Status VNestedLoopJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
}
RETURN_IF_ERROR(
vectorized::VExpr::create_expr_trees(_pool, filter_src_exprs, &_filter_src_expr_ctxs));
DCHECK(!filter_src_exprs.empty() == _is_output_left_side_only);
return Status::OK();
}

Expand Down Expand Up @@ -189,6 +188,13 @@ Status VNestedLoopJoinNode::sink(doris::RuntimeState* state, vectorized::Block*
if (eos) {
COUNTER_UPDATE(_build_rows_counter, _build_rows);
RuntimeFilterBuild(this)(state);

// optimize `in bitmap`, see https://github.com/apache/doris/issues/14338
if (_is_output_left_side_only &&
((_join_op == TJoinOp::type::LEFT_SEMI_JOIN && _build_blocks.empty()) ||
(_join_op == TJoinOp::type::LEFT_ANTI_JOIN && !_build_blocks.empty()))) {
_left_side_eos = true;
}
}

return Status::OK();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,7 @@ private static boolean mergeExpr(SelectStmt stmt, Expr expr,
break;
}

boolean isInBitmap = false;
if (!hasEqJoinPred && !inlineView.isCorrelated()) {
// Join with InPredicate is actually an equal join, so we choose HashJoin.
if (expr instanceof ExistsPredicate) {
Expand All @@ -811,6 +812,7 @@ private static boolean mergeExpr(SelectStmt stmt, Expr expr,
&& (((FunctionCallExpr) joinConjunct).getFnName().getFunction()
.equalsIgnoreCase(BITMAP_CONTAINS))) {
joinOp = ((InPredicate) expr).isNotIn() ? JoinOperator.LEFT_ANTI_JOIN : JoinOperator.LEFT_SEMI_JOIN;
isInBitmap = true;
} else {
joinOp = JoinOperator.CROSS_JOIN;
// We can equal the aggregate subquery using a cross join. All conjuncts
Expand All @@ -829,6 +831,7 @@ private static boolean mergeExpr(SelectStmt stmt, Expr expr,

inlineView.setMark(markTuple);
inlineView.setJoinOp(joinOp);
inlineView.setInBitmap(isInBitmap);
if (joinOp != JoinOperator.CROSS_JOIN) {
inlineView.setOnClause(onClausePredicate);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ public class TableRef implements ParseNode, Writable {
// Indicates whether this table ref is given an explicit alias,
protected boolean hasExplicitAlias;
protected JoinOperator joinOp;
protected boolean isInBitmap;
// for mark join
protected boolean isMark;
// we must record mark tuple name for re-analyze
Expand Down Expand Up @@ -280,6 +281,14 @@ public void setJoinOp(JoinOperator op) {
this.joinOp = op;
}

/**
 * Returns the current value of this table ref's {@code isInBitmap} flag.
 *
 * @return {@code true} if the flag has been set via {@link #setInBitmap(boolean)}, {@code false} otherwise
 */
public boolean isInBitmap() {
    return this.isInBitmap;
}

/**
 * Stores the given value in this table ref's {@code isInBitmap} flag.
 *
 * @param inBitmap new value of the flag
 */
public void setInBitmap(boolean inBitmap) {
    this.isInBitmap = inBitmap;
}

/**
 * Returns the current value of this table ref's {@code isMark} flag (used for mark join).
 *
 * @return the value of the {@code isMark} field
 */
public boolean isMark() {
    return this.isMark;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@
public class NestedLoopJoinNode extends JoinNodeBase {
private static final Logger LOG = LogManager.getLogger(NestedLoopJoinNode.class);

// If isOutputLeftSideOnly=true, the data from the left table is returned directly without a join operation.
// This is used to optimize `in bitmap`, because bitmap will make a lot of copies when doing Nested Loop Join,
// which is very resource intensive.
// `in bitmap` has two cases:
// 1. select * from tbl1 where k1 in (select bitmap_col from tbl2);
// This generates a bitmap runtime filter that filters the left table. Because the bitmap is an exact filter,
// the rows do not need to be filtered again in the NestedLoopJoinNode, so it returns the left table data directly.
// 2. select * from tbl1 where 1 in (select bitmap_col from tbl2);
// This sql will be rewritten to
// "select * from tbl1 left semi join tbl2 where bitmap_contains(tbl2.bitmap_col, 1);"
// In this case the node returns all data in the left table to the parent node when there is data on the
// build side, and returns nothing when there is no data on the build side.
private boolean isOutputLeftSideOnly = false;

private List<Expr> runtimeFilterExpr = Lists.newArrayList();
Expand Down Expand Up @@ -246,8 +258,8 @@ public String getNodeExplainString(String detailPrefix, TExplainLevel detailLeve
if (!runtimeFilters.isEmpty()) {
output.append(detailPrefix).append("runtime filters: ");
output.append(getRuntimeFilterExplainString(true));
output.append(detailPrefix).append("is output left side only: ").append(isOutputLeftSideOnly).append("\n");
}
output.append(detailPrefix).append("is output left side only: ").append(isOutputLeftSideOnly).append("\n");
output.append(detailPrefix).append(String.format("cardinality=%,d", cardinality)).append("\n");
// todo unify in plan node
if (vOutputTupleDesc != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2115,6 +2115,7 @@ private PlanNode createJoinNodeBase(Analyzer analyzer, PlanNode outer, PlanNode
result.setJoinConjuncts(joinConjuncts);
result.addConjuncts(analyzer.getMarkConjuncts(innerRef));
result.init(analyzer);
result.setOutputLeftSideOnly(innerRef.isInBitmap() && joinConjuncts.isEmpty());
return result;
}

Expand Down
40 changes: 40 additions & 0 deletions regression-test/data/query_p0/join/test_bitmap_filter.out
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,43 @@
1 1989
10 1991

-- !sql15 --
\N \N
1 1989
2 1986
3 1989
4 1991
5 1985
6 32767
7 -32767
8 255
9 1991
10 1991
11 1989
12 32767
13 -32767
14 255
15 1992

-- !sql16 --

-- !sql17 --

-- !sql18 --
\N \N
1 1989
2 1986
3 1989
4 1991
5 1985
6 32767
7 -32767
8 255
9 1991
10 1991
11 1989
12 32767
13 -32767
14 255
15 1992

Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ suite("test_bitmap_filter", "query_p0") {

qt_sql14 "select k1, k2 from ${tbl1} where k1 in (select bitmap_from_string('1,10')) order by 1, 2"

qt_sql15 "select k1, k2 from ${tbl1} t where 11 in (select k2 from ${tbl2}) order by 1, 2;"

qt_sql16 "select k1, k2 from ${tbl1} t where 100 in (select k2 from ${tbl2}) order by 1, 2;"

qt_sql17 "select k1, k2 from ${tbl1} t where 10 not in (select k2 from ${tbl2}) order by 1, 2;"

qt_sql18 "select k1, k2 from ${tbl1} t where 100 not in (select k2 from ${tbl2}) order by 1, 2;"

test {
sql "select k1, k2 from ${tbl1} b1 where k1 in (select k2 from ${tbl2} b2 where b1.k2 = b2.k1) order by k1;"
exception "In bitmap does not support correlated subquery"
Expand Down