Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,15 @@ public Cost visitPhysicalHashJoin(
);
}

double probeShortcutFactor = 1.0;
if (ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null
&& !ConnectContext.get().getStatementContext().isHasUnknownColStats()
&& physicalHashJoin.getJoinType().isLeftSemiOrAntiJoin()
&& physicalHashJoin.getOtherJoinConjuncts().isEmpty()
&& physicalHashJoin.getMarkJoinConjuncts().isEmpty()) {
// left semi/anti joins have a short-cut optimization; apply a probe-side factor to distinguish them from right semi/anti joins
probeShortcutFactor = context.getSessionVariable().getLeftSemiOrAntiProbeFactor();
}
if (context.isBroadcastJoin()) {
// compared with a shuffle join, a broadcast join takes a penalty on both the build and probe sides;
// currently we use the following factor as the penalty factor:
Expand All @@ -417,14 +426,15 @@ public Cost visitPhysicalHashJoin(
}
}
return CostV1.of(context.getSessionVariable(),
leftRowCount + rightRowCount * buildSideFactor + outputRowCount * probeSideFactor,
leftRowCount * probeShortcutFactor + rightRowCount * probeShortcutFactor * buildSideFactor
+ outputRowCount * probeSideFactor,
rightRowCount,
0
);
}
return CostV1.of(context.getSessionVariable(), leftRowCount + rightRowCount + outputRowCount,
rightRowCount,
0
return CostV1.of(context.getSessionVariable(),
leftRowCount * probeShortcutFactor + rightRowCount * probeShortcutFactor + outputRowCount,
rightRowCount, 0
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ public class SessionVariable implements Serializable, Writable {

public static final String FORBID_UNKNOWN_COLUMN_STATS = "forbid_unknown_col_stats";
public static final String BROADCAST_RIGHT_TABLE_SCALE_FACTOR = "broadcast_right_table_scale_factor";
public static final String LEFT_SEMI_OR_ANTI_PROBE_FACTOR = "left_semi_or_anti_probe_factor";
public static final String BROADCAST_ROW_COUNT_LIMIT = "broadcast_row_count_limit";

// percentage of EXEC_MEM_LIMIT
Expand Down Expand Up @@ -1253,6 +1254,9 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
@VariableMgr.VarAttr(name = BROADCAST_RIGHT_TABLE_SCALE_FACTOR)
private double broadcastRightTableScaleFactor = 0.0;

// Cost multiplier read by the hash join cost model for left semi/anti joins that have
// no other-join conjuncts and no mark-join conjuncts, when column stats are known.
// It scales the left and right row-count terms of the join cost; defaults to 0.05.
@VariableMgr.VarAttr(name = LEFT_SEMI_OR_ANTI_PROBE_FACTOR)
private double leftSemiOrAntiProbeFactor = 0.05;

@VariableMgr.VarAttr(name = BROADCAST_ROW_COUNT_LIMIT, needForward = true)
private double broadcastRowCountLimit = 30000000;

Expand Down Expand Up @@ -2705,6 +2709,14 @@ public void setBroadcastRightTableScaleFactor(double broadcastRightTableScaleFac
this.broadcastRightTableScaleFactor = broadcastRightTableScaleFactor;
}

/**
 * Returns the current value of the {@code left_semi_or_anti_probe_factor} session variable.
 *
 * @return the factor stored in {@code leftSemiOrAntiProbeFactor}
 */
public double getLeftSemiOrAntiProbeFactor() {
return leftSemiOrAntiProbeFactor;
}

/**
 * Overwrites the value of the {@code left_semi_or_anti_probe_factor} session variable.
 * The value is stored as given; no range validation is performed here.
 *
 * @param leftSemiOrAntiProbeFactor the new factor to store
 */
public void setLeftSemiOrAntiProbeFactor(double leftSemiOrAntiProbeFactor) {
this.leftSemiOrAntiProbeFactor = leftSemiOrAntiProbeFactor;
}

/**
 * Returns the current value of the {@code broadcast_row_count_limit} session variable.
 *
 * @return the limit stored in {@code broadcastRowCountLimit}
 */
public double getBroadcastRowCountLimit() {
return broadcastRowCountLimit;
}
Expand Down
40 changes: 20 additions & 20 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query16.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,33 @@ PhysicalResultSink
----------hashAgg[GLOBAL]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number]
------------------PhysicalDistribute[DistributionSpecHash]
--------------------PhysicalProject
----------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4
----------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk]
------------------PhysicalProject
--------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF3 cs_order_number->[cr_order_number]
--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk]
--------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk]
----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
--------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2
----------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk]
------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk]
--------------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=()
----------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2
------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------PhysicalProject
----------------------------------filter((customer_address.ca_state = 'PA'))
------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3
----------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalOlapScan[catalog_returns]
--------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------PhysicalProject
------------------------------filter((call_center.cc_county = 'Williamson County'))
--------------------------------PhysicalOlapScan[call_center]
------------------------------filter((customer_address.ca_state = 'PA'))
--------------------------------PhysicalOlapScan[customer_address]
------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------PhysicalProject
----------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01'))
------------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------PhysicalProject
----------------------filter((call_center.cc_county = 'Williamson County'))
------------------------PhysicalOlapScan[call_center]

Hint log:
Used: leading(catalog_sales { cs1 customer_address date_dim call_center } )
Expand Down
28 changes: 14 additions & 14 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query33.out
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ PhysicalResultSink
------------hashAgg[LOCAL]
--------------PhysicalUnion
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF3 i_manufact_id->[i_manufact_id]
--------------------PhysicalDistribute[DistributionSpecHash]
----------------------PhysicalProject
------------------------filter((item.i_category = 'Books'))
--------------------------PhysicalOlapScan[item] apply RFs: RF3
------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF3 i_manufact_id->[i_manufact_id]
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
Expand All @@ -37,13 +33,13 @@ PhysicalResultSink
------------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[item]
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF7 i_manufact_id->[i_manufact_id]
----------------------------------PhysicalOlapScan[item] apply RFs: RF3
--------------------PhysicalDistribute[DistributionSpecHash]
----------------------PhysicalProject
------------------------filter((item.i_category = 'Books'))
--------------------------PhysicalOlapScan[item] apply RFs: RF7
--------------------------PhysicalOlapScan[item]
----------------PhysicalProject
------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF7 i_manufact_id->[i_manufact_id]
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
Expand All @@ -67,13 +63,13 @@ PhysicalResultSink
------------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[item]
----------------PhysicalProject
------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF11 i_manufact_id->[i_manufact_id]
----------------------------------PhysicalOlapScan[item] apply RFs: RF7
--------------------PhysicalDistribute[DistributionSpecHash]
----------------------PhysicalProject
------------------------filter((item.i_category = 'Books'))
--------------------------PhysicalOlapScan[item] apply RFs: RF11
--------------------------PhysicalOlapScan[item]
----------------PhysicalProject
------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((item.i_manufact_id = item.i_manufact_id)) otherCondition=() build RFs:RF11 i_manufact_id->[i_manufact_id]
--------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------hashAgg[LOCAL]
Expand All @@ -97,7 +93,11 @@ PhysicalResultSink
------------------------------------------PhysicalOlapScan[customer_address]
------------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[item]
----------------------------------PhysicalOlapScan[item] apply RFs: RF11
--------------------PhysicalDistribute[DistributionSpecHash]
----------------------PhysicalProject
------------------------filter((item.i_category = 'Books'))
--------------------------PhysicalOlapScan[item]

Hint log:
Used: leading(store_sales date_dim customer_address item ) leading(catalog_sales date_dim customer_address item ) leading(web_sales date_dim customer_address item )
Expand Down
52 changes: 26 additions & 26 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query35.out
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,50 @@ PhysicalResultSink
--------------hashAgg[LOCAL]
----------------PhysicalProject
------------------filter((ifnull($c$1, FALSE) OR ifnull($c$2, FALSE)))
--------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=()
--------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = catalog_sales.cs_ship_customer_sk)) otherCondition=()
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF5 d_date_sk->[cs_sold_date_sk]
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF5
----------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN] hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk]
--------------------------hashJoin[INNER_JOIN] hashCondition=((customer_demographics.cd_demo_sk = c.c_current_cdemo_sk)) otherCondition=() build RFs:RF5 cd_demo_sk->[c_current_cdemo_sk]
----------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk]
--------------------------------hashJoin[INNER_JOIN] hashCondition=((c.c_current_addr_sk = ca.ca_address_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk]
----------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ss_customer_sk]
--------------------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------------------PhysicalProject
------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 RF2
--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------------PhysicalProject
------------------------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999))
--------------------------------------------------PhysicalOlapScan[date_dim]
--------------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=()
------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((c.c_customer_sk = web_sales.ws_bill_customer_sk)) otherCondition=()
--------------------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((c.c_customer_sk = store_sales.ss_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ss_customer_sk]
----------------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------------PhysicalProject
--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk]
--------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0
------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3
----------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------------------------PhysicalProject
--------------------------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999))
----------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3 RF4
--------------------------------------------PhysicalOlapScan[customer] apply RFs: RF4 RF5
--------------------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------------------PhysicalProject
------------------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_sold_date_sk]
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[web_sales] apply RFs: RF1
--------------------------------------------PhysicalDistribute[DistributionSpecReplicated]
----------------------------------------------PhysicalProject
------------------------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999))
--------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[customer_address]
----------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[customer_demographics]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk]
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0
----------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_qoy < 4) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim]

Loading