Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,20 @@
public class ExpressionOptimization extends ExpressionRewrite {
public static final List<ExpressionRewriteRule> OPTIMIZE_REWRITE_RULES = ImmutableList.of(
bottomUp(
ExtractCommonFactorRule.INSTANCE,
DistinctPredicatesRule.INSTANCE,
SimplifyComparisonPredicate.INSTANCE,
SimplifyInPredicate.INSTANCE,
SimplifyDecimalV3Comparison.INSTANCE,
OrToIn.INSTANCE,
SimplifyRange.INSTANCE,
DateFunctionRewrite.INSTANCE,
ArrayContainToArrayOverlap.INSTANCE,
CaseWhenToIf.INSTANCE,
TopnToMax.INSTANCE,
NullSafeEqualToEqual.INSTANCE,
LikeToEqualRewrite.INSTANCE
ExtractCommonFactorRule.INSTANCE,
DistinctPredicatesRule.INSTANCE,
SimplifyComparisonPredicate.INSTANCE,
SimplifyInPredicate.INSTANCE,
SimplifyDecimalV3Comparison.INSTANCE,
SimplifyRange.INSTANCE,
OrToIn.INSTANCE,
SimplifyRange.INSTANCE,
DateFunctionRewrite.INSTANCE,
ArrayContainToArrayOverlap.INSTANCE,
CaseWhenToIf.INSTANCE,
TopnToMax.INSTANCE,
NullSafeEqualToEqual.INSTANCE,
LikeToEqualRewrite.INSTANCE
)
);
private static final ExpressionRuleExecutor EXECUTOR = new ExpressionRuleExecutor(OPTIMIZE_REWRITE_RULES);
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ public interface MutableState {
String KEY_PARENT = "parent";
String KEY_RF_JUMP = "rf-jump";
String KEY_PUSH_TOPN_TO_AGG = "pushTopnToAgg";

String KEY_OR_TO_IN = "or_to_in";

<T> Optional<T> get(String key);

MutableState set(String key, Object value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import org.apache.doris.nereids.rules.expression.ExpressionRewriteTestHelper;
import org.apache.doris.nereids.rules.expression.rules.OrToIn;
import org.apache.doris.nereids.trees.expressions.And;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.InPredicate;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
Expand All @@ -39,30 +38,16 @@ void test1() {
String expr = "col1 = 1 or col1 = 2 or col1 = 3 and (col2 = 4)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Set<InPredicate> inPredicates = rewritten.collect(e -> e instanceof InPredicate);
Assertions.assertEquals(1, inPredicates.size());
InPredicate inPredicate = inPredicates.iterator().next();
NamedExpression namedExpression = (NamedExpression) inPredicate.getCompareExpr();
Assertions.assertEquals("col1", namedExpression.getName());
List<Expression> options = inPredicate.getOptions();
Assertions.assertEquals(2, options.size());
Set<Integer> opVals = ImmutableSet.of(1, 2);
for (Expression op : options) {
Literal literal = (Literal) op;
Assertions.assertTrue(opVals.contains(((Byte) literal.getValue()).intValue()));
}
Set<And> ands = rewritten.collect(e -> e instanceof And);
Assertions.assertEquals(1, ands.size());
And and = ands.iterator().next();
Assertions.assertEquals("((col1 = 3) AND (col2 = 4))", and.toSql());
Assertions.assertEquals("(col1 IN (1, 2, 3) AND (col1 IN (1, 2) OR ((col1 = 3) AND (col2 = 4))))",
rewritten.toSql());
}

@Test
void test2() {
String expr = "col1 = 1 and col1 = 3 and col2 = 3 or col2 = 4";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("((((col1 = 1) AND (col1 = 3)) AND (col2 = 3)) OR (col2 = 4))",
Assertions.assertEquals("(col2 = 4)",
rewritten.toSql());
}

Expand Down Expand Up @@ -104,7 +89,7 @@ void test5() {
String expr = "col = 1 or (col = 2 and (col = 3 or col = 4 or col = 5))";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("((col = 1) OR ((col = 2) AND col IN (3, 4, 5)))",
Assertions.assertEquals("(col = 1)",
rewritten.toSql());
}

Expand All @@ -121,7 +106,7 @@ void test7() {
String expr = "A = 1 or A = 2 or abs(A)=5 or A in (1, 2, 3) or B = 1 or B = 2 or B in (1, 2, 3) or B+1 in (4, 5, 7)";
Expression expression = PARSER.parseExpression(expr);
Expression rewritten = OrToIn.INSTANCE.rewriteTree(expression, context);
Assertions.assertEquals("(((A IN (1, 2, 3) OR B IN (1, 2, 3)) OR (abs(A) = 5)) OR (B + 1) IN (4, 5, 7))", rewritten.toSql());
Assertions.assertEquals("(((A IN (1, 2, 3) OR (abs(A) = 5)) OR B IN (1, 2, 3)) OR (B + 1) IN (4, 5, 7))", rewritten.toSql());
}

@Test
Expand All @@ -142,4 +127,82 @@ void testEnsureOrder() {
Assertions.assertEquals("(col1 IN (1, 2) OR col2 IN (1, 2))",
rewritten.toSql());
}

@Test
void test9() {
    // AND whose right-hand side is an OR of two equalities on col2:
    // the expected SQL shows that OR folded into one IN predicate, with col1 = 1 kept as is.
    final String input = "col1=1 and (col2=1 or col2=2)";
    final String expectedSql = "((col1 = 1) AND col2 IN (1, 2))";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}

@Test
void test10() {
    // Recursive rewrite: the OR of col3 equalities is nested inside an AND, which is itself
    // one branch of the top-level OR. The expected SQL rewrites only the nested OR to IN.
    final String input = "col1=1 or (col2 = 2 and (col3=4 or col3=5))";
    final String expectedSql = "((col1 = 1) OR ((col2 = 2) AND col3 IN (4, 5)))";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}

@Test
void test11() {
    // Several IN predicates from one OR: both branches constrain a, b and c.
    // The expected SQL contains (b = 2), IN predicates for a and c, and the per-branch
    // (a, c) equality pairs still joined by OR.
    final String input = "(a=1 and b=2 and c=3) or (a=2 and b=2 and c=4)";
    final String expectedSql =
            "((b = 2) AND ((a IN (1, 2) AND c IN (3, 4)) AND (((a = 1) AND (c = 3)) OR ((a = 2) AND (c = 4)))))";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}

@Test
void test12() {
    // Conjunction of two IN predicates on the same column whose option lists share no value.
    // The asserted result is the literal FALSE, not the unchanged input.
    // NOTE(review): presumably the rule intersects the two option lists; confirm against OrToIn.
    final String input = "a in (1, 2) and a in (3, 4)";
    final String expectedSql = "FALSE";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}

@Test
void test13() {
    // The whole text between the single quotes (xyz% or a=1 or a=2) is the LIKE pattern,
    // so the input holds one LIKE predicate and no OR; the expected SQL is that predicate alone.
    // NOTE(review): the text ": no extract" after the closing quote does not appear in the
    // expected SQL; presumably the parser ignores it. Confirm whether it was meant as a comment.
    final String input = "a like 'xyz% or a=1 or a=2': no extract";
    final String expectedSql = "(a like 'xyz% or a=1 or a=2')";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}

@Test
void test14() {
    // No IN predicate expected: one OR branch also contains f(a) = 2, a comparison on a
    // function of a rather than on a itself. The expected SQL keeps the input's structure.
    final String input = "(a=1 and f(a)=2) or a=3";
    final String expectedSql = "(((a = 1) AND (f(a) = 2)) OR (a = 3))";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}

@Test
void test15() {
    // No IN predicate expected: the asserted SQL keeps all three disjuncts as equalities.
    // NOTE(review): presumably nothing is extracted because the x = 1 branch does not
    // constrain a; confirm against OrToIn.
    final String input = "x=1 or (a=1 and b=2) or (a=2 and c=3)";
    final String expectedSql = "((x = 1) OR (((a = 1) AND (b = 2)) OR ((a = 2) AND (c = 3))))";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}

@Test
void test16() {
    // The same equality repeated three times under OR: the expected SQL is a single (a = 1).
    final String input = "a=1 or a=1 or a=1";
    final String expectedSql = "(a = 1)";
    Expression actual = OrToIn.INSTANCE.rewriteTree(PARSER.parseExpression(input), context);
    Assertions.assertEquals(expectedSql, actual.toSql());
}
}
4 changes: 2 additions & 2 deletions regression-test/data/nereids_hint_tpcds_p0/shape/query13.out
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ PhysicalResultSink
--------PhysicalProject
----------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = store_sales.ss_store_sk)) otherCondition=() build RFs:RF4 s_store_sk->[ss_store_sk]
------------PhysicalProject
--------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=((((household_demographics.hd_dep_count = 1) AND ((((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00))) OR (((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00))))) OR ((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) AND (household_demographics.hd_dep_count = 3)))) build RFs:RF3 ss_cdemo_sk->[cd_demo_sk]
--------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)) otherCondition=((((((household_demographics.hd_dep_count = 1) AND cd_marital_status IN ('D', 'W')) AND cd_education_status IN ('2 yr Degree', 'Primary')) AND ((cd_marital_status IN ('D', 'W') AND cd_education_status IN ('2 yr Degree', 'Primary')) AND ((((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00))) OR (((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00)))))) OR ((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) AND (household_demographics.hd_dep_count = 3)))) build RFs:RF3 ss_cdemo_sk->[cd_demo_sk]
----------------PhysicalProject
------------------filter(((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary'))) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree'))))
------------------filter(((((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = 'College')) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Primary'))) OR ((customer_demographics.cd_marital_status = 'W') AND (customer_demographics.cd_education_status = '2 yr Degree'))) and cd_education_status IN ('2 yr Degree', 'College', 'Primary') and cd_marital_status IN ('D', 'M', 'W'))
--------------------PhysicalOlapScan[customer_demographics] apply RFs: RF3
----------------PhysicalProject
------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk)) otherCondition=() build RFs:RF2 hd_demo_sk->[ss_hdemo_sk]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecHash]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((ca_state IN ('CA', 'GA', 'WA') OR substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk]
----------------hashJoin[INNER_JOIN shuffle] hashCondition=((catalog_sales.cs_bill_customer_sk = customer.c_customer_sk)) otherCondition=(((substring(ca_zip, 1, 5) IN ('80348', '81792', '83405', '85392', '85460', '85669', '86197', '86475', '88274') OR ca_state IN ('CA', 'GA', 'WA')) OR (catalog_sales.cs_sales_price > 500.00))) build RFs:RF2 c_customer_sk->[cs_bill_customer_sk]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk]
----------------------PhysicalProject
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ PhysicalResultSink
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------hashAgg[LOCAL]
----------------------------PhysicalProject
------------------------------filter((((item.i_category = 'Men') AND (((((i_size IN ('economy', 'small') AND i_color IN ('maroon', 'smoke')) AND i_units IN ('Case', 'Ounce')) OR ((i_size IN ('economy', 'small') AND i_color IN ('firebrick', 'sienna')) AND i_units IN ('Cup', 'Each'))) OR ((i_color IN ('powder', 'sky') AND i_units IN ('Dozen', 'Lb')) AND i_size IN ('N/A', 'large'))) OR ((i_color IN ('papaya', 'peach') AND i_units IN ('Bundle', 'Carton')) AND i_size IN ('N/A', 'large')))) OR ((item.i_category = 'Women') AND (((((i_color IN ('forest', 'lime') AND i_units IN ('Pallet', 'Pound')) AND i_size IN ('economy', 'small')) OR ((i_color IN ('navy', 'slate') AND i_units IN ('Bunch', 'Gross')) AND i_size IN ('extra large', 'petite'))) OR ((i_color IN ('aquamarine', 'dark') AND i_units IN ('Tbl', 'Ton')) AND i_size IN ('economy', 'small'))) OR ((i_color IN ('frosted', 'plum') AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'petite'))))))
------------------------------filter((((i_color IN ('forest', 'lime', 'maroon', 'navy', 'powder', 'sky', 'slate', 'smoke') AND i_units IN ('Bunch', 'Case', 'Dozen', 'Gross', 'Lb', 'Ounce', 'Pallet', 'Pound')) AND ((((((item.i_category = 'Women') AND i_color IN ('forest', 'lime')) AND i_units IN ('Pallet', 'Pound')) AND i_size IN ('economy', 'small')) OR ((((item.i_category = 'Women') AND i_color IN ('navy', 'slate')) AND i_units IN ('Bunch', 'Gross')) AND i_size IN ('extra large', 'petite'))) OR (((((item.i_category = 'Men') AND i_color IN ('powder', 'sky')) AND i_units IN ('Dozen', 'Lb')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('maroon', 'smoke')) AND i_units IN ('Case', 'Ounce')) AND i_size IN ('economy', 'small'))))) OR ((i_color IN ('aquamarine', 'dark', 'firebrick', 'frosted', 'papaya', 'peach', 'plum', 'sienna') AND i_units IN ('Box', 'Bundle', 'Carton', 'Cup', 'Dram', 'Each', 'Tbl', 'Ton')) AND ((((((item.i_category = 'Women') AND i_color IN ('aquamarine', 'dark')) AND i_units IN ('Tbl', 'Ton')) AND i_size IN ('economy', 'small')) OR ((((item.i_category = 'Women') AND i_color IN ('frosted', 'plum')) AND i_units IN ('Box', 'Dram')) AND i_size IN ('extra large', 'petite'))) OR (((((item.i_category = 'Men') AND i_color IN ('papaya', 'peach')) AND i_units IN ('Bundle', 'Carton')) AND i_size IN ('N/A', 'large')) OR ((((item.i_category = 'Men') AND i_color IN ('firebrick', 'sienna')) AND i_units IN ('Cup', 'Each')) AND i_size IN ('economy', 'small')))))) and i_category IN ('Men', 'Women') and i_size IN ('N/A', 'economy', 'extra large', 'large', 'petite', 'small'))
--------------------------------PhysicalOlapScan[item]

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
------------------------------------PhysicalProject
--------------------------------------filter((((date_dim.d_year = 2000) OR ((date_dim.d_year = 1999) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2001) AND (date_dim.d_moy = 1))))
--------------------------------------filter((((date_dim.d_year = 2000) OR ((date_dim.d_year = 1999) AND (date_dim.d_moy = 12))) OR ((date_dim.d_year = 2001) AND (date_dim.d_moy = 1))) and d_year IN (1999, 2000, 2001) and d_year IN (1999, 2000, 2001))
----------------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ PhysicalResultSink
--------------------------filter((store_sales.ss_net_profit <= 25000.00) and (store_sales.ss_net_profit >= 0.00) and (store_sales.ss_sales_price <= 200.00) and (store_sales.ss_sales_price >= 50.00))
----------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
------------------------PhysicalProject
--------------------------filter(((((customer_demographics.cd_marital_status = 'S') AND (customer_demographics.cd_education_status = 'Secondary')) OR ((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = '2 yr Degree'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Advanced Degree'))))
--------------------------filter(((((customer_demographics.cd_marital_status = 'S') AND (customer_demographics.cd_education_status = 'Secondary')) OR ((customer_demographics.cd_marital_status = 'M') AND (customer_demographics.cd_education_status = '2 yr Degree'))) OR ((customer_demographics.cd_marital_status = 'D') AND (customer_demographics.cd_education_status = 'Advanced Degree'))) and cd_education_status IN ('2 yr Degree', 'Advanced Degree', 'Secondary') and cd_marital_status IN ('D', 'M', 'S'))
----------------------------PhysicalOlapScan[customer_demographics]
--------------------PhysicalProject
----------------------filter((customer_address.ca_country = 'United States') and ca_state IN ('CO', 'GA', 'KS', 'MD', 'MN', 'NC', 'ND', 'NY', 'SD'))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ PhysicalResultSink
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
--------------------------------------PhysicalProject
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))))
----------------------------------------filter((((i_category IN ('Books', 'Children', 'Electronics') AND i_class IN ('personal', 'portable', 'reference', 'self-help')) AND i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')) OR ((i_category IN ('Men', 'Music', 'Women') AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')) AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help'))
------------------------------------------PhysicalOlapScan[item]
----------------------------------PhysicalProject
------------------------------------filter(d_month_seq IN (1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197))
Expand Down
Loading