apache · morningman · Jul 1, 2021 · Jun 7, 2021 · Jun 23, 2021 · Jun 23, 2021
diff --git a/docs/en/administrator-guide/variables.md b/docs/en/administrator-guide/variables.md
@@ -388,3 +388,7 @@ Note that the comment must start with /*+ and can only follow the SELECT.
     In a sort query, when an upper level node receives the ordered data of the lower level node, it will sort the corresponding data on the exchange node to ensure that the final data is ordered. However, when a single thread merges multiple channels of data, if the amount of data is too large, it will lead to a single point of exchange node merge bottleneck.
 
     Doris optimizes this part if there are too many data nodes in the lower layer. Exchange node will start multithreading for parallel merging to speed up the sorting process. This parameter is false by default, which means that exchange node does not adopt parallel merge sort to reduce the extra CPU and memory consumption.
+
+* `extract_wide_range_expr`
+
+    Used to control whether turn on the 'Wide Common Factors' rule. The value has two: true or false. On by default.
diff --git a/docs/zh-CN/administrator-guide/variables.md b/docs/zh-CN/administrator-guide/variables.md
@@ -383,3 +383,7 @@ SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3);
     在一个排序的查询之中，一个上层节点接收下层节点有序数据时，会在exchange node上进行对应的排序来保证最终的数据是有序的。但是单线程进行多路数据归并时，如果数据量过大，会导致exchange node的单点的归并瓶颈。
 
     Doris在这部分进行了优化处理，如果下层的数据节点过多。exchange node会启动多线程进行并行归并来加速排序过程。该参数默认为False，即表示 exchange node 不采取并行的归并排序，来减少额外的CPU和内存消耗。
+
+* `extract_wide_range_expr`
+
+    用于控制是否开启 「宽泛公因式提取」的优化。取值有两种：true 和 false 。默认情况下开启。
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
@@ -38,6 +38,7 @@
 import org.apache.doris.rewrite.BetweenToCompoundRule;
 import org.apache.doris.rewrite.ExprRewriteRule;
 import org.apache.doris.rewrite.ExprRewriter;
+import org.apache.doris.rewrite.ExtractCommonFactorsRule;
 import org.apache.doris.rewrite.FoldConstantsRule;
 import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
 import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule;
@@ -259,7 +260,9 @@ public GlobalState(Catalog catalog, ConnectContext context) {
             rules.add(FoldConstantsRule.INSTANCE);
             rules.add(RewriteFromUnixTimeRule.INSTANCE);
             rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE);
-            exprRewriter_ = new ExprRewriter(rules);
+            List<ExprRewriteRule> onceRules = Lists.newArrayList();
+            onceRules.add(ExtractCommonFactorsRule.INSTANCE);
+            exprRewriter_ = new ExprRewriter(rules, onceRules);
             // init mv rewriter
             List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList();
             mvRewriteRules.add(ToBitmapToSlotRefRule.INSTANCE);

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java
@@ -33,6 +33,7 @@
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Range;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -465,7 +466,28 @@ public boolean slotIsLeft() {
         Preconditions.checkState(slotIsleft != null);
         return slotIsleft;
     }
-
+
+    public Range<LiteralExpr> convertToRange() {
+        Preconditions.checkState(getChild(0) instanceof SlotRef);
+        Preconditions.checkState(getChild(1) instanceof LiteralExpr);
+        LiteralExpr literalExpr = (LiteralExpr) getChild(1);
+        switch (op) {
+            case EQ:
+                return Range.singleton(literalExpr);
+            case GE:
+                return Range.atLeast(literalExpr);
+            case GT:
+                return Range.greaterThan(literalExpr);
+            case LE:
+                return Range.atMost(literalExpr);
+            case LT:
+                return Range.lessThan(literalExpr);
+            case NE:
+            default:
+                return null;
+        }
+    }
+
     //    public static enum Operator2 {
     //        EQ("=", FunctionOperator.EQ, FunctionOperator.FILTER_EQ),
     //        NE("!=", FunctionOperator.NE, FunctionOperator.FILTER_NE),

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
@@ -284,7 +284,7 @@ private Expr castTo(LiteralExpr value) throws AnalysisException {
             return new IntLiteral(value.getLongValue(), type);
         } else if (type.isLargeIntType()) {
             return new LargeIntLiteral(value.getStringValue());
-        } else if (type.isDecimal()) {
+        } else if (type.isDecimal() || type.isDecimalV2()) {
             return new DecimalLiteral(value.getStringValue());
         } else if (type.isFloatingPointType()) {
             return new FloatLiteral(value.getDoubleValue(), type);

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java
@@ -157,7 +157,17 @@ public int compareLiteral(LiteralExpr expr) {
         if (expr instanceof NullLiteral) {
             return 1;
         }
-        return this.value.compareTo(((DecimalLiteral) expr).value);
+        if (expr instanceof DecimalLiteral) {
+            return this.value.compareTo(((DecimalLiteral) expr).value);
+        } else {
+            try {
+                DecimalLiteral decimalLiteral = new DecimalLiteral(expr.getStringValue());
+                return this.compareLiteral(decimalLiteral);
+            } catch (AnalysisException e) {
+                throw new ClassCastException("Those two values cannot be compared: " + value
+                        + " and " + expr.toSqlImpl());
+            }
+        }
     }
 
     @Override

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionParams.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionParams.java
@@ -17,16 +17,16 @@
 
 package org.apache.doris.analysis;
 
+import org.apache.doris.common.io.Writable;
+
+import com.google.common.collect.Lists;
+
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.List;
 import java.util.Objects;
 
-import org.apache.doris.common.io.Writable;
-
-import com.google.common.collect.Lists;
-
 /**
  * Return value of the grammar production that parses function
  * parameters. These parameters can be for scalar or aggregate functions.
@@ -111,8 +111,10 @@ public static FunctionParams read(DataInput in) throws IOException {
     @Override
     public int hashCode() {
         int result = 31 * Boolean.hashCode(isStar) + Boolean.hashCode(isDistinct);
-        for (Expr expr : exprs) {
-            result = 31 * result + Objects.hashCode(expr);
+        if (exprs != null) {
+            for (Expr expr : exprs) {
+                result = 31 * result + Objects.hashCode(expr);
+            }
         }
         return result;
     }

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InPredicate.java
@@ -105,7 +105,7 @@ public int getInElementNum() {
     }
 
     @Override
-    public Expr clone() {
+    public InPredicate clone() {
         return new InPredicate(this);
     }
 
@@ -212,17 +212,37 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException {
 
         Reference<SlotRef> slotRefRef = new Reference<SlotRef>();
         Reference<Integer> idxRef = new Reference<Integer>();
-        if (isSingleColumnPredicate(slotRefRef,
-          idxRef) && idxRef.getRef() == 0 && slotRefRef.getRef().getNumDistinctValues() > 0) {
-            selectivity =
-              (double) (getChildren().size() - 1) / (double) slotRefRef.getRef()
-                .getNumDistinctValues();
+        if (isSingleColumnPredicate(slotRefRef, idxRef)
+                && idxRef.getRef() == 0 && slotRefRef.getRef().getNumDistinctValues() > 0) {
+            selectivity = (double) (getChildren().size() - 1) / (double) slotRefRef.getRef()
+                    .getNumDistinctValues();
             selectivity = Math.max(0.0, Math.min(1.0, selectivity));
         } else {
             selectivity = Expr.DEFAULT_SELECTIVITY;
         }
     }
 
+    public InPredicate union(InPredicate inPredicate) {
+        Preconditions.checkState(inPredicate.isLiteralChildren());
+        Preconditions.checkState(this.isLiteralChildren());
+        Preconditions.checkState(getChild(0).equals(inPredicate.getChild(0)));
+        List<Expr> unionChildren = new ArrayList<>(getListChildren());
+        unionChildren.removeAll(inPredicate.getListChildren());
+        unionChildren.addAll(inPredicate.getListChildren());
+        InPredicate union = new InPredicate(getChild(0), unionChildren, isNotIn);
+        return union;
+    }
+
+    public InPredicate intersection(InPredicate inPredicate) {
+        Preconditions.checkState(inPredicate.isLiteralChildren());
+        Preconditions.checkState(this.isLiteralChildren());
+        Preconditions.checkState(getChild(0).equals(inPredicate.getChild(0)));
+        List<Expr> intersectChildren = new ArrayList<>(getListChildren());
+        intersectChildren.retainAll(inPredicate.getListChildren());
+        InPredicate intersection = new InPredicate(getChild(0), intersectChildren, isNotIn);
+        return intersection;
+    }
+
     @Override
     protected void toThrift(TExprNode msg) {
         // Can't serialize a predicate with a subquery

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LiteralExpr.java
@@ -32,7 +32,7 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 
-public abstract class LiteralExpr extends Expr {
+public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr> {
     private static final Logger LOG = LogManager.getLogger(LiteralExpr.class);
 
     public LiteralExpr() {
@@ -128,6 +128,11 @@ public Object getRealValue() {
     // must handle MaxLiteral.
     public abstract int compareLiteral(LiteralExpr expr);
 
+    @Override
+    public int compareTo(LiteralExpr literalExpr) {
+        return compareLiteral(literalExpr);
+    }
+
     // Returns the string representation of the literal's value. Used when passing
     // literal values to the metastore rather than to Palo backends. This is similar to
     // the toSql() method, but does not perform any formatting of the string values. Neither

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -40,6 +40,7 @@
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.rewrite.ExprRewriter;
+
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicates;
 import com.google.common.base.Strings;
@@ -56,7 +57,6 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -480,8 +480,7 @@ public void analyze(Analyzer analyzer) throws AnalysisException, UserException {
                         "cannot combine SELECT DISTINCT with analytic functions");
             }
         }
-        // do this before whereClause.analyze , some expr is not analyzed, this may cause some
-        // function not work as expected such as equals;
+
         whereClauseRewrite();
         if (whereClause != null) {
             if (checkGroupingFn(whereClause)) {
@@ -579,162 +578,6 @@ private void whereClauseRewrite() {
                 whereClause = new BoolLiteral(true);
             }
         }
-        Expr deDuplicatedWhere = deduplicateOrs(whereClause);
-        if (deDuplicatedWhere != null) {
-            whereClause = deDuplicatedWhere;
-        }
-    }
-
-    /**
-     * this function only process (a and b and c) or (d and e and f) like clause,
-     * this function will extract this to [[a, b, c], [d, e, f]]
-     */
-    private List<List<Expr>> extractDuplicateOrs(CompoundPredicate expr) {
-        List<List<Expr>> orExprs = new ArrayList<>();
-        for (Expr child : expr.getChildren()) {
-            if (child instanceof CompoundPredicate) {
-                CompoundPredicate childCp = (CompoundPredicate) child;
-                if (childCp.getOp() == CompoundPredicate.Operator.OR) {
-                    orExprs.addAll(extractDuplicateOrs(childCp));
-                    continue;
-                } else if (childCp.getOp() == CompoundPredicate.Operator.AND) {
-                    orExprs.add(flatAndExpr(child));
-                    continue;
-                }
-            }
-            orExprs.add(Arrays.asList(child));
-        }
-        return orExprs;
-    }
-
-    /**
-     * This function attempts to apply the inverse OR distributive law:
-     * ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
-     * That is, locate OR clauses in which every subclause contains an
-     * identical term, and pull out the duplicated terms.
-     */
-    private Expr deduplicateOrs(Expr expr) {
-        if (expr == null) {
-            return null;
-        } else if (expr instanceof CompoundPredicate && ((CompoundPredicate) expr).getOp() == CompoundPredicate.Operator.OR) {
-            Expr rewritedExpr = processDuplicateOrs(extractDuplicateOrs((CompoundPredicate) expr));
-            if (rewritedExpr != null) {
-                return rewritedExpr;
-            }
-        } else {
-            for (int i = 0; i < expr.getChildren().size(); i++) {
-                Expr rewritedExpr = deduplicateOrs(expr.getChild(i));
-                if (rewritedExpr != null) {
-                    expr.setChild(i, rewritedExpr);
-                }
-            }
-        }
-        return expr;
-    }
-
-    /**
-     * try to flat and , a and b and c => [a, b, c]
-     */
-    private List<Expr> flatAndExpr(Expr expr) {
-        List<Expr> andExprs = new ArrayList<>();
-        if (expr instanceof CompoundPredicate && ((CompoundPredicate) expr).getOp() == CompoundPredicate.Operator.AND) {
-            andExprs.addAll(flatAndExpr(expr.getChild(0)));
-            andExprs.addAll(flatAndExpr(expr.getChild(1)));
-        } else {
-            andExprs.add(expr);
-        }
-        return andExprs;
-    }
-
-    /**
-     * the input is a list of list, the inner list is and connected exprs, the outer list is or connected
-     * for example clause (a and b and c) or (a and e and f) after extractDuplicateOrs will be [[a, b, c], [a, e, f]]
-     * this is the input of this function, first step is deduplicate [[a, b, c], [a, e, f]] => [[a], [b, c], [e, f]]
-     * then rebuild the expr to a and ((b and c) or (e and f))
-     */
-    private Expr processDuplicateOrs(List<List<Expr>> exprs) {
-        if (exprs.size() < 2) {
-            return null;
-        }
-        // 1. remove duplicated elements [[a,a], [a, b], [a,b]] => [[a], [a,b]]
-        Set<Set<Expr>> set = new LinkedHashSet<>();
-        for (List<Expr> ex : exprs) {
-            Set<Expr> es = new LinkedHashSet<>();
-            es.addAll(ex);
-            set.add(es);
-        }
-        List<List<Expr>> clearExprs = new ArrayList<>();
-        for (Set<Expr> es : set) {
-            List<Expr> el = new ArrayList<>();
-            el.addAll(es);
-            clearExprs.add(el);
-        }
-        if (clearExprs.size() == 1) {
-            return makeCompound(clearExprs.get(0), CompoundPredicate.Operator.AND);
-        }
-        // 2. find duplicate cross the clause
-        List<Expr> cloneExprs = new ArrayList<>(clearExprs.get(0));
-        for (int i = 1; i < clearExprs.size(); ++i) {
-            cloneExprs.retainAll(clearExprs.get(i));
-        }
-        List<Expr> temp = new ArrayList<>();
-        if (CollectionUtils.isNotEmpty(cloneExprs)) {
-            temp.add(makeCompound(cloneExprs, CompoundPredicate.Operator.AND));
-        }
-
-        Expr result;
-        boolean isReturnCommonFactorExpr = false;
-        for (List<Expr> exprList : clearExprs) {
-            exprList.removeAll(cloneExprs);
-            if (exprList.size() == 0) {
-                // For example, the sql is "where (a = 1) or (a = 1 and B = 2)"
-                // if "(a = 1)" is extracted as a common factor expression, then the first expression "(a = 1)" has no expression
-                // other than a common factor expression, and the second expression "(a = 1 and B = 2)" has an expression of "(B = 2)"
-                //
-                // In this case, the common factor expression ("a = 1") can be directly used to replace the whole CompoundOrPredicate.
-                // In Fact, the common factor expression is actually the parent set of expression "(a = 1)" and expression "(a = 1 and B = 2)"
-                //
-                // exprList.size() == 0 means one child of CompoundOrPredicate has no expression other than a common factor expression.
-                isReturnCommonFactorExpr = true;
-                break;
-            }
-            temp.add(makeCompound(exprList, CompoundPredicate.Operator.AND));
-        }
-        if (isReturnCommonFactorExpr) {
-            result = temp.get(0);
-        } else {
-            // rebuild CompoundPredicate if found duplicate predicate will build （predicate) and (.. or ..)  predicate in
-            // step 1: will build (.. or ..)
-            if (CollectionUtils.isNotEmpty(cloneExprs)) {
-                result = new CompoundPredicate(CompoundPredicate.Operator.AND, temp.get(0),
-                        makeCompound(temp.subList(1, temp.size()), CompoundPredicate.Operator.OR));
-                result.setPrintSqlInParens(true);
-            } else {
-                result = makeCompound(temp, CompoundPredicate.Operator.OR);
-            }
-        }
-        if (LOG.isDebugEnabled()) {
-            LOG.debug("equal ors: " + result.toSql());
-        }
-        return result;
-    }
-
-    /**
-     * Rebuild CompoundPredicate, [a, e, f] AND => a and e and f
-     */
-    private Expr makeCompound(List<Expr> exprs, CompoundPredicate.Operator op) {
-        if (CollectionUtils.isEmpty(exprs)) {
-            return null;
-        }
-        if (exprs.size() == 1) {
-            return exprs.get(0);
-        }
-        CompoundPredicate result = new CompoundPredicate(op, exprs.get(0), exprs.get(1));
-        for (int i = 2; i < exprs.size(); ++i) {
-            result = new CompoundPredicate(op, result.clone(), exprs.get(i));
-        }
-        result.setPrintSqlInParens(true);
-        return result;
     }
 
     /**