Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/en/administrator-guide/variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,7 @@ Note that the comment must start with /*+ and can only follow the SELECT.
In a sort query, when an upper level node receives the ordered data of the lower level node, it will sort the corresponding data on the exchange node to ensure that the final data is ordered. However, when a single thread merges multiple channels of data, if the amount of data is too large, it will lead to a single point of exchange node merge bottleneck.

Doris optimizes this part if there are too many data nodes in the lower layer. Exchange node will start multithreading for parallel merging to speed up the sorting process. This parameter is false by default, which means that exchange node does not adopt parallel merge sort to reduce the extra CPU and memory consumption.

* `extract_wide_range_expr`

Used to control whether turn on the 'Wide Common Factors' rule. The value has two: true or false. On by default.
4 changes: 4 additions & 0 deletions docs/zh-CN/administrator-guide/variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -383,3 +383,7 @@ SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3);
在一个排序的查询之中,一个上层节点接收下层节点有序数据时,会在exchange node上进行对应的排序来保证最终的数据是有序的。但是单线程进行多路数据归并时,如果数据量过大,会导致exchange node的单点的归并瓶颈。

Doris在这部分进行了优化处理,如果下层的数据节点过多。exchange node会启动多线程进行并行归并来加速排序过程。该参数默认为False,即表示 exchange node 不采取并行的归并排序,来减少额外的CPU和内存消耗。

* `extract_wide_range_expr`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extract_wide_range_expr is the name of who this implementation, not the name of the function, you may change the name

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe extract_common_factors?


用于控制是否开启 「宽泛公因式提取」的优化。取值有两种:true 和 false 。默认情况下开启。
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.doris.rewrite.BetweenToCompoundRule;
import org.apache.doris.rewrite.ExprRewriteRule;
import org.apache.doris.rewrite.ExprRewriter;
import org.apache.doris.rewrite.ExtractCommonFactorsRule;
import org.apache.doris.rewrite.FoldConstantsRule;
import org.apache.doris.rewrite.RewriteFromUnixTimeRule;
import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule;
Expand Down Expand Up @@ -259,7 +260,9 @@ public GlobalState(Catalog catalog, ConnectContext context) {
rules.add(FoldConstantsRule.INSTANCE);
rules.add(RewriteFromUnixTimeRule.INSTANCE);
rules.add(SimplifyInvalidDateBinaryPredicatesDateRule.INSTANCE);
exprRewriter_ = new ExprRewriter(rules);
List<ExprRewriteRule> onceRules = Lists.newArrayList();
onceRules.add(ExtractCommonFactorsRule.INSTANCE);
exprRewriter_ = new ExprRewriter(rules, onceRules);
// init mv rewriter
List<ExprRewriteRule> mvRewriteRules = Lists.newArrayList();
mvRewriteRules.add(ToBitmapToSlotRefRule.INSTANCE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Range;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -465,7 +466,28 @@ public boolean slotIsLeft() {
Preconditions.checkState(slotIsleft != null);
return slotIsleft;
}


public Range<LiteralExpr> convertToRange() {
Preconditions.checkState(getChild(0) instanceof SlotRef);
Preconditions.checkState(getChild(1) instanceof LiteralExpr);
LiteralExpr literalExpr = (LiteralExpr) getChild(1);
switch (op) {
case EQ:
return Range.singleton(literalExpr);
case GE:
return Range.atLeast(literalExpr);
case GT:
return Range.greaterThan(literalExpr);
case LE:
return Range.atMost(literalExpr);
case LT:
return Range.lessThan(literalExpr);
case NE:
default:
return null;
}
}

// public static enum Operator2 {
// EQ("=", FunctionOperator.EQ, FunctionOperator.FILTER_EQ),
// NE("!=", FunctionOperator.NE, FunctionOperator.FILTER_NE),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ private Expr castTo(LiteralExpr value) throws AnalysisException {
return new IntLiteral(value.getLongValue(), type);
} else if (type.isLargeIntType()) {
return new LargeIntLiteral(value.getStringValue());
} else if (type.isDecimal()) {
} else if (type.isDecimal() || type.isDecimalV2()) {
return new DecimalLiteral(value.getStringValue());
} else if (type.isFloatingPointType()) {
return new FloatLiteral(value.getDoubleValue(), type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,17 @@ public int compareLiteral(LiteralExpr expr) {
if (expr instanceof NullLiteral) {
return 1;
}
return this.value.compareTo(((DecimalLiteral) expr).value);
if (expr instanceof DecimalLiteral) {
return this.value.compareTo(((DecimalLiteral) expr).value);
} else {
try {
DecimalLiteral decimalLiteral = new DecimalLiteral(expr.getStringValue());
return this.compareLiteral(decimalLiteral);
} catch (AnalysisException e) {
throw new ClassCastException("Those two values cannot be compared: " + value
+ " and " + expr.toSqlImpl());
}
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@

package org.apache.doris.analysis;

import org.apache.doris.common.io.Writable;

import com.google.common.collect.Lists;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.Objects;

import org.apache.doris.common.io.Writable;

import com.google.common.collect.Lists;

/**
* Return value of the grammar production that parses function
* parameters. These parameters can be for scalar or aggregate functions.
Expand Down Expand Up @@ -111,8 +111,10 @@ public static FunctionParams read(DataInput in) throws IOException {
@Override
public int hashCode() {
int result = 31 * Boolean.hashCode(isStar) + Boolean.hashCode(isDistinct);
for (Expr expr : exprs) {
result = 31 * result + Objects.hashCode(expr);
if (exprs != null) {
for (Expr expr : exprs) {
result = 31 * result + Objects.hashCode(expr);
}
}
return result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public int getInElementNum() {
}

@Override
public Expr clone() {
public InPredicate clone() {
return new InPredicate(this);
}

Expand Down Expand Up @@ -212,17 +212,37 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException {

Reference<SlotRef> slotRefRef = new Reference<SlotRef>();
Reference<Integer> idxRef = new Reference<Integer>();
if (isSingleColumnPredicate(slotRefRef,
idxRef) && idxRef.getRef() == 0 && slotRefRef.getRef().getNumDistinctValues() > 0) {
selectivity =
(double) (getChildren().size() - 1) / (double) slotRefRef.getRef()
.getNumDistinctValues();
if (isSingleColumnPredicate(slotRefRef, idxRef)
&& idxRef.getRef() == 0 && slotRefRef.getRef().getNumDistinctValues() > 0) {
selectivity = (double) (getChildren().size() - 1) / (double) slotRefRef.getRef()
.getNumDistinctValues();
selectivity = Math.max(0.0, Math.min(1.0, selectivity));
} else {
selectivity = Expr.DEFAULT_SELECTIVITY;
}
}

public InPredicate union(InPredicate inPredicate) {
Preconditions.checkState(inPredicate.isLiteralChildren());
Preconditions.checkState(this.isLiteralChildren());
Preconditions.checkState(getChild(0).equals(inPredicate.getChild(0)));
List<Expr> unionChildren = new ArrayList<>(getListChildren());
unionChildren.removeAll(inPredicate.getListChildren());
unionChildren.addAll(inPredicate.getListChildren());
InPredicate union = new InPredicate(getChild(0), unionChildren, isNotIn);
return union;
}

public InPredicate intersection(InPredicate inPredicate) {
Preconditions.checkState(inPredicate.isLiteralChildren());
Preconditions.checkState(this.isLiteralChildren());
Preconditions.checkState(getChild(0).equals(inPredicate.getChild(0)));
List<Expr> intersectChildren = new ArrayList<>(getListChildren());
intersectChildren.retainAll(inPredicate.getListChildren());
InPredicate intersection = new InPredicate(getChild(0), intersectChildren, isNotIn);
return intersection;
}

@Override
protected void toThrift(TExprNode msg) {
// Can't serialize a predicate with a subquery
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;

public abstract class LiteralExpr extends Expr {
public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr> {
private static final Logger LOG = LogManager.getLogger(LiteralExpr.class);

public LiteralExpr() {
Expand Down Expand Up @@ -128,6 +128,11 @@ public Object getRealValue() {
// must handle MaxLiteral.
public abstract int compareLiteral(LiteralExpr expr);

@Override
public int compareTo(LiteralExpr literalExpr) {
return compareLiteral(literalExpr);
}

// Returns the string representation of the literal's value. Used when passing
// literal values to the metastore rather than to Palo backends. This is similar to
// the toSql() method, but does not perform any formatting of the string values. Neither
Expand Down
161 changes: 2 additions & 159 deletions fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.rewrite.ExprRewriter;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.base.Strings;
Expand All @@ -56,7 +57,6 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -480,8 +480,7 @@ public void analyze(Analyzer analyzer) throws AnalysisException, UserException {
"cannot combine SELECT DISTINCT with analytic functions");
}
}
// do this before whereClause.analyze , some expr is not analyzed, this may cause some
// function not work as expected such as equals;

whereClauseRewrite();
if (whereClause != null) {
if (checkGroupingFn(whereClause)) {
Expand Down Expand Up @@ -579,162 +578,6 @@ private void whereClauseRewrite() {
whereClause = new BoolLiteral(true);
}
}
Expr deDuplicatedWhere = deduplicateOrs(whereClause);
if (deDuplicatedWhere != null) {
whereClause = deDuplicatedWhere;
}
}

/**
* this function only process (a and b and c) or (d and e and f) like clause,
* this function will extract this to [[a, b, c], [d, e, f]]
*/
private List<List<Expr>> extractDuplicateOrs(CompoundPredicate expr) {
List<List<Expr>> orExprs = new ArrayList<>();
for (Expr child : expr.getChildren()) {
if (child instanceof CompoundPredicate) {
CompoundPredicate childCp = (CompoundPredicate) child;
if (childCp.getOp() == CompoundPredicate.Operator.OR) {
orExprs.addAll(extractDuplicateOrs(childCp));
continue;
} else if (childCp.getOp() == CompoundPredicate.Operator.AND) {
orExprs.add(flatAndExpr(child));
continue;
}
}
orExprs.add(Arrays.asList(child));
}
return orExprs;
}

/**
* This function attempts to apply the inverse OR distributive law:
* ((A AND B) OR (A AND C)) => (A AND (B OR C))
* That is, locate OR clauses in which every subclause contains an
* identical term, and pull out the duplicated terms.
*/
private Expr deduplicateOrs(Expr expr) {
if (expr == null) {
return null;
} else if (expr instanceof CompoundPredicate && ((CompoundPredicate) expr).getOp() == CompoundPredicate.Operator.OR) {
Expr rewritedExpr = processDuplicateOrs(extractDuplicateOrs((CompoundPredicate) expr));
if (rewritedExpr != null) {
return rewritedExpr;
}
} else {
for (int i = 0; i < expr.getChildren().size(); i++) {
Expr rewritedExpr = deduplicateOrs(expr.getChild(i));
if (rewritedExpr != null) {
expr.setChild(i, rewritedExpr);
}
}
}
return expr;
}

/**
* try to flat and , a and b and c => [a, b, c]
*/
private List<Expr> flatAndExpr(Expr expr) {
List<Expr> andExprs = new ArrayList<>();
if (expr instanceof CompoundPredicate && ((CompoundPredicate) expr).getOp() == CompoundPredicate.Operator.AND) {
andExprs.addAll(flatAndExpr(expr.getChild(0)));
andExprs.addAll(flatAndExpr(expr.getChild(1)));
} else {
andExprs.add(expr);
}
return andExprs;
}

/**
* the input is a list of list, the inner list is and connected exprs, the outer list is or connected
* for example clause (a and b and c) or (a and e and f) after extractDuplicateOrs will be [[a, b, c], [a, e, f]]
* this is the input of this function, first step is deduplicate [[a, b, c], [a, e, f]] => [[a], [b, c], [e, f]]
* then rebuild the expr to a and ((b and c) or (e and f))
*/
private Expr processDuplicateOrs(List<List<Expr>> exprs) {
if (exprs.size() < 2) {
return null;
}
// 1. remove duplicated elements [[a,a], [a, b], [a,b]] => [[a], [a,b]]
Set<Set<Expr>> set = new LinkedHashSet<>();
for (List<Expr> ex : exprs) {
Set<Expr> es = new LinkedHashSet<>();
es.addAll(ex);
set.add(es);
}
List<List<Expr>> clearExprs = new ArrayList<>();
for (Set<Expr> es : set) {
List<Expr> el = new ArrayList<>();
el.addAll(es);
clearExprs.add(el);
}
if (clearExprs.size() == 1) {
return makeCompound(clearExprs.get(0), CompoundPredicate.Operator.AND);
}
// 2. find duplicate cross the clause
List<Expr> cloneExprs = new ArrayList<>(clearExprs.get(0));
for (int i = 1; i < clearExprs.size(); ++i) {
cloneExprs.retainAll(clearExprs.get(i));
}
List<Expr> temp = new ArrayList<>();
if (CollectionUtils.isNotEmpty(cloneExprs)) {
temp.add(makeCompound(cloneExprs, CompoundPredicate.Operator.AND));
}

Expr result;
boolean isReturnCommonFactorExpr = false;
for (List<Expr> exprList : clearExprs) {
exprList.removeAll(cloneExprs);
if (exprList.size() == 0) {
// For example, the sql is "where (a = 1) or (a = 1 and B = 2)"
// if "(a = 1)" is extracted as a common factor expression, then the first expression "(a = 1)" has no expression
// other than a common factor expression, and the second expression "(a = 1 and B = 2)" has an expression of "(B = 2)"
//
// In this case, the common factor expression ("a = 1") can be directly used to replace the whole CompoundOrPredicate.
// In Fact, the common factor expression is actually the parent set of expression "(a = 1)" and expression "(a = 1 and B = 2)"
//
// exprList.size() == 0 means one child of CompoundOrPredicate has no expression other than a common factor expression.
isReturnCommonFactorExpr = true;
break;
}
temp.add(makeCompound(exprList, CompoundPredicate.Operator.AND));
}
if (isReturnCommonFactorExpr) {
result = temp.get(0);
} else {
// rebuild CompoundPredicate if found duplicate predicate will build (predicate) and (.. or ..) predicate in
// step 1: will build (.. or ..)
if (CollectionUtils.isNotEmpty(cloneExprs)) {
result = new CompoundPredicate(CompoundPredicate.Operator.AND, temp.get(0),
makeCompound(temp.subList(1, temp.size()), CompoundPredicate.Operator.OR));
result.setPrintSqlInParens(true);
} else {
result = makeCompound(temp, CompoundPredicate.Operator.OR);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("equal ors: " + result.toSql());
}
return result;
}

/**
* Rebuild CompoundPredicate, [a, e, f] AND => a and e and f
*/
private Expr makeCompound(List<Expr> exprs, CompoundPredicate.Operator op) {
if (CollectionUtils.isEmpty(exprs)) {
return null;
}
if (exprs.size() == 1) {
return exprs.get(0);
}
CompoundPredicate result = new CompoundPredicate(op, exprs.get(0), exprs.get(1));
for (int i = 2; i < exprs.size(); ++i) {
result = new CompoundPredicate(op, result.clone(), exprs.get(i));
}
result.setPrintSqlInParens(true);
return result;
}

/**
Expand Down
Loading