Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/src/main/java/org/apache/druid/math/expr/ExprEval.java
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ private static class DoubleExprEval extends NumericExprEval
{
private DoubleExprEval(@Nullable Number value)
{
super(value == null ? NullHandling.defaultDoubleValue() : value);
super(value == null ? NullHandling.defaultDoubleValue() : (Double) value.doubleValue());
}

@Override
Expand Down Expand Up @@ -304,7 +304,7 @@ private static class LongExprEval extends NumericExprEval
{
private LongExprEval(@Nullable Number value)
{
super(value == null ? NullHandling.defaultLongValue() : value);
super(value == null ? NullHandling.defaultLongValue() : (Long) value.longValue());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public void testArrayConstructor()
public void testArrayLength()
{
assertExpr("array_length([1,2,3])", 3L);
assertExpr("array_length(a)", 4);
assertExpr("array_length(a)", 4L);
}

@Test
Expand All @@ -199,15 +199,15 @@ public void testArrayOffsetOf()
{
assertExpr("array_offset_of([1, 2, 3], 3)", 2L);
assertExpr("array_offset_of([1, 2, 3], 4)", NullHandling.replaceWithDefault() ? -1L : null);
assertExpr("array_offset_of(a, 'baz')", 2);
assertExpr("array_offset_of(a, 'baz')", 2L);
}

@Test
public void testArrayOrdinalOf()
{
assertExpr("array_ordinal_of([1, 2, 3], 3)", 3L);
assertExpr("array_ordinal_of([1, 2, 3], 4)", NullHandling.replaceWithDefault() ? -1L : null);
assertExpr("array_ordinal_of(a, 'baz')", 3);
assertExpr("array_ordinal_of(a, 'baz')", 3L);
}

@Test
Expand Down
40 changes: 18 additions & 22 deletions sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,7 @@
import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
import org.apache.calcite.rel.rules.FilterTableScanRule;
import org.apache.calcite.rel.rules.IntersectToDistinctRule;
import org.apache.calcite.rel.rules.JoinCommuteRule;
import org.apache.calcite.rel.rules.JoinProjectTransposeRule;
import org.apache.calcite.rel.rules.JoinPushExpressionsRule;
import org.apache.calcite.rel.rules.JoinPushThroughJoinRule;
import org.apache.calcite.rel.rules.MatchRule;
import org.apache.calcite.rel.rules.ProjectFilterTransposeRule;
import org.apache.calcite.rel.rules.ProjectMergeRule;
Expand Down Expand Up @@ -95,6 +92,8 @@ public class Rules
// 2) AggregateReduceFunctionsRule (it'll be added back for the Bindable rule set, but we don't want it for Druid
// rules since it expands AVG, STDDEV, VAR, etc, and we have aggregators specifically designed for those
// functions).
// 3) JoinCommuteRule (we don't support reordering joins yet).
// 4) JoinPushThroughJoinRule (we don't support reordering joins yet).
private static final List<RelOptRule> BASE_RULES =
ImmutableList.of(
AggregateStarTableRule.INSTANCE,
Expand All @@ -110,9 +109,6 @@ public class Rules
FilterAggregateTransposeRule.INSTANCE,
ProjectWindowTransposeRule.INSTANCE,
MatchRule.INSTANCE,
JoinCommuteRule.SWAP_OUTER,
JoinPushThroughJoinRule.RIGHT,
JoinPushThroughJoinRule.LEFT,
SortProjectTransposeRule.INSTANCE,
SortJoinTransposeRule.INSTANCE,
SortRemoveConstantKeysRule.INSTANCE,
Expand Down Expand Up @@ -167,8 +163,12 @@ public class Rules
IntersectToDistinctRule.INSTANCE
);

// Rules from RelOptUtil's registerAbstractRelationalRules, except AggregateMergeRule. (It causes
// testDoubleNestedGroupBy2 to fail).
// Rules from RelOptUtil's registerAbstractRelationalRules, minus:
//
// 1) AggregateMergeRule (it causes testDoubleNestedGroupBy2 to fail)
// 2) SemiJoinRule.PROJECT and SemiJoinRule.JOIN (we don't need to detect semi-joins, because they are handled
// fine as-is by DruidJoinRule).
// 3) JoinCommuteRule (we don't support reordering joins yet).
private static final List<RelOptRule> ABSTRACT_RELATIONAL_RULES =
ImmutableList.of(
FilterJoinRule.FILTER_ON_JOIN,
Expand All @@ -183,29 +183,24 @@ public class Rules
SortRemoveRule.INSTANCE
);

// Rules that pull projections up above a join. This lets us eliminate some subqueries.
private static final List<RelOptRule> JOIN_PROJECT_TRANSPOSE_RULES =
ImmutableList.of(
JoinProjectTransposeRule.RIGHT_PROJECT,
JoinProjectTransposeRule.LEFT_PROJECT
);

private Rules()
{
// No instantiation.
}

public static List<Program> programs(final PlannerContext plannerContext, final QueryMaker queryMaker)
{
final Program hepProgram =
// Program that pre-processes the tree before letting the full-on VolcanoPlanner loose.
final Program preProgram =
Programs.sequence(
Programs.subQuery(DefaultRelMetadataProvider.INSTANCE),
new DecorrelateAndTrimFieldsProgram(),
DecorrelateAndTrimFieldsProgram.INSTANCE,
Programs.hep(REDUCTION_RULES, true, DefaultRelMetadataProvider.INSTANCE)
);

return ImmutableList.of(
Programs.sequence(hepProgram, Programs.ofRules(druidConventionRuleSet(plannerContext, queryMaker))),
Programs.sequence(hepProgram, Programs.ofRules(bindableConventionRuleSet(plannerContext)))
Programs.sequence(preProgram, Programs.ofRules(druidConventionRuleSet(plannerContext, queryMaker))),
Programs.sequence(preProgram, Programs.ofRules(bindableConventionRuleSet(plannerContext)))
);
}

Expand Down Expand Up @@ -242,7 +237,6 @@ private static List<RelOptRule> baseRuleSet(final PlannerContext plannerContext)
rules.addAll(BASE_RULES);
rules.addAll(ABSTRACT_RULES);
rules.addAll(ABSTRACT_RELATIONAL_RULES);
rules.addAll(JOIN_PROJECT_TRANSPOSE_RULES);

if (!plannerConfig.isUseApproximateCountDistinct()) {
// For some reason, even though we support grouping sets, using AggregateExpandDistinctAggregatesRule.INSTANCE
Expand All @@ -261,6 +255,8 @@ private static List<RelOptRule> baseRuleSet(final PlannerContext plannerContext)
// accessible through Programs.standard (which we don't want, since it also adds Enumerable rules).
private static class DecorrelateAndTrimFieldsProgram implements Program
{
private static final DecorrelateAndTrimFieldsProgram INSTANCE = new DecorrelateAndTrimFieldsProgram();

@Override
public RelNode run(
RelOptPlanner planner,
Expand All @@ -270,8 +266,8 @@ public RelNode run(
List<RelOptLattice> lattices
)
{
final RelNode decorrelatedRel = RelDecorrelator.decorrelateQuery(rel);
final RelBuilder relBuilder = RelFactories.LOGICAL_BUILDER.create(decorrelatedRel.getCluster(), null);
final RelBuilder relBuilder = RelFactories.LOGICAL_BUILDER.create(rel.getCluster(), null);
final RelNode decorrelatedRel = RelDecorrelator.decorrelateQuery(rel, relBuilder);
return new RelFieldTrimmer(null, relBuilder).trim(decorrelatedRel);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,19 @@ public class CostEstimates
* Multiplier to apply to an outer query via {@link DruidOuterQueryRel}. Encourages pushing down time-saving
* operations to the lowest level of the query stack, because they'll have bigger impact there.
*/
static final double MULTIPLIER_OUTER_QUERY = 0.1;
static final double MULTIPLIER_OUTER_QUERY = .1;

/**
* Multiplier to apply to a join when the left-hand side is a subquery. Encourages avoiding subqueries. Subqueries
* inside joins must be inlined, which incurs substantial reduction in scalability, so this high number is justified.
* Cost to add to a join when either side is a subquery. Strongly encourages avoiding subqueries, since they must be
* inlined and then the join must run on the Broker.
*/
static final double MULTIPLIER_JOIN_SUBQUERY = 1000000000;
static final double COST_JOIN_SUBQUERY = 1e5;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity where did these numbers come from? Experiments I guess?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly I made them up, but then verified through experiments that they achieve the plans that we want to achieve.


/**
* Cost to perform a cross join. Strongly encourages pushing down filters into join conditions, even if it means
* we need to add a subquery (this is higher than {@link #COST_JOIN_SUBQUERY}).
*/
static final double COST_JOIN_CROSS = 1e8;

private CostEstimates()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,21 @@
import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.plan.volcano.RelSubset;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlKind;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
Expand Down Expand Up @@ -66,32 +69,10 @@ public class DruidJoinQueryRel extends DruidRel<DruidJoinQueryRel>
private RelNode left;
private RelNode right;

/**
* True if {@link #left} requires a subquery.
*
* This is useful to store in a variable because {@link #left} is sometimes not actually a {@link DruidRel} when
* {@link #computeSelfCost} is called. (It might be a {@link org.apache.calcite.plan.volcano.RelSubset}.)
*
* @see #computeLeftRequiresSubquery(DruidRel)
*/
private final boolean leftRequiresSubquery;

/**
* True if {@link #right} requires a subquery.
*
* This is useful to store in a variable because {@link #left} is sometimes not actually a {@link DruidRel} when
* {@link #computeSelfCost} is called. (It might be a {@link org.apache.calcite.plan.volcano.RelSubset}.)
*
* @see #computeLeftRequiresSubquery(DruidRel)
*/
private final boolean rightRequiresSubquery;

private DruidJoinQueryRel(
RelOptCluster cluster,
RelTraitSet traitSet,
Join joinRel,
boolean leftRequiresSubquery,
boolean rightRequiresSubquery,
PartialDruidQuery partialQuery,
QueryMaker queryMaker
)
Expand All @@ -100,8 +81,6 @@ private DruidJoinQueryRel(
this.joinRel = joinRel;
this.left = joinRel.getLeft();
this.right = joinRel.getRight();
this.leftRequiresSubquery = leftRequiresSubquery;
this.rightRequiresSubquery = rightRequiresSubquery;
this.partialQuery = partialQuery;
}

Expand All @@ -110,18 +89,15 @@ private DruidJoinQueryRel(
*/
public static DruidJoinQueryRel create(
final Join joinRel,
final DruidRel<?> left,
final DruidRel<?> right
final QueryMaker queryMaker
)
{
return new DruidJoinQueryRel(
joinRel.getCluster(),
joinRel.getTraitSet(),
joinRel,
computeLeftRequiresSubquery(left),
computeRightRequiresSubquery(right),
PartialDruidQuery.create(joinRel),
left.getQueryMaker()
queryMaker
);
}

Expand Down Expand Up @@ -149,19 +125,11 @@ public DruidJoinQueryRel withPartialQuery(final PartialDruidQuery newQueryBuilde
getCluster(),
getTraitSet().plusAll(newQueryBuilder.getRelTraits()),
joinRel,
leftRequiresSubquery,
rightRequiresSubquery,
newQueryBuilder,
getQueryMaker()
);
}

@Override
public int getQueryCount()
{
return ((DruidRel<?>) left).getQueryCount() + ((DruidRel<?>) right).getQueryCount();
}

@Override
public DruidQuery toDruidQuery(final boolean finalizeAggregations)
{
Expand All @@ -176,18 +144,14 @@ public DruidQuery toDruidQuery(final boolean finalizeAggregations)
final DataSource rightDataSource;

if (computeLeftRequiresSubquery(leftDruidRel)) {
assert leftRequiresSubquery;
leftDataSource = new QueryDataSource(leftQuery.getQuery());
} else {
assert !leftRequiresSubquery;
leftDataSource = leftQuery.getDataSource();
}

if (computeRightRequiresSubquery(rightDruidRel)) {
assert rightRequiresSubquery;
rightDataSource = new QueryDataSource(rightQuery.getQuery());
} else {
assert !rightRequiresSubquery;
rightDataSource = rightQuery.getDataSource();
}

Expand Down Expand Up @@ -250,8 +214,6 @@ public DruidJoinQueryRel asDruidConvention()
.map(input -> RelOptRule.convert(input, DruidConvention.instance()))
.collect(Collectors.toList())
),
leftRequiresSubquery,
rightRequiresSubquery,
partialQuery,
getQueryMaker()
);
Expand Down Expand Up @@ -290,8 +252,6 @@ public RelNode copy(final RelTraitSet traitSet, final List<RelNode> inputs)
getCluster(),
traitSet,
joinRel.copy(joinRel.getTraitSet(), inputs),
leftRequiresSubquery,
rightRequiresSubquery,
getPartialDruidQuery(),
getQueryMaker()
);
Expand Down Expand Up @@ -319,12 +279,9 @@ public RelWriter explainTerms(RelWriter pw)
throw new RuntimeException(e);
}

return pw.input("left", left)
.input("right", right)
.item("condition", joinRel.getCondition())
.item("joinType", joinRel.getJoinType())
.item("query", queryString)
.item("signature", druidQuery.getOutputRowSignature());
return joinRel.explainTerms(pw)
.item("query", queryString)
.item("signature", druidQuery.getOutputRowSignature());
}

@Override
Expand All @@ -336,10 +293,23 @@ protected RelDataType deriveRowType()
@Override
public RelOptCost computeSelfCost(final RelOptPlanner planner, final RelMetadataQuery mq)
{
return planner.getCostFactory()
.makeCost(partialQuery.estimateCost(), 0, 0)
.multiplyBy(leftRequiresSubquery ? CostEstimates.MULTIPLIER_JOIN_SUBQUERY : 1)
.multiplyBy(rightRequiresSubquery ? CostEstimates.MULTIPLIER_JOIN_SUBQUERY : 1);
double cost;

if (computeLeftRequiresSubquery(getSomeDruidChild(left))) {
cost = CostEstimates.COST_JOIN_SUBQUERY;
} else {
cost = partialQuery.estimateCost();
}

if (computeRightRequiresSubquery(getSomeDruidChild(right))) {
cost += CostEstimates.COST_JOIN_SUBQUERY;
}

if (joinRel.getCondition().isA(SqlKind.LITERAL) && !joinRel.getCondition().isAlwaysFalse()) {
cost += CostEstimates.COST_JOIN_CROSS;
}
Copy link
Copy Markdown
Contributor

@suneet-s suneet-s Apr 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we add a branch to set the cost to 0 if the joinCondition is a literal and is always false?

since a false join condition means nothing will match, and therefore you don't need to do work for either the left- or the right-hand side?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, the native query join handling isn't smart enough to totally eliminate a join that has a false condition. It will still evaluate the left and right hand sides if they are subqueries. And it will still walk through every row on the left hand side. So I think it is fair to keep considering these costs in the cost estimator.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah makes sense


return planner.getCostFactory().makeCost(cost, 0, 0);
}

private static JoinType toDruidJoinType(JoinRelType calciteJoinType)
Expand Down Expand Up @@ -395,4 +365,14 @@ private static Pair<String, RowSignature> computeJoinRowSignature(

return Pair.of(rightPrefix, signatureBuilder.build());
}

private static DruidRel<?> getSomeDruidChild(final RelNode child)
{
if (child instanceof DruidRel) {
return (DruidRel<?>) child;
} else {
final RelSubset subset = (RelSubset) child;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do we know this will be a RelSubset? I couldn't trace that path down here

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding of Calcite's planner tells me that the children will either be single rels or will be a subset of equivalent rels. So DruidRel and RelSubset are the two cases that can happen.

I hope I understand the planner correctly — I can't point to any specific code that guarantees what I am saying is true.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when I wrote this, I was wondering whether bindable parameters would change the node here somehow. The Calcite query tests are pretty comprehensive around different types of JOINs and nested queries, so I feel pretty confident agreeing with your understanding. Any chance you can test this with parameterized SQL?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will look at adding some tests for this in the same followup as #9648 (comment).

Copy link
Copy Markdown
Contributor

@maytasm maytasm Apr 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"subset of equivalent rels" -> Does this imply that the cost of each RelNode will be equal? I saw that computeLeftRequiresSubquery can return different results depending on which child we pick from the RelList. This then results in a very different cost, since a child that requires a subquery will have a very high cost and a child that doesn't will have a much lower cost.

More specifically, I saw two RelNode in the list. One RelNode has a filter = null and the other has "filter":{"type":"selector","dimension":"v","value":"xa","extractionFn":null}

return (DruidRel<?>) Iterables.getFirst(subset.getRels(), null);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,6 @@ public DruidOuterQueryRel withPartialQuery(final PartialDruidQuery newQueryBuild
);
}

@Override
public int getQueryCount()
{
return 1 + ((DruidRel) sourceRel).getQueryCount();
}

@Override
public DruidQuery toDruidQuery(final boolean finalizeAggregations)
{
Expand Down
Loading