From 33e2ec27ff758e3f77e1aca82821982dffa55dd0 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Wed, 16 Oct 2024 10:37:30 +0530 Subject: [PATCH 1/4] join hints draft --- .../simpleJoinHint.iq | 255 ++++++++++++++++++ .../sql/calcite/planner/CalcitePlanner.java | 20 +- .../quidem/DruidQuidemCommandHandler.java | 154 ++++++++++- .../simpleJoinHint.iq | 255 ++++++++++++++++++ 4 files changed, 682 insertions(+), 2 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq create mode 100644 sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq diff --git a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq new file mode 100644 index 000000000000..e70c4c18366c --- /dev/null +++ b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq @@ -0,0 +1,255 @@ +!use druidtest://?componentSupplier=DrillWindowQueryMSQComponentSupplier +!set outputformat mysql + +select /*+ use_merge_join(w1, w2) */ w1.cityName, w2.countryName +from wikipedia w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; ++----------+---------------+ +| cityName | countryName | ++----------+---------------+ +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | ++----------+---------------+ +(169 rows) + +!ok + +{ + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "table", + "name" : "wikipedia" + }, + "right" : { + "type" : "query", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "table", + "name" : "wikipedia" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "rightPrefix" : "j0.", + "condition" : "(\"cityName\" == \"j0.cityName\")", + "joinType" : "INNER" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "j0.countryName", "v0" ], + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false +} +!nativePlan + +LogicalProject(cityName=[$2], countryName=[$25]) + LogicalFilter(condition=[=($2, 'New York')]) + LogicalJoin(condition=[=($2, $22)], joinType=[inner]) + LogicalTableScan(table=[[druid, wikipedia]]) + LogicalTableScan(table=[[druid, wikipedia]]) + +!convertedPlan + + +LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] + +!hints diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java index 8eb9541961c6..94b078768978 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java @@ -38,6 +38,14 @@ import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Snapshot; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.hint.HintPredicate; +import org.apache.calcite.rel.hint.HintPredicates; +import org.apache.calcite.rel.hint.HintStrategyTable; +import org.apache.calcite.rel.logical.LogicalCorrelate; +import org.apache.calcite.rel.logical.LogicalJoin; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeSystem; @@ -60,12 +68,16 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.ValidationException; import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Util; import javax.annotation.Nullable; import java.io.Reader; import java.util.List; import java.util.Objects; import java.util.Properties; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Calcite planner. Clone of Calcite's @@ -294,8 +306,14 @@ public RelRoot rel(SqlNode sql) Objects.requireNonNull(planner, "planner"), rexBuilder ); + HintStrategyTable hintStrategyTable = HintStrategyTable.builder() + .hintStrategy("use_hash_join", HintPredicates.JOIN) + .hintStrategy("use_merge_join", HintPredicates.JOIN) + .hintStrategy("no_hash_join", HintPredicates.JOIN) + .build(); final SqlToRelConverter.Config config = - sqlToRelConverterConfig.withTrimUnusedFields(false); + sqlToRelConverterConfig.withTrimUnusedFields(false) + .withHintStrategyTable(hintStrategyTable); final SqlToRelConverter sqlToRelConverter = new DruidSqlToRelConverter(this, validator, createCatalogReader(), cluster, convertletTable, config diff --git a/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java b/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java index a183ba9ecad1..b5bf4d7690dc 100644 --- a/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java +++ b/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java @@ -27,6 +27,21 @@ import net.hydromatic.quidem.Quidem.SqlCommand; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttleImpl; +import org.apache.calcite.rel.core.Snapshot; +import org.apache.calcite.rel.core.TableFunctionScan; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.core.Window; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalCorrelate; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalIntersect; +import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.LogicalMinus; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalSort; +import org.apache.calcite.rel.logical.LogicalUnion; +import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.runtime.Hook; import org.apache.calcite.sql.SqlExplainFormat; import org.apache.calcite.sql.SqlExplainLevel; @@ -59,6 +74,9 @@ public Command parseCommand(List lines, List content, String lin if (line.startsWith("druidPlan")) { return new DruidPlanCommand(lines, content); } + if (line.startsWith("hints")) { + return new HintPlanCommand(lines, content); + } if (line.startsWith("nativePlan")) { return new NativePlanCommand(lines, content); } @@ -202,10 +220,16 @@ protected final void executeExplain(Context context) throws IOException if (node instanceof DruidRel) { node = ((DruidRel) node).unwrapLogicalPlan(); } - String str = RelOptUtil.dumpPlan("", node, SqlExplainFormat.TEXT, SqlExplainLevel.EXPPLAN_ATTRIBUTES); + String str = getString(node); context.echo(ImmutableList.of(str)); } } + + protected String getString(RelNode node) + { + String str = RelOptUtil.dumpPlan("", node, SqlExplainFormat.TEXT, SqlExplainLevel.EXPPLAN_ATTRIBUTES); + return str; + } } /** @@ -245,6 +269,134 @@ static class DruidPlanCommand extends AbstractRelPlanCommand } } + static class HintPlanCommand extends AbstractRelPlanCommand + { + HintPlanCommand(List lines, List content) + { + super(lines, content, DruidHook.DRUID_PLAN); + } + + @Override + protected String getString(RelNode node) + { + final List hintsCollect = new ArrayList<>(); + final HintCollector collector = new HintCollector(hintsCollect); + node.accept(collector); + StringBuilder builder = new StringBuilder(); + for (String hintLine : hintsCollect) { + builder.append(hintLine).append("\n"); + } + + return builder.toString(); + } + + private static class HintCollector extends RelShuttleImpl + { + private final List hintsCollect; + + HintCollector(List hintsCollect) { + this.hintsCollect = hintsCollect; + } + + @Override public RelNode visit(TableScan scan) { + if (!scan.getHints().isEmpty()) { + this.hintsCollect.add("TableScan:" + scan.getHints()); + } + return super.visit(scan); + } + + @Override public RelNode visit(LogicalJoin join) { + if (!join.getHints().isEmpty()) { + this.hintsCollect.add("LogicalJoin:" + join.getHints()); + } + return super.visit(join); + } + + @Override public RelNode visit(LogicalProject project) { + if (!project.getHints().isEmpty()) { + this.hintsCollect.add("Project:" + project.getHints()); + } + return super.visit(project); + } + + @Override public RelNode visit(LogicalAggregate aggregate) { + if (!aggregate.getHints().isEmpty()) { + this.hintsCollect.add("Aggregate:" + aggregate.getHints()); + } + return super.visit(aggregate); + } + + @Override public RelNode visit(LogicalCorrelate correlate) { + if (!correlate.getHints().isEmpty()) { + this.hintsCollect.add("Correlate:" + correlate.getHints()); + } + return super.visit(correlate); + } + + @Override public RelNode visit(LogicalFilter filter) { + if (!filter.getHints().isEmpty()) { + this.hintsCollect.add("Filter:" + filter.getHints()); + } + return super.visit(filter); + } + + @Override public RelNode visit(LogicalUnion union) { + if (!union.getHints().isEmpty()) { + this.hintsCollect.add("Union:" + union.getHints()); + } + return super.visit(union); + } + + @Override public RelNode visit(LogicalIntersect intersect) { + if (!intersect.getHints().isEmpty()) { + this.hintsCollect.add("Intersect:" + intersect.getHints()); + } + return super.visit(intersect); + } + + @Override public RelNode visit(LogicalMinus minus) { + if (!minus.getHints().isEmpty()) { + this.hintsCollect.add("Minus:" + minus.getHints()); + } + return super.visit(minus); + } + + @Override public RelNode visit(LogicalSort sort) { + if (!sort.getHints().isEmpty()) { + this.hintsCollect.add("Sort:" + sort.getHints()); + } + return super.visit(sort); + } + + @Override public RelNode visit(LogicalValues values) { + if (!values.getHints().isEmpty()) { + this.hintsCollect.add("Values:" + values.getHints()); + } + return super.visit(values); + } + + @Override public RelNode visit(RelNode other) { + if (other instanceof Window) { + Window window = (Window) other; + if (!window.getHints().isEmpty()) { + this.hintsCollect.add("Window:" + window.getHints()); + } + } else if (other instanceof Snapshot) { + Snapshot snapshot = (Snapshot) other; + if (!snapshot.getHints().isEmpty()) { + this.hintsCollect.add("Snapshot:" + snapshot.getHints()); + } + } else if (other instanceof TableFunctionScan) { + TableFunctionScan scan = (TableFunctionScan) other; + if (!scan.getHints().isEmpty()) { + this.hintsCollect.add("TableFunctionScan:" + scan.getHints()); + } + } + return super.visit(other); + } + } + } + static class ConvertedPlanCommand extends AbstractRelPlanCommand { ConvertedPlanCommand(List lines, List content) diff --git a/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq new file mode 100644 index 000000000000..5a4a0f15fab7 --- /dev/null +++ b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq @@ -0,0 +1,255 @@ +!use druidtest:/// +!set outputformat mysql + +select /*+ use_merge_join(w1, w2) */ w1.cityName, w2.countryName +from wikipedia w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; ++----------+---------------+ +| cityName | countryName | ++----------+---------------+ +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | +| New York | United States | ++----------+---------------+ +(169 rows) + +!ok + +{ + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "table", + "name" : "wikipedia" + }, + "right" : { + "type" : "query", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "table", + "name" : "wikipedia" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "rightPrefix" : "j0.", + "condition" : "(\"cityName\" == \"j0.cityName\")", + "joinType" : "INNER" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "j0.countryName", "v0" ], + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false +} +!nativePlan + +LogicalProject(cityName=[$2], countryName=[$25]) + LogicalFilter(condition=[=($2, 'New York')]) + LogicalJoin(condition=[=($2, $22)], joinType=[inner]) + LogicalTableScan(table=[[druid, wikipedia]]) + LogicalTableScan(table=[[druid, wikipedia]]) + +!convertedPlan + + +LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] + +!hints From f3f779e2afd1bc849b50504dd287eb4095da2d07 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Fri, 18 Oct 2024 16:20:32 +0530 Subject: [PATCH 2/4] join algo --- .../error/BroadcastTablesTooLargeFault.java | 2 +- .../BroadcastJoinSegmentMapFnProcessor.java | 2 +- .../druid/msq/querykit/DataSourcePlan.java | 13 +- .../apache/druid/msq/exec/MSQSelectTest.java | 2 +- .../msq/indexing/error/MSQFaultSerdeTest.java | 2 +- ...roadcastJoinSegmentMapFnProcessorTest.java | 2 +- .../test/CalciteSelectJoinQueryMSQTest.java | 2 +- .../simpleJoinHint.iq | 379 +++++++----------- .../apache/druid/query}/JoinAlgorithm.java | 2 +- .../apache/druid/query/JoinDataSource.java | 66 ++- .../calcite/planner/CalciteRulesManager.java | 1 + .../druid/sql/calcite/planner/JoinHint.java | 81 ++++ .../sql/calcite/planner/PlannerContext.java | 1 + .../druid/sql/calcite/planner/QueryUtils.java | 11 + .../sql/calcite/planner/QueryValidations.java | 1 + .../sql/calcite/rel/DruidJoinQueryRel.java | 4 +- .../druid/sql/calcite/rule/DruidJoinRule.java | 7 +- .../druid/sql/calcite/run/EngineFeature.java | 3 +- .../sql/calcite/run/NativeSqlEngine.java | 2 +- .../sql/calcite/rule/DruidJoinRuleTest.java | 2 +- .../simpleJoinHint.iq | 255 ------------ 21 files changed, 335 insertions(+), 505 deletions(-) rename {sql/src/main/java/org/apache/druid/sql/calcite/planner => processing/src/main/java/org/apache/druid/query}/JoinAlgorithm.java (97%) create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java delete mode 100644 sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/BroadcastTablesTooLargeFault.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/BroadcastTablesTooLargeFault.java index 6912fefa6b65..31337cbcfe69 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/BroadcastTablesTooLargeFault.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/BroadcastTablesTooLargeFault.java @@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import javax.annotation.Nullable; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java index cbb79c45702b..cf3813ed7939 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java @@ -42,7 +42,7 @@ import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.SegmentReference; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import java.io.IOException; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java index 21848813e5d3..6a19768692f1 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java @@ -64,7 +64,7 @@ import org.apache.druid.segment.join.JoinConditionAnalysis; import org.apache.druid.sql.calcite.external.ExternalDataSource; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.joda.time.Interval; @@ -212,10 +212,13 @@ public static DataSourcePlan forDataSource( broadcast ); } else if (dataSource instanceof JoinDataSource) { - final JoinAlgorithm preferredJoinAlgorithm = PlannerContext.getJoinAlgorithm(queryContext); + JoinDataSource joinDataSource = (JoinDataSource) dataSource; + final JoinAlgorithm preferredJoinAlgorithm = joinDataSource.getPreferredJoinAlgorithm() != null + ? joinDataSource.getPreferredJoinAlgorithm() + : PlannerContext.getJoinAlgorithm(queryContext); final JoinAlgorithm deducedJoinAlgorithm = deduceJoinAlgorithm( preferredJoinAlgorithm, - ((JoinDataSource) dataSource) + joinDataSource ); switch (deducedJoinAlgorithm) { @@ -223,7 +226,7 @@ public static DataSourcePlan forDataSource( return forBroadcastHashJoin( queryKitSpec, queryContext, - (JoinDataSource) dataSource, + joinDataSource, querySegmentSpec, filter, filterFields, @@ -234,7 +237,7 @@ public static DataSourcePlan forDataSource( case SORT_MERGE: return forSortMergeJoin( queryKitSpec, - (JoinDataSource) dataSource, + joinDataSource, querySegmentSpec, minStageNumber, broadcast diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index 91a1983bfd68..c47b0be821cf 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -81,7 +81,7 @@ import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.ColumnMapping; import org.apache.druid.sql.calcite.planner.ColumnMappings; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.util.CalciteTests; import org.hamcrest.CoreMatchers; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java index 55c6c48c1afe..92c905b87acb 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java @@ -28,7 +28,7 @@ import org.apache.druid.msq.guice.MSQIndexingModule; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.junit.Assert; import org.junit.Before; import org.junit.Test; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java index 4270fe8bdccc..ab47b9fabe8b 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java @@ -50,7 +50,7 @@ import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.join.JoinConditionAnalysis; import org.apache.druid.segment.join.JoinType; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.testing.InitializedNullHandlingTest; import org.easymock.EasyMock; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java index 30724dcd4ce0..00378824f3fb 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java @@ -25,7 +25,7 @@ import org.apache.druid.sql.calcite.CalciteJoinQueryTest; import org.apache.druid.sql.calcite.QueryTestBuilder; import org.apache.druid.sql.calcite.SqlTestFrameworkConfig; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import java.util.Map; diff --git a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq index e70c4c18366c..3064a6ae6be7 100644 --- a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq +++ b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq @@ -5,198 +5,28 @@ select /*+ use_merge_join(w1, w2) */ w1.cityName, w2.countryName from wikipedia w1 JOIN wikipedia w2 ON w1.cityName = w2.cityName where w1.cityName='New York'; -+----------+---------------+ -| cityName | countryName | -+----------+---------------+ -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -+----------+---------------+ -(169 rows) -!ok -{ - "queryType" : "scan", - "dataSource" : { - "type" : "join", - "left" : { +LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] + +!hints + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "_0", + "input" : [ { "type" : "table", - "name" : "wikipedia" - }, - "right" : { - "type" : "query", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", "query" : { "queryType" : "scan", "dataSource" : { - "type" : "table", - "name" : "wikipedia" + "type" : "inputNumber", + "inputNumber" : 0 }, "intervals" : { "type" : "intervals", @@ -204,6 +34,12 @@ where w1.cityName='New York'; }, "resultFormat" : "compactedList", "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, "columnTypes" : [ "STRING", "STRING" ], "granularity" : { "type" : "all" @@ -211,45 +47,136 @@ where w1.cityName='New York'; "legacy" : false } }, - "rightPrefix" : "j0.", - "condition" : "(\"cityName\" == \"j0.cityName\")", - "joinType" : "INNER" - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "virtualColumns" : [ { - "type" : "expression", - "name" : "v0", - "expression" : "'New York'", - "outputType" : "STRING" - } ], - "resultFormat" : "compactedList", - "filter" : { - "type" : "equals", - "column" : "cityName", - "matchValueType" : "STRING", - "matchValue" : "New York" + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 }, - "columns" : [ "j0.countryName", "v0" ], - "columnTypes" : [ "STRING", "STRING" ], - "granularity" : { - "type" : "all" + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + }, { + "type" : "stage", + "stage" : 0 + } ], + "broadcast" : [ 1 ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 1 + }, + "rightPrefix" : "j0.", + "condition" : "(\"cityName\" == \"j0.cityName\")", + "joinType" : "INNER" + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "j0.countryName", "v0" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : "[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "j0.countryName", + "type" : "STRING" + }, { + "name" : "v0", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 }, - "legacy" : false -} -!nativePlan - -LogicalProject(cityName=[$2], countryName=[$25]) - LogicalFilter(condition=[=($2, 'New York')]) - LogicalJoin(condition=[=($2, $22)], joinType=[inner]) - LogicalTableScan(table=[[druid, wikipedia]]) - LogicalTableScan(table=[[druid, wikipedia]]) - -!convertedPlan - - -LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] - -!hints + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] +!msqPlan diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinAlgorithm.java b/processing/src/main/java/org/apache/druid/query/JoinAlgorithm.java similarity index 97% rename from sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinAlgorithm.java rename to processing/src/main/java/org/apache/druid/query/JoinAlgorithm.java index 2e53795c4ffa..94cd209e5780 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinAlgorithm.java +++ b/processing/src/main/java/org/apache/druid/query/JoinAlgorithm.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.druid.sql.calcite.planner; +package org.apache.druid.query; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonValue; diff --git a/processing/src/main/java/org/apache/druid/query/JoinDataSource.java b/processing/src/main/java/org/apache/druid/query/JoinDataSource.java index 220f18a94855..0981d644a9ef 100644 --- a/processing/src/main/java/org/apache/druid/query/JoinDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/JoinDataSource.java @@ -96,6 +96,8 @@ public class JoinDataSource implements DataSource private final DimFilter leftFilter; @Nullable private final JoinableFactoryWrapper joinableFactoryWrapper; + @Nullable + private final JoinAlgorithm preferredJoinAlgorithm; private static final Logger log = new Logger(JoinDataSource.class); private final DataSourceAnalysis analysis; @@ -106,7 +108,8 @@ private JoinDataSource( JoinConditionAnalysis conditionAnalysis, JoinType joinType, @Nullable DimFilter leftFilter, - @Nullable JoinableFactoryWrapper joinableFactoryWrapper + @Nullable JoinableFactoryWrapper joinableFactoryWrapper, + @Nullable JoinAlgorithm preferredJoinAlgorithm ) { this.left = Preconditions.checkNotNull(left, "left"); @@ -116,6 +119,7 @@ private JoinDataSource( this.joinType = Preconditions.checkNotNull(joinType, "joinType"); this.leftFilter = validateLeftFilter(left, leftFilter); this.joinableFactoryWrapper = joinableFactoryWrapper; + this.preferredJoinAlgorithm = preferredJoinAlgorithm; this.analysis = this.getAnalysisForDataSource(); } @@ -123,7 +127,7 @@ private JoinDataSource( /** * Create a join dataSource from a string condition. */ - @JsonCreator + public static JoinDataSource create( @JsonProperty("left") DataSource left, @JsonProperty("right") DataSource right, @@ -134,6 +138,22 @@ public static JoinDataSource create( @JacksonInject ExprMacroTable macroTable, @Nullable @JacksonInject JoinableFactoryWrapper joinableFactoryWrapper ) + { + return create(left, right, rightPrefix, condition, joinType, leftFilter, macroTable, joinableFactoryWrapper, null); + } + + @JsonCreator + public static JoinDataSource create( + @JsonProperty("left") DataSource left, + @JsonProperty("right") DataSource right, + @JsonProperty("rightPrefix") String rightPrefix, + @JsonProperty("condition") String condition, + @JsonProperty("joinType") JoinType joinType, + @Nullable @JsonProperty("leftFilter") DimFilter leftFilter, + @JacksonInject ExprMacroTable macroTable, + @Nullable @JacksonInject JoinableFactoryWrapper joinableFactoryWrapper, + @Nullable @JsonProperty("preferredJoinAlgorithm") JoinAlgorithm preferredJoinAlgorithm + ) { return new JoinDataSource( left, @@ -146,7 +166,8 @@ public static JoinDataSource create( ), joinType, leftFilter, - joinableFactoryWrapper + joinableFactoryWrapper, + preferredJoinAlgorithm ); } @@ -170,10 +191,33 @@ public static JoinDataSource create( conditionAnalysis, joinType, leftFilter, - joinableFactoryWrapper + joinableFactoryWrapper, + null ); } + public static JoinDataSource create( + final DataSource left, + final DataSource right, + final String rightPrefix, + final JoinConditionAnalysis conditionAnalysis, + final JoinType joinType, + final DimFilter leftFilter, + @Nullable final JoinableFactoryWrapper joinableFactoryWrapper, + @Nullable final JoinAlgorithm preferredJoinAlgorithm + ) + { + return new JoinDataSource( + left, + right, + rightPrefix, + conditionAnalysis, + joinType, + leftFilter, + joinableFactoryWrapper, + preferredJoinAlgorithm + ); + } @Override public Set getTableNames() @@ -253,7 +297,8 @@ public DataSource withChildren(List children) conditionAnalysis, joinType, leftFilter, - joinableFactoryWrapper + joinableFactoryWrapper, + preferredJoinAlgorithm ); } @@ -363,6 +408,13 @@ public DataSourceAnalysis getAnalysis() return analysis; } + @Nullable + @JsonProperty + public JoinAlgorithm getPreferredJoinAlgorithm() + { + return preferredJoinAlgorithm; + } + @Override public boolean equals(Object o) { @@ -378,7 +430,8 @@ public boolean equals(Object o) Objects.equals(rightPrefix, that.rightPrefix) && Objects.equals(conditionAnalysis, that.conditionAnalysis) && Objects.equals(leftFilter, that.leftFilter) && - joinType == that.joinType; + joinType == that.joinType && + preferredJoinAlgorithm == that.preferredJoinAlgorithm; } @Override @@ -397,6 +450,7 @@ public String toString() ", condition=" + conditionAnalysis + ", joinType=" + joinType + ", leftFilter=" + leftFilter + + ", preferredJoinAlgorithm=" + preferredJoinAlgorithm + '}'; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java index d6dd1310e6c5..ea4efd16d91a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java @@ -50,6 +50,7 @@ import org.apache.calcite.tools.Programs; import org.apache.calcite.tools.RelBuilder; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.external.ExternalTableScanRule; import org.apache.druid.sql.calcite.rule.AggregatePullUpLookupRule; import org.apache.druid.sql.calcite.rule.CaseToCoalesceRule; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java new file mode 100644 index 000000000000..587366bbacba --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.planner; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.JoinAlgorithm; + +import java.util.Arrays; + +public enum JoinHint +{ + USE_MERGE_JOIN("use_merge_join") { + @Override + public JoinAlgorithm getJoinAlgorithm() + { + return JoinAlgorithm.SORT_MERGE; + } + }, + USE_HASH_JOIN("use_hash_join") { + @Override + public JoinAlgorithm getJoinAlgorithm() + { + return JoinAlgorithm.BROADCAST; + } + }; + + private final String id; + + JoinHint(String id) + { + this.id = id; + } + + @JsonCreator + public static JoinHint fromString(final String id) + { + for (final JoinHint value : values()) { + if (value.id.equals(id)) { + return value; + } + } + + throw new IAE("No such join hint [%s]. Supported values are: %s", id, Arrays.toString(values())); + } + + @JsonValue + public String getId() + { + return id; + } + + /** + * Whether this join algorithm requires subqueries for all inputs. + */ + public abstract JoinAlgorithm getJoinAlgorithm(); + + @Override + public String toString() + { + return id; + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java index cd2e9401954e..c41971d3a671 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java @@ -36,6 +36,7 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.QueryContext; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.filter.InDimFilter; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryUtils.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryUtils.java index e0a5cce94187..82f694cdebf1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryUtils.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryUtils.java @@ -19,6 +19,8 @@ package org.apache.druid.sql.calcite.planner; +import org.apache.calcite.rel.core.Join; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.rel.DruidQuery; import java.util.ArrayList; @@ -55,4 +57,13 @@ public static List buildColumnMappings( return columnMappings; } + + public static JoinAlgorithm getJoinAlgorithm(Join join, PlannerContext plannerContext) + { + if (join.getHints().isEmpty()) { + return plannerContext.getJoinAlgorithm(); + } + + return JoinHint.fromString(join.getHints().get(0).hintName).getJoinAlgorithm(); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java index c516496af31e..27c5ca7e9e82 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java @@ -25,6 +25,7 @@ import org.apache.calcite.rel.logical.LogicalJoin; import org.apache.calcite.tools.ValidationException; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.run.EngineFeature; /** diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java index 5615da3344fe..5f46dfd97eb0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java @@ -55,6 +55,7 @@ import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.planner.QueryUtils; import org.apache.druid.sql.calcite.planner.querygen.SourceDescProducer.SourceDesc; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -225,7 +226,8 @@ public static SourceDesc buildJoinSourceDesc(final SourceDesc leftDesc, final So ), toDruidJoinType(joinRel.getJoinType()), getDimFilter(plannerContext, leftDesc.rowSignature, leftFilter), - plannerContext.getJoinableFactoryWrapper() + plannerContext.getJoinableFactoryWrapper(), + QueryUtils.getJoinAlgorithm(joinRel, plannerContext) ); SourceDesc sourceDesc = new SourceDesc(joinDataSource, signature, virtualColumnRegistry); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java index 8d6230afcc8f..8795b5054657 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java @@ -50,7 +50,10 @@ import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.LookupDataSource; +import org.apache.druid.query.JoinAlgorithm; +import org.apache.druid.sql.calcite.planner.JoinHint; import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.planner.QueryUtils; import org.apache.druid.sql.calcite.rel.DruidJoinQueryRel; import org.apache.druid.sql.calcite.rel.DruidQueryRel; import org.apache.druid.sql.calcite.rel.DruidRel; @@ -130,7 +133,7 @@ public void onMatch(RelOptRuleCall call) plannerContext.setPlanningError(conditionAnalysis.errorStr); final boolean isLeftDirectAccessPossible = enableLeftScanDirect && (left instanceof DruidQueryRel); - if (!plannerContext.getJoinAlgorithm().requiresSubquery() + if (!QueryUtils.getJoinAlgorithm(join, plannerContext).requiresSubquery() && left.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT && (isLeftDirectAccessPossible || left.getPartialDruidQuery().getWhereFilter() == null)) { // Swap the left-side projection above the join, so the left side is a simple scan or mapping. This helps us @@ -153,7 +156,7 @@ public void onMatch(RelOptRuleCall call) leftFilter = null; } - if (!plannerContext.getJoinAlgorithm().requiresSubquery() + if (!QueryUtils.getJoinAlgorithm(join, plannerContext).requiresSubquery() && right.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT && right.getPartialDruidQuery().getWhereFilter() == null && !right.getPartialDruidQuery().getSelectProject().isMapping() diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java index 97e81c2ef5c3..9bf4fac8e1e7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java @@ -19,6 +19,7 @@ package org.apache.druid.sql.calcite.run; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.external.ExternalDataSource; /** @@ -103,7 +104,7 @@ public enum EngineFeature UNNEST, /** - * Planner is permitted to use {@link org.apache.druid.sql.calcite.planner.JoinAlgorithm#BROADCAST} with RIGHT + * Planner is permitted to use {@link JoinAlgorithm#BROADCAST} with RIGHT * and FULL join. Not guaranteed to produce correct results in either the native or MSQ engines, but we allow * it in native for two reasons: legacy (the docs caution against it, but it's always been allowed), and the fact * that it actually *does* generate correct results in native when the join is processed on the Broker. It is much diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java index 4f3d86b1b420..a832509f5cad 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java @@ -32,7 +32,7 @@ import org.apache.druid.server.QueryLifecycleFactory; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.parser.DruidSqlReplace; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.destination.IngestDestination; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java index fa76ba3a3c90..56b9ae3091d9 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java @@ -33,7 +33,7 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.query.QueryContext; import org.apache.druid.sql.calcite.planner.DruidTypeSystem; -import org.apache.druid.sql.calcite.planner.JoinAlgorithm; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.junit.Assert; import org.junit.Before; diff --git a/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq deleted file mode 100644 index 5a4a0f15fab7..000000000000 --- a/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/simpleJoinHint.iq +++ /dev/null @@ -1,255 +0,0 @@ -!use druidtest:/// -!set outputformat mysql - -select /*+ use_merge_join(w1, w2) */ w1.cityName, w2.countryName -from wikipedia w1 -JOIN wikipedia w2 ON w1.cityName = w2.cityName -where w1.cityName='New York'; -+----------+---------------+ -| cityName | countryName | -+----------+---------------+ -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -| New York | United States | -+----------+---------------+ -(169 rows) - -!ok - -{ - "queryType" : "scan", - "dataSource" : { - "type" : "join", - "left" : { - "type" : "table", - "name" : "wikipedia" - }, - "right" : { - "type" : "query", - "query" : { - "queryType" : "scan", - "dataSource" : { - "type" : "table", - "name" : "wikipedia" - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "resultFormat" : "compactedList", - "columns" : [ "cityName", "countryName" ], - "columnTypes" : [ "STRING", "STRING" ], - "granularity" : { - "type" : "all" - }, - "legacy" : false - } - }, - "rightPrefix" : "j0.", - "condition" : "(\"cityName\" == \"j0.cityName\")", - "joinType" : "INNER" - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "virtualColumns" : [ { - "type" : "expression", - "name" : "v0", - "expression" : "'New York'", - "outputType" : "STRING" - } ], - "resultFormat" : "compactedList", - "filter" : { - "type" : "equals", - "column" : "cityName", - "matchValueType" : "STRING", - "matchValue" : "New York" - }, - "columns" : [ "j0.countryName", "v0" ], - "columnTypes" : [ "STRING", "STRING" ], - "granularity" : { - "type" : "all" - }, - "legacy" : false -} -!nativePlan - -LogicalProject(cityName=[$2], countryName=[$25]) - LogicalFilter(condition=[=($2, 'New York')]) - LogicalJoin(condition=[=($2, $22)], joinType=[inner]) - LogicalTableScan(table=[[druid, wikipedia]]) - LogicalTableScan(table=[[druid, wikipedia]]) - -!convertedPlan - - -LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] - -!hints From 0bc1e929e6dc03235d796ff5f4b3133a301478b8 Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Thu, 24 Oct 2024 19:04:21 +0530 Subject: [PATCH 3/4] propagate join hints --- .../BroadcastJoinSegmentMapFnProcessor.java | 2 +- .../druid/msq/querykit/DataSourcePlan.java | 2 +- .../apache/druid/msq/exec/MSQSelectTest.java | 2 +- .../msq/indexing/error/MSQFaultSerdeTest.java | 2 +- ...roadcastJoinSegmentMapFnProcessorTest.java | 2 +- .../test/CalciteSelectJoinQueryMSQTest.java | 2 +- .../msqJoinHint.iq | 1022 +++++++++++++++++ .../simpleJoinHint.iq | 182 --- .../sql/calcite/planner/CalcitePlanner.java | 37 +- .../druid/sql/calcite/planner/JoinHint.java | 7 + .../druid/sql/calcite/rule/DruidJoinRule.java | 2 - .../sql/calcite/run/NativeSqlEngine.java | 2 +- .../quidem/DruidQuidemCommandHandler.java | 51 +- .../sql/calcite/rule/DruidJoinRuleTest.java | 2 +- 14 files changed, 1096 insertions(+), 221 deletions(-) create mode 100644 extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq delete mode 100644 extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java index cf3813ed7939..b47a450aa000 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessor.java @@ -38,11 +38,11 @@ import org.apache.druid.msq.input.ReadableInput; import org.apache.druid.query.DataSource; import org.apache.druid.query.InlineDataSource; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.Query; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; import org.apache.druid.segment.SegmentReference; -import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import java.io.IOException; diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java index 6a19768692f1..4acacdf53bec 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/DataSourcePlan.java @@ -46,6 +46,7 @@ import org.apache.druid.query.DataSource; import org.apache.druid.query.FilteredDataSource; import org.apache.druid.query.InlineDataSource; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.JoinDataSource; import org.apache.druid.query.LookupDataSource; import org.apache.druid.query.QueryContext; @@ -64,7 +65,6 @@ import org.apache.druid.segment.join.JoinConditionAnalysis; import org.apache.druid.sql.calcite.external.ExternalDataSource; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; -import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.joda.time.Interval; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index c47b0be821cf..b5c9675a411c 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -50,6 +50,7 @@ import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.msq.util.MultiStageQueryContext; import org.apache.druid.query.InlineDataSource; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.LookupDataSource; import org.apache.druid.query.OrderBy; import org.apache.druid.query.Query; @@ -81,7 +82,6 @@ import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.ColumnMapping; import org.apache.druid.sql.calcite.planner.ColumnMappings; -import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.util.CalciteTests; import org.hamcrest.CoreMatchers; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java index 92c905b87acb..3d26444a3d27 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/error/MSQFaultSerdeTest.java @@ -26,9 +26,9 @@ import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.msq.guice.MSQIndexingModule; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.query.JoinAlgorithm; import org.junit.Assert; import org.junit.Before; import org.junit.Test; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java index ab47b9fabe8b..ee5db19daa2a 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/BroadcastJoinSegmentMapFnProcessorTest.java @@ -42,6 +42,7 @@ import org.apache.druid.msq.indexing.error.MSQException; import org.apache.druid.query.DataSource; import org.apache.druid.query.InlineDataSource; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.JoinDataSource; import org.apache.druid.query.Query; import org.apache.druid.query.QueryContext; @@ -50,7 +51,6 @@ import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.join.JoinConditionAnalysis; import org.apache.druid.segment.join.JoinType; -import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.testing.InitializedNullHandlingTest; import org.easymock.EasyMock; diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java index 00378824f3fb..42ee5bac3528 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/test/CalciteSelectJoinQueryMSQTest.java @@ -21,11 +21,11 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.msq.sql.MSQTaskSqlEngine; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.BaseCalciteQueryTest; import org.apache.druid.sql.calcite.CalciteJoinQueryTest; import org.apache.druid.sql.calcite.QueryTestBuilder; import org.apache.druid.sql.calcite.SqlTestFrameworkConfig; -import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import java.util.Map; diff --git a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq new file mode 100644 index 000000000000..26a7b1c776d8 --- /dev/null +++ b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq @@ -0,0 +1,1022 @@ +!use druidtest://?componentSupplier=DrillWindowQueryMSQComponentSupplier +!set outputformat mysql + +select w1.cityName, w2.countryName +from (select cityName, countryName from wikipedia where cityName='New York') w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + + +!hints + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "cityName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 2, + "definition" : { + "id" : "_2", + "input" : [ { + "type" : "stage", + "stage" : 0 + }, { + "type" : "stage", + "stage" : 1 + } ], + "broadcast" : [ 1 ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 1 + }, + "rightPrefix" : "j0.", + "condition" : "(\"cityName\" == \"j0.cityName\")", + "joinType" : "INNER", + "preferredJoinAlgorithm" : null + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "columns" : [ "j0.countryName", "v0" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : "[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "j0.countryName", + "type" : "STRING" + }, { + "name" : "v0", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] +!msqPlan + +select /*+ use_hash_join(w1, w2) */ w1.cityName, w2.countryName +from (select cityName, countryName from wikipedia where cityName='New York') w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + +LogicalJoin:[[use_hash_join inheritPath:[0, 0] options:[w1, w2]]] + +!hints + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "cityName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 2, + "definition" : { + "id" : "_2", + "input" : [ { + "type" : "stage", + "stage" : 0 + }, { + "type" : "stage", + "stage" : 1 + } ], + "broadcast" : [ 1 ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "join", + "left" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "right" : { + "type" : "inputNumber", + "inputNumber" : 1 + }, + "rightPrefix" : "j0.", + "condition" : "(\"cityName\" == \"j0.cityName\")", + "joinType" : "INNER", + "preferredJoinAlgorithm" : null + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "columns" : [ "j0.countryName", "v0" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : "[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "j0.countryName", + "type" : "STRING" + }, { + "name" : "v0", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] +!msqPlan + +select /*+ no_hash_join */ w1.cityName, w2.countryName +from (select cityName, countryName from wikipedia where cityName='New York') w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + + +LogicalJoin:[[no_hash_join inheritPath:[0, 0]]] + +!hints + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "cityName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "__boost", + "type" : "LONG" + } ], + "shuffleSpec" : { + "type" : "hash", + "clusterBy" : { + "columns" : [ { + "columnName" : "cityName", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "hashLocalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "hash", + "clusterBy" : { + "columns" : [ { + "columnName" : "cityName", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "hashLocalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 2, + "definition" : { + "id" : "_2", + "input" : [ { + "type" : "stage", + "stage" : 0 + }, { + "type" : "stage", + "stage" : 1 + } ], + "processor" : { + "type" : "sortMergeJoin", + "rightPrefix" : "j0.", + "condition" : "(\"cityName\" == \"j0.cityName\")", + "joinType" : "INNER" + }, + "signature" : [ { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "j0.cityName", + "type" : "STRING" + }, { + "name" : "j0.__boost", + "type" : "LONG" + }, { + "name" : "j0.countryName", + "type" : "STRING" + } ], + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ +}, { + "stageNumber" : 3, + "definition" : { + "id" : "_3", + "input" : [ { + "type" : "stage", + "stage" : 2 + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "columns" : [ "j0.countryName", "v0" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : "[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "j0.countryName", + "type" : "STRING" + }, { + "name" : "v0", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] +!msqPlan + +select /*+ use_merge_join(w1, w2) */ w1.cityName, w2.countryName +from (select cityName, countryName from wikipedia where cityName='New York') w1 +JOIN wikipedia w2 ON w1.cityName = w2.cityName +where w1.cityName='New York'; + +LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] + +!hints + +[ { + "stageNumber" : 0, + "definition" : { + "id" : "_0", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "filterFields" : [ "cityName" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "filter" : { + "type" : "equals", + "column" : "cityName", + "matchValueType" : "STRING", + "matchValue" : "New York" + }, + "columns" : [ "cityName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "__boost", + "type" : "LONG" + } ], + "shuffleSpec" : { + "type" : "hash", + "clusterBy" : { + "columns" : [ { + "columnName" : "cityName", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "hashLocalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 1, + "definition" : { + "id" : "_1", + "input" : [ { + "type" : "table", + "dataSource" : "wikipedia", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "resultFormat" : "compactedList", + "columns" : [ "cityName", "countryName" ], + "context" : { + "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", + "sqlInsertSegmentGranularity" : null, + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "countryName", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "hash", + "clusterBy" : { + "columns" : [ { + "columnName" : "cityName", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "hashLocalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +}, { + "stageNumber" : 2, + "definition" : { + "id" : "_2", + "input" : [ { + "type" : "stage", + "stage" : 0 + }, { + "type" : "stage", + "stage" : 1 + } ], + "processor" : { + "type" : "sortMergeJoin", + "rightPrefix" : "j0.", + "condition" : "(\"cityName\" == \"j0.cityName\")", + "joinType" : "INNER" + }, + "signature" : [ { + "name" : "cityName", + "type" : "STRING" + }, { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "j0.cityName", + "type" : "STRING" + }, { + "name" : "j0.__boost", + "type" : "LONG" + }, { + "name" : "j0.countryName", + "type" : "STRING" + } ], + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ +}, { + "stageNumber" : 3, + "definition" : { + "id" : "_3", + "input" : [ { + "type" : "stage", + "stage" : 2 + } ], + "processor" : { + "type" : "scan", + "query" : { + "queryType" : "scan", + "dataSource" : { + "type" : "inputNumber", + "inputNumber" : 0 + }, + "intervals" : { + "type" : "intervals", + "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] + }, + "virtualColumns" : [ { + "type" : "expression", + "name" : "v0", + "expression" : "'New York'", + "outputType" : "STRING" + } ], + "resultFormat" : "compactedList", + "columns" : [ "j0.countryName", "v0" ], + "context" : { + "__user" : null, + "finalize" : true, + "maxParseExceptions" : 0, + "scanSignature" : "[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]", + "sqlQueryId" : __SQL_QUERY_ID__ + "sqlStringifyArrays" : false + }, + "columnTypes" : [ "STRING", "STRING" ], + "granularity" : { + "type" : "all" + }, + "legacy" : false + } + }, + "signature" : [ { + "name" : "__boost", + "type" : "LONG" + }, { + "name" : "j0.countryName", + "type" : "STRING" + }, { + "name" : "v0", + "type" : "STRING" + } ], + "shuffleSpec" : { + "type" : "maxCount", + "clusterBy" : { + "columns" : [ { + "columnName" : "__boost", + "order" : "ASCENDING" + } ] + }, + "partitions" : 1 + }, + "maxWorkerCount" : 1 + }, + "phase" : "FINISHED", + "workerCount" : 1, + "partitionCount" : 1, + "shuffle" : "globalSort", + "output" : "localStorage", + "startTime" : __TIMESTAMP__ + "duration" : __DURATION__ + "sort" : true +} ] +!msqPlan diff --git a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq deleted file mode 100644 index 3064a6ae6be7..000000000000 --- a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/simpleJoinHint.iq +++ /dev/null @@ -1,182 +0,0 @@ -!use druidtest://?componentSupplier=DrillWindowQueryMSQComponentSupplier -!set outputformat mysql - -select /*+ use_merge_join(w1, w2) */ w1.cityName, w2.countryName -from wikipedia w1 -JOIN wikipedia w2 ON w1.cityName = w2.cityName -where w1.cityName='New York'; - - -LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] - -!hints - -[ { - "stageNumber" : 0, - "definition" : { - "id" : "_0", - "input" : [ { - "type" : "table", - "dataSource" : "wikipedia", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - } ], - "processor" : { - "type" : "scan", - "query" : { - "queryType" : "scan", - "dataSource" : { - "type" : "inputNumber", - "inputNumber" : 0 - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "resultFormat" : "compactedList", - "columns" : [ "cityName", "countryName" ], - "context" : { - "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", - "sqlInsertSegmentGranularity" : null, - "sqlQueryId" : __SQL_QUERY_ID__ - "sqlStringifyArrays" : false - }, - "columnTypes" : [ "STRING", "STRING" ], - "granularity" : { - "type" : "all" - }, - "legacy" : false - } - }, - "signature" : [ { - "name" : "__boost", - "type" : "LONG" - }, { - "name" : "cityName", - "type" : "STRING" - }, { - "name" : "countryName", - "type" : "STRING" - } ], - "shuffleSpec" : { - "type" : "maxCount", - "clusterBy" : { - "columns" : [ { - "columnName" : "__boost", - "order" : "ASCENDING" - } ] - }, - "partitions" : 1 - }, - "maxWorkerCount" : 1 - }, - "phase" : "FINISHED", - "workerCount" : 1, - "partitionCount" : 1, - "shuffle" : "globalSort", - "output" : "localStorage", - "startTime" : __TIMESTAMP__ - "duration" : __DURATION__ - "sort" : true -}, { - "stageNumber" : 1, - "definition" : { - "id" : "_1", - "input" : [ { - "type" : "table", - "dataSource" : "wikipedia", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], - "filter" : { - "type" : "equals", - "column" : "cityName", - "matchValueType" : "STRING", - "matchValue" : "New York" - }, - "filterFields" : [ "cityName" ] - }, { - "type" : "stage", - "stage" : 0 - } ], - "broadcast" : [ 1 ], - "processor" : { - "type" : "scan", - "query" : { - "queryType" : "scan", - "dataSource" : { - "type" : "join", - "left" : { - "type" : "inputNumber", - "inputNumber" : 0 - }, - "right" : { - "type" : "inputNumber", - "inputNumber" : 1 - }, - "rightPrefix" : "j0.", - "condition" : "(\"cityName\" == \"j0.cityName\")", - "joinType" : "INNER" - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "virtualColumns" : [ { - "type" : "expression", - "name" : "v0", - "expression" : "'New York'", - "outputType" : "STRING" - } ], - "resultFormat" : "compactedList", - "filter" : { - "type" : "equals", - "column" : "cityName", - "matchValueType" : "STRING", - "matchValue" : "New York" - }, - "columns" : [ "j0.countryName", "v0" ], - "context" : { - "__user" : null, - "finalize" : true, - "maxParseExceptions" : 0, - "scanSignature" : "[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]", - "sqlQueryId" : __SQL_QUERY_ID__ - "sqlStringifyArrays" : false - }, - "columnTypes" : [ "STRING", "STRING" ], - "granularity" : { - "type" : "all" - }, - "legacy" : false - } - }, - "signature" : [ { - "name" : "__boost", - "type" : "LONG" - }, { - "name" : "j0.countryName", - "type" : "STRING" - }, { - "name" : "v0", - "type" : "STRING" - } ], - "shuffleSpec" : { - "type" : "maxCount", - "clusterBy" : { - "columns" : [ { - "columnName" : "__boost", - "order" : "ASCENDING" - } ] - }, - "partitions" : 1 - }, - "maxWorkerCount" : 1 - }, - "phase" : "FINISHED", - "workerCount" : 1, - "partitionCount" : 1, - "shuffle" : "globalSort", - "output" : "localStorage", - "startTime" : __TIMESTAMP__ - "duration" : __DURATION__ - "sort" : true -} ] -!msqPlan diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java index 94b078768978..b1a32a1c823f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java @@ -38,14 +38,8 @@ import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Snapshot; -import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.hint.HintPredicate; import org.apache.calcite.rel.hint.HintPredicates; import org.apache.calcite.rel.hint.HintStrategyTable; -import org.apache.calcite.rel.logical.LogicalCorrelate; -import org.apache.calcite.rel.logical.LogicalJoin; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeSystem; @@ -68,16 +62,12 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.ValidationException; import org.apache.calcite.util.Pair; -import org.apache.calcite.util.Util; import javax.annotation.Nullable; import java.io.Reader; import java.util.List; import java.util.Objects; import java.util.Properties; -import java.util.function.Predicate; -import java.util.stream.Collectors; -import java.util.stream.Stream; /** * Calcite planner. Clone of Calcite's @@ -306,14 +296,9 @@ public RelRoot rel(SqlNode sql) Objects.requireNonNull(planner, "planner"), rexBuilder ); - HintStrategyTable hintStrategyTable = HintStrategyTable.builder() - .hintStrategy("use_hash_join", HintPredicates.JOIN) - .hintStrategy("use_merge_join", HintPredicates.JOIN) - .hintStrategy("no_hash_join", HintPredicates.JOIN) - .build(); final SqlToRelConverter.Config config = sqlToRelConverterConfig.withTrimUnusedFields(false) - .withHintStrategyTable(hintStrategyTable); + .withHintStrategyTable(HintTools.HINT_STRATEGY_TABLE); final SqlToRelConverter sqlToRelConverter = new DruidSqlToRelConverter(this, validator, createCatalogReader(), cluster, convertletTable, config @@ -520,4 +505,24 @@ void from(CalcitePlanner planner) + " to " + this); } } + + /** Define some tool members and methods for hints. */ + private static class HintTools + { + static final HintStrategyTable HINT_STRATEGY_TABLE = createHintStrategies(); + + /** + * Creates hint strategies. + * + * @return HintStrategyTable instance + */ + private static HintStrategyTable createHintStrategies() + { + return HintStrategyTable.builder() + .hintStrategy("no_hash_join", HintPredicates.JOIN) + .hintStrategy("use_hash_join", HintPredicates.JOIN) + .hintStrategy("use_merge_join", HintPredicates.JOIN) + .build(); + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java index 587366bbacba..fddb7057e89f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java @@ -35,6 +35,13 @@ public JoinAlgorithm getJoinAlgorithm() return JoinAlgorithm.SORT_MERGE; } }, + NO_HASH_JOIN("no_hash_join") { + @Override + public JoinAlgorithm getJoinAlgorithm() + { + return JoinAlgorithm.SORT_MERGE; + } + }, USE_HASH_JOIN("use_hash_join") { @Override public JoinAlgorithm getJoinAlgorithm() diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java index 8795b5054657..b12908014787 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java @@ -50,8 +50,6 @@ import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.LookupDataSource; -import org.apache.druid.query.JoinAlgorithm; -import org.apache.druid.sql.calcite.planner.JoinHint; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.planner.QueryUtils; import org.apache.druid.sql.calcite.rel.DruidJoinQueryRel; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java index a832509f5cad..16228a19e127 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java @@ -27,12 +27,12 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.druid.error.InvalidSqlInput; import org.apache.druid.guice.LazySingleton; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.groupby.GroupByQuery; import org.apache.druid.query.timeboundary.TimeBoundaryQuery; import org.apache.druid.server.QueryLifecycleFactory; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; import org.apache.druid.sql.calcite.parser.DruidSqlReplace; -import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.destination.IngestDestination; diff --git a/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java b/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java index b5bf4d7690dc..a04cfc2e8aae 100644 --- a/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java +++ b/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java @@ -294,88 +294,113 @@ private static class HintCollector extends RelShuttleImpl { private final List hintsCollect; - HintCollector(List hintsCollect) { + HintCollector(List hintsCollect) + { this.hintsCollect = hintsCollect; } - @Override public RelNode visit(TableScan scan) { + @Override + public RelNode visit(TableScan scan) + { if (!scan.getHints().isEmpty()) { this.hintsCollect.add("TableScan:" + scan.getHints()); } return super.visit(scan); } - @Override public RelNode visit(LogicalJoin join) { + @Override + public RelNode visit(LogicalJoin join) + { if (!join.getHints().isEmpty()) { this.hintsCollect.add("LogicalJoin:" + join.getHints()); } return super.visit(join); } - @Override public RelNode visit(LogicalProject project) { + @Override + public RelNode visit(LogicalProject project) + { if (!project.getHints().isEmpty()) { this.hintsCollect.add("Project:" + project.getHints()); } return super.visit(project); } - @Override public RelNode visit(LogicalAggregate aggregate) { + @Override + public RelNode visit(LogicalAggregate aggregate) + { if (!aggregate.getHints().isEmpty()) { this.hintsCollect.add("Aggregate:" + aggregate.getHints()); } return super.visit(aggregate); } - @Override public RelNode visit(LogicalCorrelate correlate) { + @Override + public RelNode visit(LogicalCorrelate correlate) + { if (!correlate.getHints().isEmpty()) { this.hintsCollect.add("Correlate:" + correlate.getHints()); } return super.visit(correlate); } - @Override public RelNode visit(LogicalFilter filter) { + @Override + public RelNode visit(LogicalFilter filter) + { if (!filter.getHints().isEmpty()) { this.hintsCollect.add("Filter:" + filter.getHints()); } return super.visit(filter); } - @Override public RelNode visit(LogicalUnion union) { + @Override + public RelNode visit(LogicalUnion union) + { if (!union.getHints().isEmpty()) { this.hintsCollect.add("Union:" + union.getHints()); } return super.visit(union); } - @Override public RelNode visit(LogicalIntersect intersect) { + @Override + public RelNode visit(LogicalIntersect intersect) + { if (!intersect.getHints().isEmpty()) { this.hintsCollect.add("Intersect:" + intersect.getHints()); } return super.visit(intersect); } - @Override public RelNode visit(LogicalMinus minus) { + @Override + public RelNode visit(LogicalMinus minus) + { if (!minus.getHints().isEmpty()) { this.hintsCollect.add("Minus:" + minus.getHints()); } return super.visit(minus); } - @Override public RelNode visit(LogicalSort sort) { + @Override + public RelNode visit(LogicalSort sort) + { if (!sort.getHints().isEmpty()) { this.hintsCollect.add("Sort:" + sort.getHints()); } return super.visit(sort); } - @Override public RelNode visit(LogicalValues values) { + @Override + public RelNode visit(LogicalValues values) + { if (!values.getHints().isEmpty()) { this.hintsCollect.add("Values:" + values.getHints()); } return super.visit(values); } - @Override public RelNode visit(RelNode other) { + @Override + public RelNode visit(RelNode other) + { if (other instanceof Window) { Window window = (Window) other; if (!window.getHints().isEmpty()) { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java index 56b9ae3091d9..7b9ed21f89e9 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/rule/DruidJoinRuleTest.java @@ -31,9 +31,9 @@ import org.apache.calcite.sql.type.SqlTypeFactoryImpl; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.druid.common.config.NullHandling; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.QueryContext; import org.apache.druid.sql.calcite.planner.DruidTypeSystem; -import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.junit.Assert; import org.junit.Before; From 1acb8bf40f20b9bd3fba0fc9fbb3b9a5c03df1bc Mon Sep 17 00:00:00 2001 From: sreemanamala Date: Mon, 4 Nov 2024 11:15:27 +0530 Subject: [PATCH 4/4] review comments --- .../msqJoinHint.iq | 281 +----------------- .../apache/druid/query/JoinDataSource.java | 4 +- .../sql/calcite/planner/CalcitePlanner.java | 28 +- .../druid/sql/calcite/planner/JoinHint.java | 11 +- .../sql/calcite/rel/DruidJoinQueryRel.java | 8 +- .../druid/sql/calcite/rule/DruidJoinRule.java | 6 +- .../quidem/DruidQuidemCommandHandler.java | 162 ++-------- 7 files changed, 67 insertions(+), 433 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq index 26a7b1c776d8..3d6a1b1cc12c 100644 --- a/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq +++ b/extensions-core/multi-stage-query/src/test/quidem/org.apache.druid.msq.quidem.MSQQuidemTest/msqJoinHint.iq @@ -7,6 +7,11 @@ JOIN wikipedia w2 ON w1.cityName = w2.cityName where w1.cityName='New York'; + + + + + !hints [ { @@ -242,12 +247,12 @@ where w1.cityName='New York'; } ] !msqPlan -select /*+ use_hash_join(w1, w2) */ w1.cityName, w2.countryName +select /*+ broadcast(w1, w2) */ w1.cityName, w2.countryName from (select cityName, countryName from wikipedia where cityName='New York') w1 JOIN wikipedia w2 ON w1.cityName = w2.cityName where w1.cityName='New York'; -LogicalJoin:[[use_hash_join inheritPath:[0, 0] options:[w1, w2]]] +LogicalJoin:[[broadcast inheritPath:[0, 0] options:[w1, w2]]] !hints @@ -484,281 +489,13 @@ LogicalJoin:[[use_hash_join inheritPath:[0, 0] options:[w1, w2]]] } ] !msqPlan -select /*+ no_hash_join */ w1.cityName, w2.countryName +select /*+ sort_merge(w1, w2) */ w1.cityName, w2.countryName from (select cityName, countryName from wikipedia where cityName='New York') w1 JOIN wikipedia w2 ON w1.cityName = w2.cityName where w1.cityName='New York'; -LogicalJoin:[[no_hash_join inheritPath:[0, 0]]] - -!hints - -[ { - "stageNumber" : 0, - "definition" : { - "id" : "_0", - "input" : [ { - "type" : "table", - "dataSource" : "wikipedia", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ], - "filter" : { - "type" : "equals", - "column" : "cityName", - "matchValueType" : "STRING", - "matchValue" : "New York" - }, - "filterFields" : [ "cityName" ] - } ], - "processor" : { - "type" : "scan", - "query" : { - "queryType" : "scan", - "dataSource" : { - "type" : "inputNumber", - "inputNumber" : 0 - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "resultFormat" : "compactedList", - "filter" : { - "type" : "equals", - "column" : "cityName", - "matchValueType" : "STRING", - "matchValue" : "New York" - }, - "columns" : [ "cityName" ], - "context" : { - "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"}]", - "sqlInsertSegmentGranularity" : null, - "sqlQueryId" : __SQL_QUERY_ID__ - "sqlStringifyArrays" : false - }, - "columnTypes" : [ "STRING" ], - "granularity" : { - "type" : "all" - }, - "legacy" : false - } - }, - "signature" : [ { - "name" : "cityName", - "type" : "STRING" - }, { - "name" : "__boost", - "type" : "LONG" - } ], - "shuffleSpec" : { - "type" : "hash", - "clusterBy" : { - "columns" : [ { - "columnName" : "cityName", - "order" : "ASCENDING" - } ] - }, - "partitions" : 1 - }, - "maxWorkerCount" : 1 - }, - "phase" : "FINISHED", - "workerCount" : 1, - "partitionCount" : 1, - "shuffle" : "hashLocalSort", - "output" : "localStorage", - "startTime" : __TIMESTAMP__ - "duration" : __DURATION__ - "sort" : true -}, { - "stageNumber" : 1, - "definition" : { - "id" : "_1", - "input" : [ { - "type" : "table", - "dataSource" : "wikipedia", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - } ], - "processor" : { - "type" : "scan", - "query" : { - "queryType" : "scan", - "dataSource" : { - "type" : "inputNumber", - "inputNumber" : 0 - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "resultFormat" : "compactedList", - "columns" : [ "cityName", "countryName" ], - "context" : { - "scanSignature" : "[{\"name\":\"cityName\",\"type\":\"STRING\"},{\"name\":\"countryName\",\"type\":\"STRING\"}]", - "sqlInsertSegmentGranularity" : null, - "sqlQueryId" : __SQL_QUERY_ID__ - "sqlStringifyArrays" : false - }, - "columnTypes" : [ "STRING", "STRING" ], - "granularity" : { - "type" : "all" - }, - "legacy" : false - } - }, - "signature" : [ { - "name" : "cityName", - "type" : "STRING" - }, { - "name" : "__boost", - "type" : "LONG" - }, { - "name" : "countryName", - "type" : "STRING" - } ], - "shuffleSpec" : { - "type" : "hash", - "clusterBy" : { - "columns" : [ { - "columnName" : "cityName", - "order" : "ASCENDING" - } ] - }, - "partitions" : 1 - }, - "maxWorkerCount" : 1 - }, - "phase" : "FINISHED", - "workerCount" : 1, - "partitionCount" : 1, - "shuffle" : "hashLocalSort", - "output" : "localStorage", - "startTime" : __TIMESTAMP__ - "duration" : __DURATION__ - "sort" : true -}, { - "stageNumber" : 2, - "definition" : { - "id" : "_2", - "input" : [ { - "type" : "stage", - "stage" : 0 - }, { - "type" : "stage", - "stage" : 1 - } ], - "processor" : { - "type" : "sortMergeJoin", - "rightPrefix" : "j0.", - "condition" : "(\"cityName\" == \"j0.cityName\")", - "joinType" : "INNER" - }, - "signature" : [ { - "name" : "cityName", - "type" : "STRING" - }, { - "name" : "__boost", - "type" : "LONG" - }, { - "name" : "j0.cityName", - "type" : "STRING" - }, { - "name" : "j0.__boost", - "type" : "LONG" - }, { - "name" : "j0.countryName", - "type" : "STRING" - } ], - "maxWorkerCount" : 1 - }, - "phase" : "FINISHED", - "workerCount" : 1, - "partitionCount" : 1, - "output" : "localStorage", - "startTime" : __TIMESTAMP__ - "duration" : __DURATION__ -}, { - "stageNumber" : 3, - "definition" : { - "id" : "_3", - "input" : [ { - "type" : "stage", - "stage" : 2 - } ], - "processor" : { - "type" : "scan", - "query" : { - "queryType" : "scan", - "dataSource" : { - "type" : "inputNumber", - "inputNumber" : 0 - }, - "intervals" : { - "type" : "intervals", - "intervals" : [ "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ] - }, - "virtualColumns" : [ { - "type" : "expression", - "name" : "v0", - "expression" : "'New York'", - "outputType" : "STRING" - } ], - "resultFormat" : "compactedList", - "columns" : [ "j0.countryName", "v0" ], - "context" : { - "__user" : null, - "finalize" : true, - "maxParseExceptions" : 0, - "scanSignature" : "[{\"name\":\"j0.countryName\",\"type\":\"STRING\"},{\"name\":\"v0\",\"type\":\"STRING\"}]", - "sqlQueryId" : __SQL_QUERY_ID__ - "sqlStringifyArrays" : false - }, - "columnTypes" : [ "STRING", "STRING" ], - "granularity" : { - "type" : "all" - }, - "legacy" : false - } - }, - "signature" : [ { - "name" : "__boost", - "type" : "LONG" - }, { - "name" : "j0.countryName", - "type" : "STRING" - }, { - "name" : "v0", - "type" : "STRING" - } ], - "shuffleSpec" : { - "type" : "maxCount", - "clusterBy" : { - "columns" : [ { - "columnName" : "__boost", - "order" : "ASCENDING" - } ] - }, - "partitions" : 1 - }, - "maxWorkerCount" : 1 - }, - "phase" : "FINISHED", - "workerCount" : 1, - "partitionCount" : 1, - "shuffle" : "globalSort", - "output" : "localStorage", - "startTime" : __TIMESTAMP__ - "duration" : __DURATION__ - "sort" : true -} ] -!msqPlan - -select /*+ use_merge_join(w1, w2) */ w1.cityName, w2.countryName -from (select cityName, countryName from wikipedia where cityName='New York') w1 -JOIN wikipedia w2 ON w1.cityName = w2.cityName -where w1.cityName='New York'; - -LogicalJoin:[[use_merge_join inheritPath:[0, 0] options:[w1, w2]]] +LogicalJoin:[[sort_merge inheritPath:[0, 0] options:[w1, w2]]] !hints diff --git a/processing/src/main/java/org/apache/druid/query/JoinDataSource.java b/processing/src/main/java/org/apache/druid/query/JoinDataSource.java index 0981d644a9ef..976ab78728b8 100644 --- a/processing/src/main/java/org/apache/druid/query/JoinDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/JoinDataSource.java @@ -430,8 +430,7 @@ public boolean equals(Object o) Objects.equals(rightPrefix, that.rightPrefix) && Objects.equals(conditionAnalysis, that.conditionAnalysis) && Objects.equals(leftFilter, that.leftFilter) && - joinType == that.joinType && - preferredJoinAlgorithm == that.preferredJoinAlgorithm; + joinType == that.joinType; } @Override @@ -450,7 +449,6 @@ public String toString() ", condition=" + conditionAnalysis + ", joinType=" + joinType + ", leftFilter=" + leftFilter + - ", preferredJoinAlgorithm=" + preferredJoinAlgorithm + '}'; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java index b1a32a1c823f..a074abcc1a70 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalcitePlanner.java @@ -38,8 +38,11 @@ import org.apache.calcite.rel.RelCollationTraitDef; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.hint.HintPredicate; import org.apache.calcite.rel.hint.HintPredicates; import org.apache.calcite.rel.hint.HintStrategyTable; +import org.apache.calcite.rel.logical.LogicalJoin; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeSystem; @@ -62,12 +65,14 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.ValidationException; import org.apache.calcite.util.Pair; +import org.apache.calcite.util.Util; import javax.annotation.Nullable; import java.io.Reader; import java.util.List; import java.util.Objects; import java.util.Properties; +import java.util.stream.Collectors; /** * Calcite planner. Clone of Calcite's @@ -519,10 +524,27 @@ private static class HintTools private static HintStrategyTable createHintStrategies() { return HintStrategyTable.builder() - .hintStrategy("no_hash_join", HintPredicates.JOIN) - .hintStrategy("use_hash_join", HintPredicates.JOIN) - .hintStrategy("use_merge_join", HintPredicates.JOIN) + .hintStrategy("broadcast", HintPredicates.JOIN) + .hintStrategy("sort_merge", HintPredicates.JOIN) + //.hintStrategy("sort_merge", HintPredicates.and(HintPredicates.JOIN, joinWithFixedTableName())) .build(); } + + /** Returns a {@link HintPredicate} for join with specified table references. */ + private static HintPredicate joinWithFixedTableName() + { + return (hint, rel) -> { + if (!(rel instanceof LogicalJoin)) { + return false; + } + LogicalJoin join = (LogicalJoin) rel; + final List tableNames = hint.listOptions; + final List inputTables = join.getInputs().stream() + .filter(input -> input instanceof TableScan) + .map(scan -> Util.last(scan.getTable().getQualifiedName())) + .collect(Collectors.toList()); + return tableNames.equals(inputTables); + }; + } } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java index fddb7057e89f..b4201cd13675 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/JoinHint.java @@ -28,21 +28,14 @@ public enum JoinHint { - USE_MERGE_JOIN("use_merge_join") { + SORT_MERGE("sort_merge") { @Override public JoinAlgorithm getJoinAlgorithm() { return JoinAlgorithm.SORT_MERGE; } }, - NO_HASH_JOIN("no_hash_join") { - @Override - public JoinAlgorithm getJoinAlgorithm() - { - return JoinAlgorithm.SORT_MERGE; - } - }, - USE_HASH_JOIN("use_hash_join") { + BROADCAST("broadcast") { @Override public JoinAlgorithm getJoinAlgorithm() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java index 5f46dfd97eb0..312ba1885dd9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidJoinQueryRel.java @@ -152,7 +152,7 @@ private SourceDesc buildLeftSourceDesc() final DruidQuery leftQuery = Preconditions.checkNotNull(leftDruidRel.toDruidQuery(false), "leftQuery"); final RowSignature leftSignature = leftQuery.getOutputRowSignature(); final DataSource leftDataSource; - if (computeLeftRequiresSubquery(getPlannerContext(), leftDruidRel)) { + if (computeLeftRequiresSubquery(getPlannerContext(), leftDruidRel, joinRel)) { leftDataSource = new QueryDataSource(leftQuery.getQuery()); if (leftFilter != null) { throw new ISE("Filter on left table is supposed to be null if left child is a query source"); @@ -362,7 +362,7 @@ public RelOptCost computeSelfCost(final RelOptPlanner planner, final RelMetadata joinCost *= CostEstimates.MULTIPLIER_OUTER_QUERY; } else { // Penalize subqueries if we don't have to do them. - if (computeLeftRequiresSubquery(getPlannerContext(), getSomeDruidChild(left))) { + if (computeLeftRequiresSubquery(getPlannerContext(), getSomeDruidChild(left), joinRel)) { joinCost += CostEstimates.COST_SUBQUERY; } else { if (joinRel.getJoinType() == JoinRelType.INNER && plannerConfig.isComputeInnerJoinCostAsFilter()) { @@ -402,9 +402,9 @@ public static JoinType toDruidJoinType(JoinRelType calciteJoinType) } } - public static boolean computeLeftRequiresSubquery(final PlannerContext plannerContext, final DruidRel left) + public static boolean computeLeftRequiresSubquery(final PlannerContext plannerContext, final DruidRel left, final Join joinRel) { - if (plannerContext.getJoinAlgorithm().requiresSubquery()) { + if (QueryUtils.getJoinAlgorithm(joinRel, plannerContext).requiresSubquery()) { return true; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java index b12908014787..d76230da2a5e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidJoinRule.java @@ -49,6 +49,7 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.JoinAlgorithm; import org.apache.druid.query.LookupDataSource; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.planner.QueryUtils; @@ -131,7 +132,8 @@ public void onMatch(RelOptRuleCall call) plannerContext.setPlanningError(conditionAnalysis.errorStr); final boolean isLeftDirectAccessPossible = enableLeftScanDirect && (left instanceof DruidQueryRel); - if (!QueryUtils.getJoinAlgorithm(join, plannerContext).requiresSubquery() + final JoinAlgorithm joinAlgorithm = QueryUtils.getJoinAlgorithm(join, plannerContext); + if (!joinAlgorithm.requiresSubquery() && left.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT && (isLeftDirectAccessPossible || left.getPartialDruidQuery().getWhereFilter() == null)) { // Swap the left-side projection above the join, so the left side is a simple scan or mapping. This helps us @@ -154,7 +156,7 @@ public void onMatch(RelOptRuleCall call) leftFilter = null; } - if (!QueryUtils.getJoinAlgorithm(join, plannerContext).requiresSubquery() + if (!joinAlgorithm.requiresSubquery() && right.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT && right.getPartialDruidQuery().getWhereFilter() == null && !right.getPartialDruidQuery().getSelectProject().isMapping() diff --git a/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java b/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java index a04cfc2e8aae..5bf6d18050f5 100644 --- a/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java +++ b/sql/src/test/java/org/apache/druid/quidem/DruidQuidemCommandHandler.java @@ -26,22 +26,9 @@ import net.hydromatic.quidem.CommandHandler; import net.hydromatic.quidem.Quidem.SqlCommand; import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelShuttleImpl; -import org.apache.calcite.rel.core.Snapshot; -import org.apache.calcite.rel.core.TableFunctionScan; -import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.core.Window; -import org.apache.calcite.rel.logical.LogicalAggregate; -import org.apache.calcite.rel.logical.LogicalCorrelate; -import org.apache.calcite.rel.logical.LogicalFilter; -import org.apache.calcite.rel.logical.LogicalIntersect; -import org.apache.calcite.rel.logical.LogicalJoin; -import org.apache.calcite.rel.logical.LogicalMinus; -import org.apache.calcite.rel.logical.LogicalProject; -import org.apache.calcite.rel.logical.LogicalSort; -import org.apache.calcite.rel.logical.LogicalUnion; -import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.hint.Hintable; import org.apache.calcite.runtime.Hook; import org.apache.calcite.sql.SqlExplainFormat; import org.apache.calcite.sql.SqlExplainLevel; @@ -220,12 +207,12 @@ protected final void executeExplain(Context context) throws IOException if (node instanceof DruidRel) { node = ((DruidRel) node).unwrapLogicalPlan(); } - String str = getString(node); + String str = convertRelToString(node); context.echo(ImmutableList.of(str)); } } - protected String getString(RelNode node) + protected String convertRelToString(RelNode node) { String str = RelOptUtil.dumpPlan("", node, SqlExplainFormat.TEXT, SqlExplainLevel.EXPPLAN_ATTRIBUTES); return str; @@ -277,147 +264,42 @@ static class HintPlanCommand extends AbstractRelPlanCommand } @Override - protected String getString(RelNode node) + protected String convertRelToString(RelNode node) { - final List hintsCollect = new ArrayList<>(); - final HintCollector collector = new HintCollector(hintsCollect); + final HintCollector collector = new HintCollector(); node.accept(collector); - StringBuilder builder = new StringBuilder(); - for (String hintLine : hintsCollect) { - builder.append(hintLine).append("\n"); - } - - return builder.toString(); + return collector.getCollectedHintsAsString(); } - private static class HintCollector extends RelShuttleImpl + private static class HintCollector extends RelHomogeneousShuttle { private final List hintsCollect; - HintCollector(List hintsCollect) - { - this.hintsCollect = hintsCollect; - } - - @Override - public RelNode visit(TableScan scan) - { - if (!scan.getHints().isEmpty()) { - this.hintsCollect.add("TableScan:" + scan.getHints()); - } - return super.visit(scan); - } - - @Override - public RelNode visit(LogicalJoin join) - { - if (!join.getHints().isEmpty()) { - this.hintsCollect.add("LogicalJoin:" + join.getHints()); - } - return super.visit(join); - } - - @Override - public RelNode visit(LogicalProject project) - { - if (!project.getHints().isEmpty()) { - this.hintsCollect.add("Project:" + project.getHints()); - } - return super.visit(project); - } - - @Override - public RelNode visit(LogicalAggregate aggregate) - { - if (!aggregate.getHints().isEmpty()) { - this.hintsCollect.add("Aggregate:" + aggregate.getHints()); - } - return super.visit(aggregate); - } - - @Override - public RelNode visit(LogicalCorrelate correlate) - { - if (!correlate.getHints().isEmpty()) { - this.hintsCollect.add("Correlate:" + correlate.getHints()); - } - return super.visit(correlate); - } - - @Override - public RelNode visit(LogicalFilter filter) - { - if (!filter.getHints().isEmpty()) { - this.hintsCollect.add("Filter:" + filter.getHints()); - } - return super.visit(filter); - } - - @Override - public RelNode visit(LogicalUnion union) - { - if (!union.getHints().isEmpty()) { - this.hintsCollect.add("Union:" + union.getHints()); - } - return super.visit(union); - } - - @Override - public RelNode visit(LogicalIntersect intersect) - { - if (!intersect.getHints().isEmpty()) { - this.hintsCollect.add("Intersect:" + intersect.getHints()); - } - return super.visit(intersect); - } - - @Override - public RelNode visit(LogicalMinus minus) + HintCollector() { - if (!minus.getHints().isEmpty()) { - this.hintsCollect.add("Minus:" + minus.getHints()); - } - return super.visit(minus); + this.hintsCollect = new ArrayList<>(); } @Override - public RelNode visit(LogicalSort sort) + public RelNode visit(RelNode relNode) { - if (!sort.getHints().isEmpty()) { - this.hintsCollect.add("Sort:" + sort.getHints()); + if (relNode instanceof Hintable) { + Hintable hintableRelNode = (Hintable) relNode; + if (!hintableRelNode.getHints().isEmpty()) { + this.hintsCollect.add(relNode.getClass().getSimpleName() + ":" + hintableRelNode.getHints()); + } } - return super.visit(sort); + return super.visit(relNode); } - @Override - public RelNode visit(LogicalValues values) + public String getCollectedHintsAsString() { - if (!values.getHints().isEmpty()) { - this.hintsCollect.add("Values:" + values.getHints()); + StringBuilder builder = new StringBuilder(); + for (String hintLine : hintsCollect) { + builder.append(hintLine).append("\n"); } - return super.visit(values); - } - @Override - public RelNode visit(RelNode other) - { - if (other instanceof Window) { - Window window = (Window) other; - if (!window.getHints().isEmpty()) { - this.hintsCollect.add("Window:" + window.getHints()); - } - } else if (other instanceof Snapshot) { - Snapshot snapshot = (Snapshot) other; - if (!snapshot.getHints().isEmpty()) { - this.hintsCollect.add("Snapshot:" + snapshot.getHints()); - } - } else if (other instanceof TableFunctionScan) { - TableFunctionScan scan = (TableFunctionScan) other; - if (!scan.getHints().isEmpty()) { - this.hintsCollect.add("TableFunctionScan:" + scan.getHints()); - } - } - return super.visit(other); + return builder.toString(); } } }