sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java
@@ -32,8 +32,8 @@
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.query.Druids;
 import org.apache.druid.query.QueryDataSource;
-import org.apache.druid.query.TableDataSource;
 import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.sql.calcite.planner.PlannerContext;
 import org.apache.druid.sql.calcite.table.RowSignatures;
@@ -46,7 +46,9 @@
  */
 public class DruidOuterQueryRel extends DruidRel<DruidOuterQueryRel>
 {
-  private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__subquery__");
+  private static final QueryDataSource DUMMY_DATA_SOURCE = new QueryDataSource(
+      Druids.newScanQueryBuilder().dataSource("__subquery__").eternityInterval().build()
+  );
 
   private final PartialDruidQuery partialQuery;
   private RelNode sourceRel;
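This swap matters because the planner treats table and query datasources differently. Below is a minimal hypothetical demo (the demo class itself is not part of the PR) contrasting the two via DataSource.isConcrete(), the same method the new check in DruidQuery.toScanQuery() relies on:

import org.apache.druid.query.Druids;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;

// Hypothetical demo class, not part of the PR.
public class DummyDataSourceDemo
{
  public static void main(String[] args)
  {
    final TableDataSource table = new TableDataSource("__subquery__");
    final QueryDataSource subquery = new QueryDataSource(
        Druids.newScanQueryBuilder().dataSource("__subquery__").eternityInterval().build()
    );

    // A table datasource is concrete: it is backed directly by segments.
    System.out.println(table.isConcrete());    // true

    // A query datasource is not concrete: its rows come from evaluating the
    // inner query first, which lets planner checks distinguish the dummy
    // outer-query datasource from a real table.
    System.out.println(subquery.isConcrete()); // false
  }
}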
27 changes: 19 additions & 8 deletions sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -1199,14 +1199,25 @@ private ScanQuery toScanQuery(final QueryFeatureInspector queryFeatureInspector)
       orderByColumns = Collections.emptyList();
     }
 
-    if (!queryFeatureInspector.feature(QueryFeature.SCAN_CAN_ORDER_BY_NON_TIME)
-        && (orderByColumns.size() > 1
-            || orderByColumns.stream()
-                             .anyMatch(orderBy -> !orderBy.getColumnName().equals(ColumnHolder.TIME_COLUMN_NAME)))) {
-      // Cannot handle this ordering.
-      // Scan cannot ORDER BY non-time columns.
-      plannerContext.setPlanningError("SQL query requires order by non-time column %s that is not supported.", orderByColumns);
-      return null;
+    if (!queryFeatureInspector.feature(QueryFeature.SCAN_CAN_ORDER_BY_NON_TIME) && !orderByColumns.isEmpty()) {
+      if (orderByColumns.size() > 1 || !ColumnHolder.TIME_COLUMN_NAME.equals(orderByColumns.get(0).getColumnName())) {
+        // Cannot handle this ordering.
+        // Scan cannot ORDER BY non-time columns.
+        plannerContext.setPlanningError(
+            "SQL query requires order by non-time column %s that is not supported.",
+            orderByColumns
+        );
+        return null;
+      }
+      if (!dataSource.isConcrete()) {
+        // Cannot handle this ordering.
+        // Scan cannot ORDER BY non-time columns.
+        plannerContext.setPlanningError(
+            "SQL query requires order by non-time column on a datasource[%s], which is not supported.",
+            dataSource
Comment on lines +1215 to +1217 (Contributor):
I realized that this message is incorrect. You already checked that the column being ordered on is a time column.

Suggested change:
-        plannerContext.setPlanningError(
-            "SQL query requires order by non-time column on a datasource[%s], which is not supported.",
-            dataSource
+        plannerContext.setPlanningError(
+            "SQL query is a scan and requires order by on a datasource of type[%s], which is not supported.",
+            dataSource.getType()

+        );
+        return null;
+      }
     }
 
     // Compute the list of columns to select, sorted and deduped.
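Restated outside Druid, the new logic is a two-step gate rather than the old single compound condition: first reject any ordering that is not exactly a single ORDER BY on __time, then reject even time-ordered scans whose datasource is not concrete. A simplified standalone sketch of that control flow; the class, method, and parameter names here are stand-ins, not Druid's actual signatures:

import java.util.List;

// Simplified stand-in for the check added to DruidQuery.toScanQuery(); not Druid's API.
final class ScanOrderingGate
{
  private static final String TIME_COLUMN = "__time";

  // Returns null if the ordering can be planned as a scan, else an error message.
  static String validate(
      final List<String> orderByColumnNames,
      final boolean scanCanOrderByNonTime,
      final boolean dataSourceIsConcrete
  )
  {
    if (scanCanOrderByNonTime || orderByColumnNames.isEmpty()) {
      return null; // nothing to reject
    }
    // Step 1: only a single ORDER BY on the time column is acceptable.
    if (orderByColumnNames.size() > 1 || !TIME_COLUMN.equals(orderByColumnNames.get(0))) {
      return "order by non-time column is not supported";
    }
    // Step 2: even __time ordering needs a concrete (segment-backed) datasource,
    // so ordering a scan over a subquery result is rejected as well.
    if (!dataSourceIsConcrete) {
      return "order by on a non-concrete datasource is not supported";
    }
    return null;
  }
}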
sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
@@ -104,6 +104,64 @@
 @RunWith(JUnitParamsRunner.class)
 public class CalciteJoinQueryTest extends BaseCalciteQueryTest
 {
+
+  @Test
+  public void testInnerJoinWithLimitAndAlias() throws Exception
+  {
+    minTopNThreshold = 1;
+    Map<String, Object> context = new HashMap<>(QUERY_CONTEXT_DEFAULT);
+    context.put(PlannerConfig.CTX_KEY_USE_APPROXIMATE_TOPN, false);
+    testQuery(
+        "select t1.b1 from (select __time as b1 from numfoo group by 1 order by 1) as t1 inner join (\n"
+        + "  select __time as b2 from foo group by 1 order by 1\n"
+        + ") as t2 on t1.b1 = t2.b2 ",
+        context, // turn on exact topN
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .dataSource(
+                    JoinDataSource.create(
+                        new QueryDataSource(
+                            GroupByQuery.builder()
+                                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                                        .setGranularity(Granularities.ALL)
+                                        .setDataSource(new TableDataSource("numfoo"))
+                                        .setDimensions(new DefaultDimensionSpec("__time", "_d0", ColumnType.LONG))
+                                        .setContext(context)
+                                        .build()
+                        ),
+                        new QueryDataSource(
+                            GroupByQuery.builder()
+                                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                                        .setGranularity(Granularities.ALL)
+                                        .setDataSource(new TableDataSource("foo"))
+                                        .setDimensions(new DefaultDimensionSpec("__time", "d0", ColumnType.LONG))
+                                        .setContext(context)
+                                        .build()
+                        ),
+                        "j0.",
+                        "(\"_d0\" == \"j0.d0\")",
+                        JoinType.INNER,
+                        null,
+                        ExprMacroTable.nil()
+                    )
+                )
+                .columns("_d0")
+                .context(context)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{946684800000L},
+            new Object[]{946771200000L},
+            new Object[]{946857600000L},
+            new Object[]{978307200000L},
+            new Object[]{978393600000L},
+            new Object[]{978480000000L}
+        )
+    );
+  }
+
+
   @Test
   public void testExactTopNOnInnerJoinWithLimit() throws Exception
   {
sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -6719,6 +6719,12 @@ public void testMinMaxAvgDailyCountWithLimit() throws Exception
                         )
                     )
                     .setInterval(querySegmentSpec(Filtration.eternity()))
+                    .setLimitSpec(
Comment (Contributor):
I am surprised that explanation didn't need to change here. The object should also have the same limit spec as this native plan.

Reply (Contributor Author):
Yes, the previous explanation would have had limitSpec=NoopLimitSpec, but now it changed to limitSpec=DefaultLimitSpec{columns='[]', offset=0, limit=1}. But considering the query has a LIMIT 1 at the end, I thought this was acceptable. I updated the limit to a different value and it shows up in the DefaultLimitSpec.
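For reference, a minimal hypothetical demo (not part of the PR) of the two limit specs being compared, assuming NoopLimitSpec.instance() and the two-argument DefaultLimitSpec constructor used elsewhere in these tests; the printed forms follow from the comment above:

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.LimitSpec;
import org.apache.druid.query.groupby.orderby.NoopLimitSpec;

public class LimitSpecDemo
{
  public static void main(String[] args)
  {
    // Previously the native plan carried a no-op limit spec: no ordering, no limit.
    final LimitSpec before = NoopLimitSpec.instance();

    // Now a SQL "LIMIT 1" with no ORDER BY becomes a DefaultLimitSpec with an
    // empty order-by column list and the limit applied.
    final LimitSpec after = new DefaultLimitSpec(ImmutableList.of(), 1);

    System.out.println(before); // NoopLimitSpec
    System.out.println(after);  // DefaultLimitSpec{columns='[]', offset=0, limit=1}
  }
}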

+                        new DefaultLimitSpec(
+                            ImmutableList.of(),
+                            1
+                        )
+                    )
                     .setGranularity(Granularities.ALL)
                     .setAggregatorSpecs(
                         useDefault
@@ -6752,7 +6758,7 @@ public void testMinMaxAvgDailyCountWithLimit() throws Exception
                             new FieldAccessPostAggregator(null, "_a2:count")
                         )
                     ),
-                    expressionPostAgg("p0", "timestamp_extract(\"_a3\",'EPOCH','UTC')")
+                    expressionPostAgg("s0", "timestamp_extract(\"_a3\",'EPOCH','UTC')")
                 )
             )
             .setContext(QUERY_CONTEXT_DEFAULT)
@@ -7002,7 +7008,7 @@ public void testExplainExactCountDistinctOfSemiJoinResult() throws Exception
+ " )\n"
+ ")";
final String legacyExplanation =
"DruidOuterQueryRel(query=[{\"queryType\":\"timeseries\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"descending\":false,\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"limit\":2147483647,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}}], signature=[{a0:LONG}])\n"
Comment (Contributor):

Not sure why this changed, but the query still runs as before since the final native query is still the same.

Reply (Contributor Author):

I am not sure why the timeseries changed to a groupBy. But it may be due to the change in DUMMY_DATA_SOURCE moving from a table datasource to a query datasource, which led the planner to do something else.

Reply (Contributor):

Ah right. That makes sense.

"DruidOuterQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"query\",\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"__subquery__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"list\",\"batchSize\":20480,\"filter\":null,\"context\":null,\"descending\":false,\"granularity\":{\"type\":\"all\"}}},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[],\"aggregations\":[{\"type\":\"count\",\"name\":\"a0\"}],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{a0:LONG}])\n"
+ " DruidJoinQueryRel(condition=[=(SUBSTRING($3, 1, 1), $8)], joinType=[inner], query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"__join__\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":null,\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"default\",\"dimension\":\"dim2\",\"outputName\":\"d0\",\"outputType\":\"STRING\"}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n"
+ " DruidQueryRel(query=[{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"resultFormat\":\"compactedList\",\"batchSize\":20480,\"filter\":null,\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false,\"granularity\":{\"type\":\"all\"}}], signature=[{__time:LONG, cnt:LONG, dim1:STRING, dim2:STRING, dim3:STRING, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX<hyperUnique>}])\n"
+ " DruidQueryRel(query=[{\"queryType\":\"groupBy\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"virtualColumns\":[],\"filter\":{\"type\":\"not\",\"field\":{\"type\":\"selector\",\"dimension\":\"dim1\",\"value\":null,\"extractionFn\":null}},\"granularity\":{\"type\":\"all\"},\"dimensions\":[{\"type\":\"extraction\",\"dimension\":\"dim1\",\"outputName\":\"d0\",\"outputType\":\"STRING\",\"extractionFn\":{\"type\":\"substring\",\"index\":0,\"length\":1}}],\"aggregations\":[],\"postAggregations\":[],\"having\":null,\"limitSpec\":{\"type\":\"NoopLimitSpec\"},\"context\":{\"defaultTimeout\":300000,\"maxScatterGatherBytes\":9223372036854775807,\"sqlCurrentTimestamp\":\"2000-01-01T00:00:00Z\",\"sqlQueryId\":\"dummy\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"descending\":false}], signature=[{d0:STRING}])\n";
@@ -11100,6 +11106,65 @@ public void testUnicodeFilterAndGroupBy() throws Exception
     );
   }
 
+
+  @Test
+  public void testOrderByAlongWithAliasOrderByTimeGroupByMulti() throws Exception
+  {
+    testQuery(
+        "select __time as bug, dim2 from druid.foo group by 1, 2 order by 1 limit 1",
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(CalciteTests.DATASOURCE1)
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setDimensions(
+                            dimensions(
+                                new DefaultDimensionSpec("__time", "d0", ColumnType.LONG),
+                                new DefaultDimensionSpec("dim2", "d1", ColumnType.STRING)
+                            )
+                        )
+                        .setLimitSpec(
+                            new DefaultLimitSpec(
+                                Collections.singletonList(
+                                    new OrderByColumnSpec("d0", Direction.ASCENDING, StringComparators.NUMERIC)
+                                ),
+                                1
+                            )
+                        )
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{946684800000L, "a"}
+        )
+    );
+  }
+
+
+  @Test
+  public void testOrderByAlongWithAliasOrderByTimeGroupByOneCol() throws Exception
+  {
+    testQuery(
+        "select __time as bug from druid.foo group by 1 order by 1 limit 1",
+        ImmutableList.of(
+            new TopNQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE1)
+                .intervals(querySegmentSpec(Filtration.eternity()))
+                .granularity(Granularities.ALL)
+                .dimension(
+                    new DefaultDimensionSpec("__time", "d0", ColumnType.LONG)
+                )
+                .threshold(1)
+                .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
+                .context(QUERY_CONTEXT_DEFAULT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{946684800000L}
+        )
+    );
+  }
+
   @Test
   public void testProjectAfterSort() throws Exception
   {