Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ private void advanceAndUpdate()
index = 0;
if (!baseCursor.isDone()) {
baseCursor.advanceUninterruptibly();
indexedIntsForCurrentRow = dimSelector.getRow();
}
} else {
if (index >= indexedIntsForCurrentRow.size() - 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,36 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations)
// This is necessary to handle the virtual columns on the unnestProject
// Also create the unnest datasource to be used by the partial query
PartialDruidQuery partialDruidQuery = unnestProjectNeeded ? partialQuery.withUnnest(unnestProject) : partialQuery;
String outputColName = unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0);

// In case of nested queries for e.g.
// with t AS (select * from druid.numfoo, unnest(MV_TO_ARRAY(dim3)) as unnested (d3))
// select d2,d3 from t, UNNEST(MV_TO_ARRAY(dim2)) as foo(d2)
// which plans to
// 186:LogicalCorrelate
// 178:LogicalProject
// 176:LogicalCorrelate
// 13:LogicalTableScan(subset=[rel#168:Subset#0.NONE.[]], table=[[druid, numfoo]])
// 172:Uncollect(subset=[rel#173:Subset#3.NONE.[]])
// 170:LogicalProject(subset=[rel#171:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)])
// 14:LogicalValues(subset=[rel#169:Subset#1.NONE.[0]], tuples=[[{ 0 }]])
// 182:Uncollect(subset=[rel#183:Subset#8.NONE.[]])
// 180:LogicalProject(subset=[rel#181:Subset#7.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor1.dim2)])
// 14:LogicalValues(subset=[rel#169:Subset#1.NONE.[0]], tuples=[[{ 0 }]])
//
// the column name cannot be EXPR$0 for both inner and outer. The inner one which gets executed first gets the name
// EXPR$0 and as we move up the tree we add a 0 at the end to make the top level EXPR$00.
// Ideally these names should be replaced by the alias names specified in the query. Any future developer
// who is able to find these alias names should replace EXPR$0 by dim3 and EXPR$00 by dim2, i.e. use the correct name from Calcite.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks much for the detailed explanation!


if (druidQueryRel instanceof DruidCorrelateUnnestRel) {
outputColName = outputColName + "0";
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm skeptical that this is always correct — is it really?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a hacky approach for now; I have kept a pointer to this so it can be corrected by fetching the actual names. I will do this in a follow-up PR.

}
return partialDruidQuery.build(
UnnestDataSource.create(
leftDataSource,
dimOrExpToUnnest,
unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0),
outputColName,
null
),
rowSignature,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,10 @@ public DruidUnnestDatasourceRel asDruidConvention()
@Override
public RelWriter explainTerms(RelWriter pw)
{
  // Emit the unnest-specific attributes in addition to the default terms so that
  // nested unnest rels are distinguishable from each other in EXPLAIN output
  // (previously this delegated straight to super.explainTerms(pw) with no items).
  return super.explainTerms(pw)
              .item("Uncollect", uncollect)
              .item("druidQueryRel", druidQueryRel)
              .item("unnestProject", unnestProject);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,12 @@ public void testDatabaseMetaDataTables() throws SQLException
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.DATASOURCE6),
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.USERVISITDATASOURCE),
Expand Down Expand Up @@ -633,6 +639,12 @@ public void testDatabaseMetaDataTablesAsSuperuser() throws SQLException
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.DATASOURCE6),
Pair.of("TABLE_SCHEM", "druid"),
Pair.of("TABLE_TYPE", "TABLE")
),
row(
Pair.of("TABLE_CAT", "druid"),
Pair.of("TABLE_NAME", CalciteTests.USERVISITDATASOURCE),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3316,4 +3316,140 @@ public void testUnnestWithConstant()
)
);
}

// Verifies that a query with two stacked UNNESTs (one inside a WITH clause, one at the
// top level) plans to a nested UnnestDataSource. The inner unnest (over dim3) gets the
// output column name EXPR$0 and the outer unnest (over dim4) gets EXPR$00, avoiding a
// name clash between the two levels.
@Test
public void testUnnestRecursive()
{
// Unnest queries cannot be vectorized yet, so skip/disallow vectorized execution.
skipVectorize();
cannotVectorize();
testQuery(
"with t AS (select * from druid.numfoo, unnest(MV_TO_ARRAY(dim3)) as unnested (d3))"
+ " select d4,d3 from t, UNNEST(MV_TO_ARRAY(dim4)) as foo(d4) ",
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(
// Outer unnest wraps the inner scan-over-unnest as a QueryDataSource.
UnnestDataSource.create(
new QueryDataSource(
newScanQueryBuilder()
.dataSource(
// Inner unnest over dim3; its output column is named EXPR$0.
UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
"dim3",
"EXPR$0",
null
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.columns(
"EXPR$0",
"__time",
"cnt",
"d1",
"d2",
"dim1",
"dim2",
"dim3",
"dim4",
"dim5",
"dim6",
"f1",
"f2",
"l1",
"l2",
"m1",
"m2",
"unique_dim1"
)
.context(QUERY_CONTEXT_DEFAULT)
.build()
),
// Outer unnest over dim4; "0" is appended to the name to get EXPR$00.
"dim4",
"EXPR$00",
null
)
)
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.context(QUERY_CONTEXT_DEFAULT)
.columns(ImmutableList.of(
"EXPR$0",
"EXPR$00"
))
.build()
),
// Expected rows differ between default-value mode (missing strings become "")
// and SQL-compatible null handling (missing strings stay null).
useDefault ?
ImmutableList.of(
new Object[]{"a", "a"},
new Object[]{"a", "b"},
new Object[]{"a", "b"},
new Object[]{"a", "c"},
new Object[]{"a", "d"},
new Object[]{"b", ""},
new Object[]{"b", ""},
new Object[]{"b", ""}
) :
ImmutableList.of(
new Object[]{"a", "a"},
new Object[]{"a", "b"},
new Object[]{"a", "b"},
new Object[]{"a", "c"},
new Object[]{"a", "d"},
new Object[]{"b", ""},
new Object[]{"b", null},
new Object[]{"b", null}
)
);
}

// Verifies unnesting a multi-value dimension (dim2) of the "unnestnumfoo" datasource
// (CalciteTests.DATASOURCE6), whose first ingested row carries no dim2 value — so the
// first unnested result is null (or "" in default-value mode).
@Test
public void testUnnestFirstRowNull()
{
// Unnest queries cannot be vectorized yet, so skip/disallow vectorized execution.
skipVectorize();

cannotVectorize();
testQuery(
"SELECT h FROM druid.unnestnumfoo, UNNEST(MV_TO_ARRAY(dim2)) as unnested (h)",
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(UnnestDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE6),
"dim2",
"EXPR$0",
null
))
.intervals(querySegmentSpec(Filtration.eternity()))
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.context(QUERY_CONTEXT_DEFAULT)
.columns(ImmutableList.of(
"EXPR$0"
))
.build()
),
// Expected rows differ between default-value mode (nulls become "") and
// SQL-compatible null handling.
useDefault ?
ImmutableList.of(
new Object[]{""},
new Object[]{"a"},
new Object[]{"b"},
new Object[]{""},
new Object[]{""},
new Object[]{"a"},
new Object[]{"abc"},
new Object[]{""}
) :
ImmutableList.of(
new Object[]{null},
new Object[]{"a"},
new Object[]{"b"},
new Object[]{null},
new Object[]{""},
new Object[]{"a"},
new Object[]{"abc"},
new Object[]{null}
)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ public void testInformationSchemaTables()
.add(new Object[]{"druid", CalciteTests.DATASOURCE3, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.SOME_DATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.SOMEXDATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE6, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.USERVISITDATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", "wikipedia", "TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "COLUMNS", "SYSTEM_TABLE", "NO", "NO"})
Expand Down Expand Up @@ -255,6 +256,7 @@ public void testInformationSchemaTables()
.add(new Object[]{"druid", CalciteTests.DATASOURCE3, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.SOME_DATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.SOMEXDATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.DATASOURCE6, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", CalciteTests.USERVISITDATASOURCE, "TABLE", "NO", "NO"})
.add(new Object[]{"druid", "wikipedia", "TABLE", "NO", "NO"})
.add(new Object[]{"INFORMATION_SCHEMA", "COLUMNS", "SYSTEM_TABLE", "NO", "NO"})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ public class CalciteTests
public static final String DATASOURCE3 = "numfoo";
public static final String DATASOURCE4 = "foo4";
public static final String DATASOURCE5 = "lotsocolumns";
public static final String DATASOURCE6 = "unnestnumfoo";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better name? nested perhaps? Also, would be cool to add a comment with the schema: I find it hard to suss that out from the code.

public static final String BROADCAST_DATASOURCE = "broadcast";
public static final String FORBIDDEN_DATASOURCE = "forbiddenDatasource";
public static final String SOME_DATASOURCE = "some_datasource";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,100 @@ public Optional<Joinable> build(
public static final List<InputRow> ROWS1 =
RAW_ROWS1.stream().map(TestDataBuilder::createRow).collect(Collectors.toList());

public static final List<ImmutableMap<String, Object>> RAW_ROWS_FOR_UNNEST = ImmutableList.of(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this have all the interesting corner cases? Empty arrays or objects? Null values? Fields that appear in one nested object but not another (in both orders: (a,b), (a), (a,c))? And so on. To help future readers, might be handy to add a comment above each .put( call that sets up one of these cases.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea will do

ImmutableMap.<String, Object>builder()
.put("t", "2000-01-01")
.put("m1", "1.0")
.put("m2", "1.0")
.put("d1", 1.0)
.put("f1", 1.0f)
.put("l1", 7L)
.put("dim1", "")
.put("dim3", ImmutableList.of("a", ImmutableList.of("b", "c")))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the string dimension indexer can't really handle nested arrays like this, i think you'll end up with something like "a" and then the 'toString' of ["b","c"], or maybe something even weirder...

I think you should stick to having either flat lists or single layer strings for these tests

.put("dim5", ImmutableList.of("a5", "b5"))
.put("dim6", "1")
.build(),
ImmutableMap.<String, Object>builder()
.put("t", "2000-01-01")
.put("m1", "1.0")
.put("m2", "1.0")
.put("d1", 1.0)
.put("f1", 1.0f)
.put("l1", 7L)
.put("dim1", "")
.put("dim2", ImmutableList.of("a", "b"))
.put("dim3", ImmutableList.of("a", ImmutableList.of("b", "c")))
.put("dim4", ImmutableList.of("x", "y"))
.put("dim5", ImmutableList.of("c5", "d5"))
.put("dim6", "1")
.build(),
ImmutableMap.<String, Object>builder()
.put("t", "2000-01-02")
.put("m1", "2.0")
.put("m2", "2.0")
.put("d1", 1.7)
.put("d2", 1.7)
.put("f1", 0.1f)
.put("f2", 0.1f)
.put("l1", 325323L)
.put("l2", 325323L)
.put("dim1", "10.1")
.put("dim2", ImmutableList.of())
.put("dim3", ImmutableList.of("b", "c"))
.put("dim4", ImmutableList.of("p", "q"))
.put("dim5", ImmutableList.of("e5", "f5"))
.put("dim6", "2")
.build(),
ImmutableMap.<String, Object>builder()
.put("t", "2000-01-03")
.put("m1", "3.0")
.put("m2", "3.0")
.put("d1", 0.0)
.put("d2", 0.0)
.put("f1", 0.0)
.put("f2", 0.0)
.put("l1", 0)
.put("l2", 0)
.put("dim1", "2")
.put("dim2", ImmutableList.of(""))
.put("dim3", ImmutableList.of("d"))
.put("dim4", ImmutableList.of("", "null", ""))
.put("dim5", ImmutableList.of("a5", "b5"))
.put("dim6", "3")
.build(),
ImmutableMap.<String, Object>builder()
.put("t", "2001-01-01")
.put("m1", "4.0")
.put("m2", "4.0")
.put("dim1", "1")
.put("dim2", ImmutableList.of("a"))
.put("dim3", ImmutableList.of(""))
.put("dim4", "b")
.put("dim5", "a5")
.put("dim6", "4")
.build(),
ImmutableMap.<String, Object>builder()
.put("t", "2001-01-02")
.put("m1", "5.0")
.put("m2", "5.0")
.put("dim1", "def")
.put("dim2", ImmutableList.of("abc"))
.put("dim3", ImmutableList.of())
.put("dim4", "b")
.put("dim5", "b5")
.put("dim6", "5")
.build(),
ImmutableMap.<String, Object>builder()
.put("t", "2001-01-03")
.put("m1", "6.0")
.put("m2", "6.0")
.put("dim1", "abc")
.put("dim4", "b")
.put("dim5", "ab5")
.put("dim6", "6")
.build()
);

public static final List<ImmutableMap<String, Object>> RAW_ROWS1_WITH_NUMERIC_DIMS = ImmutableList.of(
ImmutableMap.<String, Object>builder()
.put("t", "2000-01-01")
Expand Down Expand Up @@ -435,6 +529,9 @@ public Optional<Joinable> build(
public static final List<InputRow> ROWS1_WITH_NUMERIC_DIMS =
RAW_ROWS1_WITH_NUMERIC_DIMS.stream().map(raw -> createRow(raw, NUMFOO_SCHEMA)).collect(Collectors.toList());

// InputRows for the "unnestnumfoo" datasource, parsed from RAW_ROWS_FOR_UNNEST with the
// same NUMFOO_SCHEMA used for ROWS1_WITH_NUMERIC_DIMS.
public static final List<InputRow> ROWS_WITH_NUMERIC_DIMS_FOR_UNNEST =
RAW_ROWS_FOR_UNNEST.stream().map(raw -> createRow(raw, NUMFOO_SCHEMA)).collect(Collectors.toList());

public static final List<ImmutableMap<String, Object>> RAW_ROWS2 = ImmutableList.of(
ImmutableMap.<String, Object>builder()
.put("t", "2000-01-01")
Expand Down Expand Up @@ -811,6 +908,14 @@ public static SpecificSegmentsQuerySegmentWalker createMockWalker(
.rows(USER_VISIT_ROWS)
.buildMMappedIndex();

final QueryableIndex unnestIndex = IndexBuilder
.create()
.tmpDir(new File(tmpDir, "9"))
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(INDEX_SCHEMA_NUMERIC_DIMS)
.rows(ROWS_WITH_NUMERIC_DIMS_FOR_UNNEST)
.buildMMappedIndex();

return new SpecificSegmentsQuerySegmentWalker(
conglomerate,
injector.getInstance(LookupExtractorFactoryContainerProvider.class),
Expand Down Expand Up @@ -906,6 +1011,15 @@ public static SpecificSegmentsQuerySegmentWalker createMockWalker(
.size(0)
.build(),
userVisitIndex
).add(
DataSegment.builder()
.dataSource(CalciteTests.DATASOURCE6)
.interval(indexNumericDims.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.size(0)
.build(),
unnestIndex
).add(
DataSegment.builder()
.dataSource("wikipedia")
Expand Down