Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -712,13 +712,13 @@ public InputRawSupplierColumnSelectorStrategy makeColumnSelectorStrategy(
return new StringInputRawSupplierColumnSelectorStrategy();
case LONG:
return (InputRawSupplierColumnSelectorStrategy<BaseLongColumnValueSelector>)
columnSelector -> columnSelector::getLong;
columnSelector -> () -> columnSelector.isNull() ? null : columnSelector.getLong();
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, this change could cause a dip in performance when columns are actually strings and are being read as a number, since the parsing first happens in the isNull function and then again in getLong.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, the selectors themselves should ideally cache this computation, similar to the changes being made in #10614. Therefore, I think this change is OK, and if there are any issues it should be fixed at the selector level.

case FLOAT:
return (InputRawSupplierColumnSelectorStrategy<BaseFloatColumnValueSelector>)
columnSelector -> columnSelector::getFloat;
columnSelector -> () -> columnSelector.isNull() ? null : columnSelector.getFloat();
case DOUBLE:
return (InputRawSupplierColumnSelectorStrategy<BaseDoubleColumnValueSelector>)
columnSelector -> columnSelector::getDouble;
columnSelector -> () -> columnSelector.isNull() ? null : columnSelector.getDouble();
default:
throw new IAE("Cannot create query type helper from invalid type [%s]", type);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,27 +408,10 @@ public Sequence<ResultRow> processSubtotalsSpec(
// Dimension spec including dimension name and output name
final List<DimensionSpec> subTotalDimensionSpec = new ArrayList<>(dimsInSubtotalSpec.size());
final List<DimensionSpec> dimensions = query.getDimensions();
final List<DimensionSpec> newDimensions = new ArrayList<>();

for (int i = 0; i < dimensions.size(); i++) {
DimensionSpec dimensionSpec = dimensions.get(i);
for (DimensionSpec dimensionSpec : dimensions) {
if (dimsInSubtotalSpec.contains(dimensionSpec.getOutputName())) {
newDimensions.add(
new DefaultDimensionSpec(
dimensionSpec.getOutputName(),
dimensionSpec.getOutputName(),
dimensionSpec.getOutputType()
)
);
subTotalDimensionSpec.add(dimensionSpec);
} else {
// Insert dummy dimension so all subtotals queries have ResultRows with the same shape.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this concern no longer valid?

IIRC, it was necessary because otherwise the ResultRows would be different lengths and so the final results wouldn't be correct.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/apache/druid/blob/master/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java#L581

We are still keeping all the original dimensions in the query, so the result row size should be the same. I think you were concerned that the result should be null for dimensions that are not part of the subtotal. We are not carrying over the result for those dimensions, so it should work out.
https://github.com/apache/druid/blob/master/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java#L593

// Use a field name that does not appear in the main query result, to assure the result will be null.
String dimName = "_" + i;
while (query.getResultRowSignature().indexOf(dimName) >= 0) {
dimName = "_" + dimName;
}
newDimensions.add(DefaultDimensionSpec.of(dimName));
}
}

Expand All @@ -442,8 +425,7 @@ public Sequence<ResultRow> processSubtotalsSpec(
}

GroupByQuery subtotalQuery = baseSubtotalQuery
.withLimitSpec(subtotalQueryLimitSpec)
.withDimensionSpecs(newDimensions);
.withLimitSpec(subtotalQueryLimitSpec);

final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;
if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
import org.apache.druid.query.extraction.JavaScriptExtractionFn;
import org.apache.druid.query.extraction.MapLookupExtractor;
import org.apache.druid.query.extraction.RegexDimExtractionFn;
import org.apache.druid.query.extraction.SearchQuerySpecDimExtractionFn;
import org.apache.druid.query.extraction.StringFormatExtractionFn;
import org.apache.druid.query.extraction.StrlenExtractionFn;
import org.apache.druid.query.extraction.TimeFormatExtractionFn;
Expand Down Expand Up @@ -9563,6 +9564,92 @@ public void testGroupByNestedWithInnerQueryNumerics()
TestHelper.assertExpectedObjects(expectedResults, results, "numerics");
}

@Test
public void testGroupByNestedWithInnerQueryOutputNullNumerics()
{
cannotVectorize();

if (config.getDefaultStrategy().equals(GroupByStrategySelector.STRATEGY_V1)) {
expectedException.expect(UnsupportedOperationException.class);
expectedException.expectMessage("GroupBy v1 only supports dimensions with an outputType of STRING.");
}

// Following extractionFn will generate null value for one kind of quality
ExtractionFn extractionFn = new SearchQuerySpecDimExtractionFn(new ContainsSearchQuerySpec("1200", false));
GroupByQuery subquery = makeQueryBuilder()
.setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec("quality", "alias"),
new ExtractionDimensionSpec("qualityLong", "ql_alias", ValueType.LONG, extractionFn),
new ExtractionDimensionSpec("qualityFloat", "qf_alias", ValueType.FLOAT, extractionFn),
new ExtractionDimensionSpec("qualityDouble", "qd_alias", ValueType.DOUBLE, extractionFn)
)
.setDimFilter(
new InDimFilter(
"quality",
Arrays.asList("entertainment", "business"),
null
)
).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
.setGranularity(QueryRunnerTestHelper.DAY_GRAN)
.build();

GroupByQuery outerQuery = makeQueryBuilder()
.setDataSource(subquery)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec("ql_alias", "quallong", ValueType.LONG),
new DefaultDimensionSpec("qf_alias", "qualfloat", ValueType.FLOAT),
new DefaultDimensionSpec("qd_alias", "qualdouble", ValueType.DOUBLE)
)
.setAggregatorSpecs(
new LongSumAggregatorFactory("ql_alias_sum", "ql_alias"),
new DoubleSumAggregatorFactory("qf_alias_sum", "qf_alias"),
new DoubleSumAggregatorFactory("qd_alias_sum", "qd_alias")
)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.build();

List<ResultRow> expectedResults = Arrays.asList(
makeRow(
outerQuery,
"2011-04-01",
"quallong",
NullHandling.defaultLongValue(),
"qualfloat",
NullHandling.defaultFloatValue(),
"qualdouble",
NullHandling.defaultDoubleValue(),
"ql_alias_sum",
NullHandling.defaultLongValue(),
"qf_alias_sum",
NullHandling.defaultFloatValue(),
"qd_alias_sum",
NullHandling.defaultDoubleValue()
),
makeRow(
outerQuery,
"2011-04-01",
"quallong",
1200L,
"qualfloat",
12000.0,
"qualdouble",
12000.0,
"ql_alias_sum",
2400L,
"qf_alias_sum",
24000.0,
"qd_alias_sum",
24000.0
)
);

Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, outerQuery);
TestHelper.assertExpectedObjects(expectedResults, results, "numerics");
}

@Test
public void testGroupByNestedWithInnerQueryNumericsWithLongTime()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12159,23 +12159,23 @@ public void testGroupingAggregatorWithPostAggregator() throws Exception
List<Object[]> resultList;
if (NullHandling.sqlCompatible()) {
resultList = ImmutableList.of(
new Object[]{NULL_STRING, 2L, 0L, "INDIVIDUAL"},
new Object[]{"", 1L, 0L, "INDIVIDUAL"},
new Object[]{"a", 2L, 0L, "INDIVIDUAL"},
new Object[]{"abc", 1L, 0L, "INDIVIDUAL"},
new Object[]{NULL_STRING, 2L, 0L, NULL_STRING},
new Object[]{"", 1L, 0L, ""},
new Object[]{"a", 2L, 0L, "a"},
new Object[]{"abc", 1L, 0L, "abc"},
new Object[]{NULL_STRING, 6L, 1L, "ALL"}
);
} else {
resultList = ImmutableList.of(
new Object[]{"", 3L, 0L, "INDIVIDUAL"},
new Object[]{"a", 2L, 0L, "INDIVIDUAL"},
new Object[]{"abc", 1L, 0L, "INDIVIDUAL"},
new Object[]{"", 3L, 0L, ""},
new Object[]{"a", 2L, 0L, "a"},
new Object[]{"abc", 1L, 0L, "abc"},
new Object[]{NULL_STRING, 6L, 1L, "ALL"}
);
}
testQuery(
"SELECT dim2, SUM(cnt), GROUPING(dim2), \n"
+ "CASE WHEN GROUPING(dim2) = 1 THEN 'ALL' ELSE 'INDIVIDUAL' END\n"
+ "CASE WHEN GROUPING(dim2) = 1 THEN 'ALL' ELSE dim2 END\n"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you change this test case? (As opposed to introducing a new test case.)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wrote this test when I submitted the patch for the grouping function. I had wanted to write it this way (as it is in this PR) but couldn't because of the post-aggregation bug. Now I am changing it since I am fixing that bug. BTW, there are two more tests for the grouping function.

+ "FROM druid.foo\n"
+ "GROUP BY GROUPING SETS ( (dim2), () )",
ImmutableList.of(
Expand All @@ -12200,7 +12200,7 @@ public void testGroupingAggregatorWithPostAggregator() throws Exception
)
.setPostAggregatorSpecs(Collections.singletonList(new ExpressionPostAggregator(
"p0",
"case_searched((\"a1\" == 1),'ALL','INDIVIDUAL')",
"case_searched((\"a1\" == 1),'ALL',\"d0\")",
null,
ExprMacroTable.nil()
)))
Expand Down