Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,13 @@
import org.apache.druid.query.groupby.epinephelinae.column.NullableValueGroupByColumnSelectorStrategy;
import org.apache.druid.query.groupby.epinephelinae.column.StringGroupByColumnSelectorStrategy;
import org.apache.druid.query.groupby.strategy.GroupByStrategyV2;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
Expand Down Expand Up @@ -115,14 +117,6 @@ public static Sequence<Row> process(
null
);

final boolean allSingleValueDims = query
.getDimensions()
.stream()
.allMatch(dimension -> {
final ColumnCapabilities columnCapabilities = storageAdapter.getColumnCapabilities(dimension.getDimension());
return columnCapabilities != null && !columnCapabilities.hasMultipleValues();
});

final ResourceHolder<ByteBuffer> bufferHolder = intermediateResultsBufferPool.take();

final String fudgeTimestampString = NullHandling.emptyToNullIfNeeded(
Expand All @@ -140,18 +134,38 @@ public static Sequence<Row> process(
@Override
public GroupByEngineIterator make()
{
final ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory();
final boolean allSingleValueDims = query
.getDimensions()
.stream()
.allMatch(dimension -> {
final ColumnCapabilities columnCapabilities = columnSelectorFactory.getColumnCapabilities(
dimension.getDimension()
);
return columnCapabilities != null && !columnCapabilities.hasMultipleValues();
});

ColumnSelectorPlus<GroupByColumnSelectorStrategy>[] selectorPlus = DimensionHandlerUtils
.createColumnSelectorPluses(
STRATEGY_FACTORY,
query.getDimensions(),
cursor.getColumnSelectorFactory()
columnSelectorFactory
);
GroupByColumnSelectorPlus[] dims = createGroupBySelectorPlus(selectorPlus);

final ByteBuffer buffer = bufferHolder.get();

// Check array-based aggregation is applicable
if (isArrayAggregateApplicable(querySpecificConfig, query, dims, storageAdapter, buffer)) {
// Check if array-based aggregation is applicable
final boolean useArrayAggregation = isArrayAggregateApplicable(
querySpecificConfig,
query,
dims,
storageAdapter,
query.getVirtualColumns(),
buffer
);

if (useArrayAggregation) {
return new ArrayAggregateIterator(
query,
querySpecificConfig,
Expand Down Expand Up @@ -191,6 +205,7 @@ private static boolean isArrayAggregateApplicable(
GroupByQuery query,
GroupByColumnSelectorPlus[] dims,
StorageAdapter storageAdapter,
VirtualColumns virtualColumns,
ByteBuffer buffer
)
{
Expand All @@ -206,11 +221,19 @@ private static boolean isArrayAggregateApplicable(
columnCapabilities = null;
cardinality = 1;
} else if (dims.length == 1) {
// Only real columns can use array-based aggregation, since virtual columns cannot currently report their
// cardinality. We need to check if a virtual column exists with the same name, since virtual columns can shadow
// real columns, and we might miss that since we're going directly to the StorageAdapter (which only knows about
// real columns).
if (virtualColumns.exists(dims[0].getName())) {
return false;
}

columnCapabilities = storageAdapter.getColumnCapabilities(dims[0].getName());
cardinality = storageAdapter.getDimensionCardinality(dims[0].getName());
} else {
columnCapabilities = null;
cardinality = -1; // ArrayAggregateIterator is not available
// Cannot use array-based aggregation with more than one dimension.
return false;
}

// Choose array-based aggregation if the grouping key is a single string dimension of a
Expand All @@ -225,11 +248,11 @@ private static boolean isArrayAggregateApplicable(
aggregatorFactories
);

// Check that all keys and aggregated values can be contained the buffer
// Check that all keys and aggregated values can be contained in the buffer
return requiredBufferCapacity <= buffer.capacity();
} else {
return false;
}

return false;
}

private static class GroupByStrategyFactory implements ColumnSelectorStrategyFactory<GroupByColumnSelectorStrategy>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ public interface StorageAdapter extends CursorFactory
* the column does exist but the capabilities are unknown. The latter is possible with dynamically discovered
* columns.
*
* Note that StorageAdapters are representations of "real" segments, so they are not aware of any virtual columns
* that may be involved in a query. In general, query engines should instead use the method
* {@link ColumnSelectorFactory#getColumnCapabilities(String)}, which returns capabilities for virtual columns as
* well.
*
* @param column column name
*
* @return capabilities, or null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ public ColumnValueSelector<?> makeColumnValueSelector(String columnName, ColumnS
@Override
public ColumnCapabilities capabilities(String columnName)
{
return new ColumnCapabilitiesImpl().setType(outputType);
// Note: Ideally we would only "setHasMultipleValues(true)" if the expression in question could potentially return
// multiple values. However, we don't currently have a good way of determining this, so to be safe we always
// set the flag.
return new ColumnCapabilitiesImpl().setType(outputType).setHasMultipleValues(true);
}

@Override
Expand Down