Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.GenericIndexed;
Expand Down Expand Up @@ -195,7 +196,7 @@ public CloseableIndexed<String> getDimensionValues(String dimension)
}

@Override
public boolean hasMultipleValues(final String dimension)
public ColumnCapabilities.Capable hasMultipleValues(final String dimension)
{
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.druid.query.filter.DruidLongPredicate;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.GenericIndexed;
Expand Down Expand Up @@ -166,7 +167,7 @@ public CloseableIndexed<String> getDimensionValues(String dimension)
}

@Override
public boolean hasMultipleValues(final String dimension)
public ColumnCapabilities.Capable hasMultipleValues(final String dimension)
{
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.BitmapSerdeFactory;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.GenericIndexed;
Expand Down Expand Up @@ -166,7 +167,7 @@ public CloseableIndexed<String> getDimensionValues(String dimension)
}

@Override
public boolean hasMultipleValues(final String dimension)
public ColumnCapabilities.Capable hasMultipleValues(final String dimension)
{
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.collections.spatial.ImmutableRTree;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.CloseableIndexed;

import javax.annotation.Nullable;
Expand All @@ -35,7 +36,7 @@ public interface BitmapIndexSelector
@MustBeClosed
@Nullable
CloseableIndexed<String> getDimensionValues(String dimension);
boolean hasMultipleValues(String dimension);
ColumnCapabilities.Capable hasMultipleValues(String dimension);
int getNumRows();
BitmapFactory getBitmapFactory();
@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,13 +321,13 @@ public static int getCardinalityForArrayAggregation(
/**
* Checks whether all "dimensions" are either single-valued, or if allowed, nonexistent. Since non-existent column
* selectors will show up as full of nulls they are effectively single valued, however they can also be null during
* broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonexistent' is sort of a hack to let
* broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonExistent' is sort of a hack to let
* the vectorized engine, which only operates on actual segments, to still work in this case for non-existent columns.
*/
public static boolean isAllSingleValueDims(
final Function<String, ColumnCapabilities> capabilitiesFunction,
final List<DimensionSpec> dimensions,
final boolean missingMeansNonexistent
final boolean missingMeansNonExistent
)
{
return dimensions
Expand All @@ -342,8 +342,8 @@ public static boolean isAllSingleValueDims(

// Now check column capabilities.
final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension());
return (columnCapabilities != null && !columnCapabilities.hasMultipleValues()) ||
(missingMeansNonexistent && columnCapabilities == null);
return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) ||
(missingMeansNonExistent && columnCapabilities == null);
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import org.apache.druid.segment.serde.ComplexMetrics;
import org.joda.time.DateTime;
Expand Down Expand Up @@ -101,9 +102,18 @@ public Map<String, ColumnAnalysis> analyze(Segment segment)

for (String columnName : columnNames) {
final ColumnHolder columnHolder = index == null ? null : index.getColumnHolder(columnName);
final ColumnCapabilities capabilities = columnHolder != null
? columnHolder.getCapabilities()
: storageAdapter.getColumnCapabilities(columnName);
final ColumnCapabilities capabilities;
if (columnHolder != null) {
capabilities = columnHolder.getCapabilities();
} else {
// this can be removed if we get to the point where IncrementalIndexStorageAdapter.getColumnCapabilities
// accurately reports the capabilities
if (storageAdapter instanceof IncrementalIndexStorageAdapter) {
capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName);
} else {
capabilities = storageAdapter.getColumnCapabilities(columnName);
}
}

final ColumnAnalysis analysis;
final ValueType type = capabilities.getType();
Expand Down Expand Up @@ -138,7 +148,7 @@ public Map<String, ColumnAnalysis> analyze(Segment segment)
// Add time column too
ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(ColumnHolder.TIME_COLUMN_NAME);
if (timeCapabilities == null) {
timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
timeCapabilities = ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
}
columns.put(
ColumnHolder.TIME_COLUMN_NAME,
Expand Down Expand Up @@ -172,7 +182,7 @@ private ColumnAnalysis analyzeNumericColumn(
long size = 0;

if (analyzingSize()) {
if (capabilities.hasMultipleValues()) {
if (capabilities.hasMultipleValues().isTrue()) {
return ColumnAnalysis.error("multi_value");
}

Expand All @@ -181,7 +191,7 @@ private ColumnAnalysis analyzeNumericColumn(

return new ColumnAnalysis(
capabilities.getType().name(),
capabilities.hasMultipleValues(),
capabilities.hasMultipleValues().isTrue(),
size,
null,
null,
Expand Down Expand Up @@ -231,7 +241,7 @@ private ColumnAnalysis analyzeStringColumn(

return new ColumnAnalysis(
capabilities.getType().name(),
capabilities.hasMultipleValues(),
capabilities.hasMultipleValues().isTrue(),
size,
analyzingCardinality() ? cardinality : 0,
min,
Expand Down Expand Up @@ -308,7 +318,7 @@ public Long accumulate(Long accumulated, Cursor cursor)

return new ColumnAnalysis(
capabilities.getType().name(),
capabilities.hasMultipleValues(),
capabilities.hasMultipleValues().isTrue(),
size,
cardinality,
min,
Expand All @@ -324,7 +334,7 @@ private ColumnAnalysis analyzeComplexColumn(
)
{
try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) {
final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues();
final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue();
long size = 0;

if (analyzingSize() && complexColumn != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,6 @@ private static <T> T makeProcessorInternal(
*/
private static boolean mayBeMultiValue(@Nullable final ColumnCapabilities capabilities)
{
return capabilities == null || !capabilities.isComplete() || capabilities.hasMultipleValues();
return capabilities == null || capabilities.hasMultipleValues().isMaybeTrue();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice.

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.BaseColumn;
import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.NumericColumn;
Expand Down Expand Up @@ -157,14 +158,18 @@ public void close() throws IOException
}

@Override
public boolean hasMultipleValues(final String dimension)
public ColumnCapabilities.Capable hasMultipleValues(final String dimension)
{
if (isVirtualColumn(dimension)) {
return virtualColumns.getVirtualColumn(dimension).capabilities(dimension).hasMultipleValues();
}

final ColumnHolder columnHolder = index.getColumnHolder(dimension);
return columnHolder != null && columnHolder.getCapabilities().hasMultipleValues();
// if ColumnHolder is null, the column doesn't exist, but report as not having multiple values so that
// the empty bitmap will be used
return columnHolder != null
? columnHolder.getCapabilities().hasMultipleValues()
: ColumnCapabilities.Capable.FALSE;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,15 +289,23 @@ public static <T> T makeVectorProcessor(
final VectorColumnSelectorFactory selectorFactory
)
{
final ColumnCapabilities capabilities = getEffectiveCapabilities(
final ColumnCapabilities originalCapabilities =
selectorFactory.getColumnCapabilities(dimensionSpec.getDimension());

final ColumnCapabilities effectiveCapabilites = getEffectiveCapabilities(
dimensionSpec,
selectorFactory.getColumnCapabilities(dimensionSpec.getDimension())
originalCapabilities
);

final ValueType type = capabilities.getType();
final ValueType type = effectiveCapabilites.getType();

// vector selectors should never have null column capabilities, these signify a non-existent column, and complex
// columns should never be treated as a multi-value column, so always use single value string processor
final boolean forceSingleValue =
originalCapabilities == null || ValueType.COMPLEX.equals(originalCapabilities.getType());

if (type == ValueType.STRING) {
if (capabilities.hasMultipleValues()) {
if (!forceSingleValue && effectiveCapabilites.hasMultipleValues().isMaybeTrue()) {
return strategyFactory.makeMultiValueDimensionProcessor(
selectorFactory.makeMultiValueDimensionSelector(dimensionSpec)
);
Expand Down Expand Up @@ -328,7 +336,7 @@ public static <T> T makeVectorProcessor(
selectorFactory.makeValueSelector(dimensionSpec.getDimension())
);
} else {
throw new ISE("Unsupported type[%s]", capabilities.getType());
throw new ISE("Unsupported type[%s]", effectiveCapabilites.getType());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,19 @@ public interface DimensionIndexer
*/
EncodedKeyComponentType processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions);

/**
* This method will be called while building an {@link IncrementalIndex} whenever a known dimension column (either
* through an explicit schema on the ingestion spec, or auto-discovered while processing rows) is absent in any row
* that is processed, to allow an indexer to account for any missing rows if necessary. Useful so that a string
* {@link DimensionSelector} built on top of an {@link IncrementalIndex} may accurately report
* {@link DimensionSelector#nameLookupPossibleInAdvance()} by allowing it to track if it has any implicit null valued
* rows.
*
* At index persist/merge time all missing columns for a row will be explicitly replaced with the value appropriate
* null or default value.
*/
void setSparseIndexed();

/**
* Gives the estimated size in bytes for the given key
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVa
return DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions);
}

@Override
public void setSparseIndexed()
{
// no-op, double columns do not have a dictionary to track null values
}

@Override
public long estimateEncodedKeyComponentSize(Double key)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimVal
return DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions);
}

@Override
public void setSparseIndexed()
{
// no-op, float columns do not have a dictionary to track null values
}

@Override
public long estimateEncodedKeyComponentSize(Float key)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ private File makeIndexFiles(
progress.progress();
final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
final List<ColumnCapabilitiesImpl> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);

final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
Expand Down Expand Up @@ -716,18 +716,22 @@ private void mergeCapabilities(
final List<String> mergedDimensions,
final Map<String, ValueType> metricsValueTypes,
final Map<String, String> metricTypeNames,
final List<ColumnCapabilitiesImpl> dimCapabilities
final List<ColumnCapabilities> dimCapabilities
)
{
final Map<String, ColumnCapabilitiesImpl> capabilitiesMap = new HashMap<>();
final Map<String, ColumnCapabilities> capabilitiesMap = new HashMap<>();
for (IndexableAdapter adapter : adapters) {
for (String dimension : adapter.getDimensionNames()) {
ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
capabilitiesMap.computeIfAbsent(dimension, d -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities);
capabilitiesMap.compute(dimension, (d, existingCapabilities) ->
ColumnCapabilitiesImpl.snapshot(capabilities)
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
}
for (String metric : adapter.getMetricNames()) {
ColumnCapabilities capabilities = adapter.getCapabilities(metric);
capabilitiesMap.computeIfAbsent(metric, m -> new ColumnCapabilitiesImpl().setIsComplete(true)).merge(capabilities);
capabilitiesMap.compute(metric, (m, existingCapabilities) ->
ColumnCapabilitiesImpl.snapshot(capabilities)
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
metricsValueTypes.put(metric, capabilities.getType());
metricTypeNames.put(metric, adapter.getMetricType(metric));
}
Expand Down Expand Up @@ -1002,7 +1006,7 @@ public File append(

private Map<String, DimensionHandler> makeDimensionHandlers(
final List<String> mergedDimensions,
final List<ColumnCapabilitiesImpl> dimCapabilities
final List<ColumnCapabilities> dimCapabilities
)
{
Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValu
return DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions);
}

@Override
public void setSparseIndexed()
{
// no-op, long columns do not have a dictionary to track null values
}

@Override
public long estimateEncodedKeyComponentSize(Long key)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static ColumnCapabilities getColumnCapabilities(
{
if (ColumnHolder.TIME_COLUMN_NAME.equals(columnName)) {
// TIME_COLUMN_NAME is handled specially; override the provided rowSignature.
return new ColumnCapabilitiesImpl().setType(ValueType.LONG).setIsComplete(true);
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
} else {
final ValueType valueType = rowSignature.getColumnType(columnName).orElse(null);

Expand All @@ -105,12 +105,13 @@ static ColumnCapabilities getColumnCapabilities(
// causes expression selectors to always treat us as arrays. If we might have multiple values (i.e. if our type
// is nonnumeric), set isComplete false to compensate.
if (valueType != null) {
if (valueType.isNumeric()) {
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(valueType);
}
return new ColumnCapabilitiesImpl()
.setType(valueType)
.setDictionaryValuesUnique(false)
.setDictionaryValuesSorted(false)
// Numeric types should be reported as complete, but not STRING or COMPLEX (because we don't have full info)
.setIsComplete(valueType.isNumeric());
.setDictionaryValuesSorted(false);
} else {
return null;
}
Expand Down
Loading