From 2365aded617bcec332f4901e9eccdcf04e07eafe Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 11 Sep 2020 18:33:53 -0700 Subject: [PATCH 1/7] allow vectorized query engines to utilize vectorized virtual column implementations --- .../vector/VectorGroupByEngine.java | 40 ++- .../QueryableIndexCursorSequenceBuilder.java | 51 ++- .../segment/QueryableIndexStorageAdapter.java | 5 +- .../apache/druid/segment/VirtualColumn.java | 128 ++++++- .../apache/druid/segment/VirtualColumns.java | 178 ++++++++-- ...yableIndexVectorColumnSelectorFactory.java | 229 ++++++++---- .../AlwaysTwoCounterAggregatorFactory.java | 249 +++++++++++++ .../AlwaysTwoVectorizedVirtualColumn.java | 327 ++++++++++++++++++ .../virtual/VectorizedVirtualColumnTest.java | 304 ++++++++++++++++ 9 files changed, 1390 insertions(+), 121 deletions(-) create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java create mode 100644 processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index 99f14ae6c3c4..be8bf9f9abfe 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -42,6 +42,8 @@ import org.apache.druid.query.vector.VectorCursorGranularizer; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.vector.VectorColumnSelectorFactory; import org.apache.druid.segment.vector.VectorCursor; @@ -55,6 +57,7 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import java.util.function.Function; import java.util.stream.Collectors; public class VectorGroupByEngine @@ -82,12 +85,47 @@ public static boolean canVectorize( // This situation should sort itself out pretty well once this engine supports multi-valued columns. Then we // won't have to worry about having this all-single-value-dims check here. - return GroupByQueryEngineV2.isAllSingleValueDims(adapter::getColumnCapabilities, query.getDimensions(), true) + Function capabilitiesFunction = name -> + query.getVirtualColumns().getColumnCapabilitiesWithFallback(adapter, name); + + return canVectorizeDimensions(capabilitiesFunction, query.getDimensions()) && query.getDimensions().stream().allMatch(DimensionSpec::canVectorize) && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(adapter)) && adapter.canVectorize(filter, query.getVirtualColumns(), false); } + public static boolean canVectorizeDimensions( + final Function capabilitiesFunction, + final List dimensions + ) + { + return dimensions + .stream() + .allMatch( + dimension -> { + if (dimension.mustDecorate()) { + // group by on multi value dimensions are not currently supported + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. + return false; + } + + // Now check column capabilities. + final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); + // null here currently means the column does not exist, nil columns can be vectorized + if (columnCapabilities == null) { + return true; + } + // strings must be single valued, dictionary encoded, and have unique dictionary entries + if (ValueType.STRING.equals(columnCapabilities.getType())) { + return columnCapabilities.hasMultipleValues().isFalse() && + columnCapabilities.isDictionaryEncoded().isTrue() && + columnCapabilities.areDictionaryValuesUnique().isTrue(); + } + return columnCapabilities.hasMultipleValues().isFalse(); + }); + } + public static Sequence process( final GroupByQuery query, final StorageAdapter storageAdapter, diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java index dfd2fa624a97..e1a6c6d7c80a 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorSequenceBuilder.java @@ -193,7 +193,6 @@ public Cursor apply(final Interval inputInterval) public VectorCursor buildVectorized(final int vectorSize) { // Sanity check - matches QueryableIndexStorageAdapter.canVectorize - Preconditions.checkState(virtualColumns.size() == 0, "virtualColumns.size == 0"); Preconditions.checkState(!descending, "!descending"); final Map columnCache = new HashMap<>(); @@ -229,17 +228,15 @@ public VectorCursor buildVectorized(final int vectorSize) ? new NoFilterVectorOffset(vectorSize, startOffset, endOffset) : new BitmapVectorOffset(vectorSize, filterBitmap, startOffset, endOffset); + // baseColumnSelectorFactory using baseOffset is the column selector for filtering. + final VectorColumnSelectorFactory baseColumnSelectorFactory = makeVectorColumnSelectorFactoryForOffset( + columnCache, + baseOffset, + closer + ); if (postFilter == null) { - return new QueryableIndexVectorCursor(index, baseOffset, closer, columnCache, vectorSize); + return new QueryableIndexVectorCursor(baseColumnSelectorFactory, baseOffset, vectorSize, closer); } else { - // baseColumnSelectorFactory using baseOffset is the column selector for filtering. - final VectorColumnSelectorFactory baseColumnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory( - index, - baseOffset, - closer, - columnCache - ); - final VectorOffset filteredOffset = FilteredVectorOffset.create( baseOffset, baseColumnSelectorFactory, @@ -254,10 +251,31 @@ public VectorCursor buildVectorized(final int vectorSize) // object will get hit twice for some of the values (anything that matched the filter). This is probably most // noticeable if it causes thrashing of decompression buffers due to out-of-order reads. I haven't observed // this directly but it seems possible in principle. - return new QueryableIndexVectorCursor(index, filteredOffset, closer, columnCache, vectorSize); + // baseColumnSelectorFactory using baseOffset is the column selector for filtering. + final VectorColumnSelectorFactory filteredColumnSelectorFactory = makeVectorColumnSelectorFactoryForOffset( + columnCache, + filteredOffset, + closer + ); + return new QueryableIndexVectorCursor(filteredColumnSelectorFactory, filteredOffset, vectorSize, closer); } } + VectorColumnSelectorFactory makeVectorColumnSelectorFactoryForOffset( + Map columnCache, + VectorOffset baseOffset, + Closer closer + ) + { + return new QueryableIndexVectorColumnSelectorFactory( + index, + baseOffset, + closer, + columnCache, + virtualColumns + ); + } + /** * Search the time column using binary search. Benchmarks on various other approaches (linear search, binary * search that switches to linear at various closeness thresholds) indicated that a pure binary search worked best. @@ -318,17 +336,16 @@ private static class QueryableIndexVectorCursor implements VectorCursor private final VectorColumnSelectorFactory columnSelectorFactory; public QueryableIndexVectorCursor( - final QueryableIndex index, + final VectorColumnSelectorFactory vectorColumnSelectorFactory, final VectorOffset offset, - final Closer closer, - final Map columnCache, - final int vectorSize + final int vectorSize, + final Closer closer ) { + this.columnSelectorFactory = vectorColumnSelectorFactory; + this.vectorSize = vectorSize; this.offset = offset; this.closer = closer; - this.vectorSize = vectorSize; - this.columnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory(index, offset, closer, columnCache); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java index e9aa01e07c9e..2ab9e7a536aa 100644 --- a/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/QueryableIndexStorageAdapter.java @@ -219,9 +219,8 @@ public boolean canVectorize( } } - // 1) Virtual columns can't vectorize yet - // 2) Vector cursors can't iterate backwards yet - return virtualColumns.size() == 0 && !descending; + // vector cursors can't iterate backwards yet + return virtualColumns.canVectorize(this) && !descending; } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java index 217f72f83627..a59212f537c4 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java @@ -26,14 +26,21 @@ import org.apache.druid.segment.column.BitmapIndex; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import javax.annotation.Nullable; import java.util.List; /** - * Virtual columns are "views" created over a ColumnSelectorFactory or ColumnSelector. They can potentially draw from multiple - * underlying columns, although they always present themselves as if they were a single column. + * Virtual columns are "views" created over a {@link ColumnSelectorFactory} or {@link ColumnSelector}. They can + * potentially draw from multiple underlying columns, although they always present themselves as if they were a single + * column. * * A virtual column object will be shared amongst threads and must be thread safe. The selectors returned * from the various makeXXXSelector methods need not be thread safe. @@ -113,6 +120,122 @@ default ColumnValueSelector makeColumnValueSelector(String columnName, Column return null; } + default boolean canVectorize(ColumnInspector inspector) + { + return false; + } + + /** + * Build a vectorized dictionary indexed selector corresponding to this virtual column. Also provides the name that + * the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful if this + * column uses dot notation. The virtual column is expected to apply any necessary decoration from the dimensionSpec. + * + * @param dimensionSpec the dimensionSpec this column was referenced with + * @param factory vector column selector factory + * + * @return the selector, must not be null + */ + default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + throw new UnsupportedOperationException("not supported"); + } + + /** + * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by + * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this + * method has full access to underlying column and can potentially provide a more efficient implementation. + * + * @param dimensionSpec + * @param columnSelector + * @param offset + * @return the selector + */ + @SuppressWarnings("unused") + @Nullable + default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + return null; + } + + /** + * Build a vectorized 'multi-valued' dictionary indexed selector corresponding to this virtual column. Also provides + * the name that the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful + * if this column uses dot notation. The virtual column is expected to apply any necessary decoration from the + * dimensionSpec. + * + * @param dimensionSpec the dimensionSpec this column was referenced with + * @param factory vector column selector factory + * + * @return the selector, must not be null + */ + default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + throw new UnsupportedOperationException("not supported"); + } + + /** + * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by + * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this + * method has full access to underlying column and can potentially provide a more efficient implementation. + * + * @param dimensionSpec + * @param columnSelector + * @param offset + * @return the selector + */ + @SuppressWarnings("unused") + @Nullable + default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + return null; + } + + + default VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) + { + throw new UnsupportedOperationException("not supported"); + } + + @Nullable + default VectorValueSelector makeVectorValueSelector( + String columnName, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + return null; + } + + + default VectorObjectSelector makeVectorObjectSelector(String columnName, VectorColumnSelectorFactory factory) + { + throw new UnsupportedOperationException("not supported"); + } + + @Nullable + default VectorObjectSelector makeVectorObjectSelector( + String columnName, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + return null; + } + /** * Returns the capabilities of this virtual column, which includes a type that corresponds to the best * performing base selector supertype (e. g. {@link BaseLongColumnValueSelector}) of the object, returned from @@ -160,4 +283,5 @@ default BitmapIndex getBitmapIndex(String columnName, ColumnSelector selector) { throw new UnsupportedOperationException("not supported"); } + } diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 7a8d7d4fa8fb..5cfc2912aab4 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -35,6 +35,12 @@ import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.data.ReadableOffset; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorOffset; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; import org.apache.druid.segment.virtual.VirtualizedColumnSelectorFactory; import javax.annotation.Nullable; @@ -152,6 +158,20 @@ public VirtualColumn getVirtualColumn(String columnName) return withDotSupport.get(baseColumnName); } + @Nullable + public BitmapIndex getBitmapIndex(String columnName, ColumnSelector columnSelector) + { + final VirtualColumn virtualColumn = getVirtualColumn(columnName); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", columnName); + } else { + return virtualColumn.capabilities(columnName).hasBitmapIndexes() ? virtualColumn.getBitmapIndex( + columnName, + columnSelector + ) : null; + } + } + /** * Create a dimension (string) selector. * @@ -175,71 +195,182 @@ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, Colu } @Nullable - public BitmapIndex getBitmapIndex(String columnName, ColumnSelector columnSelector) + public DimensionSelector makeDimensionSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); + } else { + return virtualColumn.makeDimensionSelector(dimensionSpec, columnSelector, offset); + } + } + + /** + * Create a column value selector. + * + * @param columnName column mame + * @param factory base column selector factory + * + * @return selector + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ + public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) { final VirtualColumn virtualColumn = getVirtualColumn(columnName); if (virtualColumn == null) { throw new IAE("No such virtual column[%s]", columnName); } else { - return virtualColumn.capabilities(columnName).hasBitmapIndexes() ? virtualColumn.getBitmapIndex( - columnName, - columnSelector - ) : null; + final ColumnValueSelector selector = virtualColumn.makeColumnValueSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; } } @Nullable - public DimensionSelector makeDimensionSelector( - DimensionSpec dimensionSpec, + public ColumnValueSelector makeColumnValueSelector( + String columnName, ColumnSelector columnSelector, ReadableOffset offset ) + { + final VirtualColumn virtualColumn = getVirtualColumn(columnName); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", columnName); + } else { + return virtualColumn.makeColumnValueSelector(columnName, columnSelector, offset); + } + } + + public boolean canVectorize(ColumnInspector columnInspector) + { + return virtualColumns.stream().allMatch(virtualColumn -> virtualColumn.canVectorize(columnInspector)); + } + + public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) { final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); if (virtualColumn == null) { throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); } else { - return virtualColumn.makeDimensionSelector(dimensionSpec, columnSelector, offset); + final SingleValueDimensionVectorSelector selector = virtualColumn.makeSingleValueVectorDimensionSelector( + dimensionSpec, + factory + ); + Preconditions.checkNotNull(selector, "selector"); + return selector; } } @Nullable - public ColumnValueSelector makeColumnValueSelector( + public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); + } else { + return virtualColumn.makeSingleValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); + } + } + + public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); + } else { + final MultiValueDimensionVectorSelector selector = virtualColumn.makeMultiValueVectorDimensionSelector( + dimensionSpec, + factory + ); + Preconditions.checkNotNull(selector, "selector"); + return selector; + } + } + + @Nullable + public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); + } else { + return virtualColumn.makeMultiValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); + } + } + + public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) + { + final VirtualColumn virtualColumn = getVirtualColumn(columnName); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", columnName); + } else { + final VectorValueSelector selector = virtualColumn.makeVectorValueSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; + } + } + + @Nullable + public VectorValueSelector makeVectorValueSelector( String columnName, ColumnSelector columnSelector, - ReadableOffset offset + ReadableVectorOffset offset ) { final VirtualColumn virtualColumn = getVirtualColumn(columnName); if (virtualColumn == null) { throw new IAE("No such virtual column[%s]", columnName); } else { - return virtualColumn.makeColumnValueSelector(columnName, columnSelector, offset); + return virtualColumn.makeVectorValueSelector(columnName, columnSelector, offset); } } - /** - * Create a column value selector. - * - * @param columnName column mame - * @param factory base column selector factory - * - * @return selector - * - * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} - */ - public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) + public VectorObjectSelector makeVectorObjectSelector(String columnName, VectorColumnSelectorFactory factory) { final VirtualColumn virtualColumn = getVirtualColumn(columnName); if (virtualColumn == null) { throw new IAE("No such virtual column[%s]", columnName); } else { - final ColumnValueSelector selector = virtualColumn.makeColumnValueSelector(columnName, factory); + final VectorObjectSelector selector = virtualColumn.makeVectorObjectSelector(columnName, factory); Preconditions.checkNotNull(selector, "selector"); return selector; } } + @Nullable + public VectorObjectSelector makeVectorObjectSelector( + String columnName, + ColumnSelector columnSelector, + ReadableVectorOffset offset + ) + { + final VirtualColumn virtualColumn = getVirtualColumn(columnName); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", columnName); + } else { + return virtualColumn.makeVectorObjectSelector(columnName, columnSelector, offset); + } + } + @Nullable public ColumnCapabilities getColumnCapabilities(String columnName) { @@ -339,4 +470,5 @@ public String toString() { return virtualColumns.toString(); } + } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 48c56c9ef6df..05981aaed324 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -24,6 +24,7 @@ import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndexStorageAdapter; +import org.apache.druid.segment.VirtualColumns; import org.apache.druid.segment.column.BaseColumn; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnHolder; @@ -33,9 +34,11 @@ import javax.annotation.Nullable; import java.util.HashMap; import java.util.Map; +import java.util.function.Function; public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSelectorFactory { + private final VirtualColumns virtualColumns; private final QueryableIndex index; private final ReadableVectorOffset offset; private final Closer closer; @@ -52,12 +55,14 @@ public QueryableIndexVectorColumnSelectorFactory( final QueryableIndex index, final ReadableVectorOffset offset, final Closer closer, - final Map columnCache + final Map columnCache, + final VirtualColumns virtualColumns ) { this.index = index; this.offset = offset; this.closer = closer; + this.virtualColumns = virtualColumns; this.columnCache = columnCache; this.singleValueDimensionSelectorCache = new HashMap<>(); this.multiValueDimensionSelectorCache = new HashMap<>(); @@ -77,34 +82,54 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final D if (!dimensionSpec.canVectorize()) { throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); } - - return multiValueDimensionSelectorCache.computeIfAbsent( - dimensionSpec, - spec -> { - final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); - if (holder == null - || holder.getCapabilities().isDictionaryEncoded().isFalse() - || holder.getCapabilities().getType() != ValueType.STRING - || holder.getCapabilities().hasMultipleValues().isFalse()) { - throw new ISE( - "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", - spec.getDimension() - ); - } - - @SuppressWarnings("unchecked") - final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) - getCachedColumn(spec.getDimension()); - - // dictionaryEncodedColumn is not null because of holder null check above - assert dictionaryEncodedColumn != null; - final MultiValueDimensionVectorSelector selector = dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector( - offset - ); - - return spec.decorate(selector); + Function mappingFunction = spec -> { + if (virtualColumns.exists(spec.getDimension())) { + MultiValueDimensionVectorSelector dimensionSelector = virtualColumns.makeMultiValueDimensionVectorSelector( + dimensionSpec, + index, + offset + ); + if (dimensionSelector == null) { + return virtualColumns.makeMultiValueDimensionVectorSelector(dimensionSpec, this); + } else { + return dimensionSelector; } - ); + } + + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || holder.getCapabilities().isDictionaryEncoded().isFalse() + || holder.getCapabilities().getType() != ValueType.STRING + || holder.getCapabilities().hasMultipleValues().isFalse()) { + throw new ISE( + "Column[%s] is not a multi-value string column, do not ask for a multi-value selector", + spec.getDimension() + ); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) + getCachedColumn(spec.getDimension()); + + // dictionaryEncodedColumn is not null because of holder null check above + assert dictionaryEncodedColumn != null; + final MultiValueDimensionVectorSelector selector = dictionaryEncodedColumn.makeMultiValueDimensionVectorSelector( + offset + ); + + return spec.decorate(selector); + }; + + // We cannot use dimensionSelectorCache.computeIfAbsent() here since the function being + // applied may modify the dimensionSelectorCache itself through virtual column references, + // triggering a ConcurrentModificationException in JDK 9 and above. + MultiValueDimensionVectorSelector selector = multiValueDimensionSelectorCache.get(dimensionSpec); + if (selector == null) { + selector = mappingFunction.apply(dimensionSpec); + multiValueDimensionSelectorCache.put(dimensionSpec, selector); + } + + return selector; } @Override @@ -114,66 +139,117 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final throw new ISE("DimensionSpec[%s] cannot be vectorized", dimensionSpec); } - return singleValueDimensionSelectorCache.computeIfAbsent( - dimensionSpec, - spec -> { - final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); - if (holder == null - || !holder.getCapabilities().isDictionaryEncoded().isTrue() - || holder.getCapabilities().getType() != ValueType.STRING) { - // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. - return NilVectorSelector.create(offset); - } - - if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { - // Asking for a single-value dimension selector on a multi-value column gets you an error. - throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); - } - - @SuppressWarnings("unchecked") - final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) - getCachedColumn(spec.getDimension()); - - // dictionaryEncodedColumn is not null because of holder null check above - assert dictionaryEncodedColumn != null; - final SingleValueDimensionVectorSelector selector = - dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset); - - return spec.decorate(selector); + Function mappingFunction = spec -> { + if (virtualColumns.exists(spec.getDimension())) { + SingleValueDimensionVectorSelector dimensionSelector = virtualColumns.makeSingleValueDimensionVectorSelector( + dimensionSpec, + index, + offset + ); + if (dimensionSelector == null) { + return virtualColumns.makeSingleValueDimensionVectorSelector(dimensionSpec, this); + } else { + return dimensionSelector; } - ); + } + + final ColumnHolder holder = index.getColumnHolder(spec.getDimension()); + if (holder == null + || !holder.getCapabilities().isDictionaryEncoded().isTrue() + || holder.getCapabilities().getType() != ValueType.STRING) { + // Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls. + return NilVectorSelector.create(offset); + } + + if (holder.getCapabilities().hasMultipleValues().isMaybeTrue()) { + // Asking for a single-value dimension selector on a multi-value column gets you an error. + throw new ISE("Column[%s] is multi-value, do not ask for a single-value selector", spec.getDimension()); + } + + @SuppressWarnings("unchecked") + final DictionaryEncodedColumn dictionaryEncodedColumn = (DictionaryEncodedColumn) + getCachedColumn(spec.getDimension()); + + // dictionaryEncodedColumn is not null because of holder null check above + assert dictionaryEncodedColumn != null; + final SingleValueDimensionVectorSelector selector = + dictionaryEncodedColumn.makeSingleValueDimensionVectorSelector(offset); + + return spec.decorate(selector); + }; + + // We cannot use dimensionSelectorCache.computeIfAbsent() here since the function being + // applied may modify the dimensionSelectorCache itself through virtual column references, + // triggering a ConcurrentModificationException in JDK 9 and above. + SingleValueDimensionVectorSelector selector = singleValueDimensionSelectorCache.get(dimensionSpec); + if (selector == null) { + selector = mappingFunction.apply(dimensionSpec); + singleValueDimensionSelectorCache.put(dimensionSpec, selector); + } + + return selector; } @Override public VectorValueSelector makeValueSelector(final String columnName) { - return valueSelectorCache.computeIfAbsent( - columnName, - name -> { - final BaseColumn column = getCachedColumn(name); - if (column == null) { - return NilVectorSelector.create(offset); - } else { - return column.makeVectorValueSelector(offset); - } + Function mappingFunction = name -> { + if (virtualColumns.exists(columnName)) { + VectorValueSelector selector = virtualColumns.makeVectorValueSelector(columnName, index, offset); + if (selector == null) { + return virtualColumns.makeVectorValueSelector(columnName, this); + } else { + return selector; } - ); + } + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorValueSelector(offset); + } + }; + // We cannot use valueSelectorCache.computeIfAbsent() here since the function being + // applied may modify the valueSelectorCache itself through virtual column references, + // triggering a ConcurrentModificationException in JDK 9 and above. + VectorValueSelector columnValueSelector = valueSelectorCache.get(columnName); + if (columnValueSelector == null) { + columnValueSelector = mappingFunction.apply(columnName); + valueSelectorCache.put(columnName, columnValueSelector); + } + + return columnValueSelector; } @Override public VectorObjectSelector makeObjectSelector(final String columnName) { - return objectSelectorCache.computeIfAbsent( - columnName, - name -> { - final BaseColumn column = getCachedColumn(name); - if (column == null) { - return NilVectorSelector.create(offset); - } else { - return column.makeVectorObjectSelector(offset); - } + Function mappingFunction = name -> { + if (virtualColumns.exists(columnName)) { + VectorObjectSelector selector = virtualColumns.makeVectorObjectSelector(columnName, index, offset); + if (selector == null) { + return virtualColumns.makeVectorObjectSelector(columnName, this); + } else { + return selector; } - ); + } + final BaseColumn column = getCachedColumn(name); + if (column == null) { + return NilVectorSelector.create(offset); + } else { + return column.makeVectorObjectSelector(offset); + } + }; + // We cannot use valueSelectorCache.computeIfAbsent() here since the function being + // applied may modify the valueSelectorCache itself through virtual column references, + // triggering a ConcurrentModificationException in JDK 9 and above. + VectorObjectSelector columnValueSelector = objectSelectorCache.get(columnName); + if (columnValueSelector == null) { + columnValueSelector = mappingFunction.apply(columnName); + objectSelectorCache.put(columnName, columnValueSelector); + } + + return columnValueSelector; } @Nullable @@ -193,6 +269,9 @@ private BaseColumn getCachedColumn(final String columnName) @Override public ColumnCapabilities getColumnCapabilities(final String columnName) { + if (virtualColumns.exists(columnName)) { + return virtualColumns.getColumnCapabilities(columnName); + } return QueryableIndexStorageAdapter.getColumnCapabilities(index, columnName); } } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java new file mode 100644 index 000000000000..ba85e43b60ea --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.CountAggregatorFactory; +import org.apache.druid.query.aggregation.CountVectorAggregator; +import org.apache.druid.query.aggregation.VectorAggregator; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; +import org.junit.Assert; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.List; + +public class AlwaysTwoCounterAggregatorFactory extends CountAggregatorFactory +{ + private final String fieldName; + + public AlwaysTwoCounterAggregatorFactory(String name, String field) + { + super(name); + this.fieldName = field; + } + + @Override + public Aggregator factorize(ColumnSelectorFactory metricFactory) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + ColumnCapabilities capabilities = selectorFactory.getColumnCapabilities(fieldName); + switch (capabilities.getType()) { + case LONG: + case DOUBLE: + case FLOAT: + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeValueSelector(fieldName)); + case STRING: + if (capabilities.isDictionaryEncoded().isTrue()) { + if (capabilities.hasMultipleValues().isTrue()) { + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeMultiValueDimensionSelector( + DefaultDimensionSpec.of(fieldName))); + } + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(fieldName))); + } + return new AlwaysTwoCounterVectorAggregator(selectorFactory.makeObjectSelector(fieldName)); + default: + throw new IllegalStateException("how did this happen"); + } + } + + public static class AlwaysTwoCounterVectorAggregator extends CountVectorAggregator + { + @Nullable + private final VectorValueSelector valueSelector; + @Nullable + private final VectorObjectSelector objectSelector; + @Nullable + private final SingleValueDimensionVectorSelector singleValueDimensionSelector; + @Nullable + private final MultiValueDimensionVectorSelector multiValueDimensionSelector; + + AlwaysTwoCounterVectorAggregator(VectorValueSelector valueSelector) + { + this(valueSelector, null, null, null); + } + + AlwaysTwoCounterVectorAggregator(VectorObjectSelector objectSelector) + { + this(null, objectSelector, null, null); + } + + AlwaysTwoCounterVectorAggregator(SingleValueDimensionVectorSelector dimSelector) + { + this(null, null, dimSelector, null); + } + + AlwaysTwoCounterVectorAggregator(MultiValueDimensionVectorSelector dimSelector) + { + this(null, null, null, dimSelector); + } + + AlwaysTwoCounterVectorAggregator( + @Nullable VectorValueSelector valueSelector, + @Nullable VectorObjectSelector objectSelector, + @Nullable SingleValueDimensionVectorSelector singleValueDimensionSelector, + @Nullable MultiValueDimensionVectorSelector multiValueDimensionSelector + ) + { + this.valueSelector = valueSelector; + this.objectSelector = objectSelector; + this.singleValueDimensionSelector = singleValueDimensionSelector; + this.multiValueDimensionSelector = multiValueDimensionSelector; + } + + @Override + public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) + { + if (valueSelector != null) { + final long[] vector = valueSelector.getLongVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + Assert.assertEquals(2L, vector[i]); + count += 1; + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + if (objectSelector != null) { + final Object[] vector = objectSelector.getObjectVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + if (vector[i] instanceof List) { + Assert.assertEquals(ImmutableList.of("2", "2"), vector[i]); + count += 2; + } else { + Assert.assertEquals("2", vector[i]); + count += 1; + } + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + if (singleValueDimensionSelector != null) { + final int[] rowVector = singleValueDimensionSelector.getRowVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + Assert.assertEquals("2", singleValueDimensionSelector.lookupName(rowVector[i])); + count += 1; + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + if (multiValueDimensionSelector != null) { + final IndexedInts[] rowVector = multiValueDimensionSelector.getRowVector(); + + long count = 0; + for (int i = startRow; i < endRow; i++) { + for (int j = 0; j < rowVector[i].size(); j++){ + Assert.assertEquals("2", multiValueDimensionSelector.lookupName(rowVector[i].get(j))); + count += 1; + } + } + + buf.putLong(position, buf.getLong(position) + count); + return; + } + throw new IllegalStateException("how did this happen"); + } + + @Override + public void aggregate( + ByteBuffer buf, + int numRows, + int[] positions, + @Nullable int[] rows, + int positionOffset + ) + { + if (valueSelector != null) { + final long[] vector = valueSelector.getLongVector(); + + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + Assert.assertEquals(2L, vector[i]); + buf.putLong(position, buf.getLong(position) + 1); + } + return; + } + if (objectSelector != null) { + final Object[] vector = objectSelector.getObjectVector(); + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + if (vector[i] instanceof List) { + Assert.assertEquals(ImmutableList.of("2", "2"), vector[i]); + buf.putLong(position, buf.getLong(position) + 2); + } else { + Assert.assertEquals("2", vector[i]); + buf.putLong(position, buf.getLong(position) + 1); + } + } + return; + } + if (singleValueDimensionSelector != null) { + final int[] rowVector = singleValueDimensionSelector.getRowVector(); + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + Assert.assertEquals("2", singleValueDimensionSelector.lookupName(rowVector[i])); + buf.putLong(position, buf.getLong(position) + 1); + } + return; + } + if (multiValueDimensionSelector != null) { + final IndexedInts[] rowVector = multiValueDimensionSelector.getRowVector(); + for (int i = 0; i < numRows; i++) { + final int position = positions[i] + positionOffset; + for (int j = 0; j < rowVector[i].size(); j++){ + Assert.assertEquals("2", multiValueDimensionSelector.lookupName(rowVector[i].get(j))); + buf.putLong(position, buf.getLong(position) + 1); + } + } + return; + } + throw new IllegalStateException("how did this happen"); + } + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java new file mode 100644 index 000000000000..4acc810ea8bb --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorSizeInspector; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.List; + +public class AlwaysTwoVectorizedVirtualColumn implements VirtualColumn +{ + private final String outputName; + private final ColumnCapabilities capabilities; + private final boolean dictionaryEncoded; + + public AlwaysTwoVectorizedVirtualColumn( + String name, + ColumnCapabilities capabilites + ) + { + this.outputName = name; + this.capabilities = capabilites; + this.dictionaryEncoded = capabilites.isDictionaryEncoded().isTrue() && capabilites.areDictionaryValuesUnique().isTrue(); + } + + @Override + public boolean canVectorize(ColumnInspector inspector) + { + return true; + } + + @Override + public String getOutputName() + { + return outputName; + } + + @Override + public DimensionSelector makeDimensionSelector( + DimensionSpec dimensionSpec, ColumnSelectorFactory factory + ) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public ColumnValueSelector makeColumnValueSelector( + String columnName, ColumnSelectorFactory factory + ) + { + throw new IllegalStateException("don't call this"); + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory + ) + { + + return new SingleValueDimensionVectorSelector() + { + private final VectorSizeInspector inspector = factory.getVectorSizeInspector(); + private final int[] rowVector = new int[inspector.getMaxVectorSize()]; + + @Override + public int[] getRowVector() + { + + return rowVector; + } + + @Override + public int getValueCardinality() + { + return dictionaryEncoded ? 1 : CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + return "2"; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return dictionaryEncoded; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return inspector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return inspector.getCurrentVectorSize(); + } + }; + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( + DimensionSpec dimensionSpec, VectorColumnSelectorFactory factory + ) + { + final IndexedInts[] rowVector = new IndexedInts[factory.getVectorSizeInspector().getMaxVectorSize()]; + Arrays.fill(rowVector, new IndexedInts() + { + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + // nothing + } + + @Override + public int size() + { + return 2; + } + + @Override + public int get(int index) + { + return 0; + } + }); + return new MultiValueDimensionVectorSelector() + { + private final VectorSizeInspector inspector = factory.getVectorSizeInspector(); + + @Override + public IndexedInts[] getRowVector() + { + return rowVector; + } + + @Override + public int getValueCardinality() + { + return dictionaryEncoded ? 1 : CARDINALITY_UNKNOWN; + } + + @Nullable + @Override + public String lookupName(int id) + { + return "2"; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return dictionaryEncoded; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return inspector.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return inspector.getCurrentVectorSize(); + } + }; + } + + @Override + public VectorValueSelector makeVectorValueSelector( + String columnName, VectorColumnSelectorFactory factory + ) + { + final long[] longs = new long[factory.getVectorSizeInspector().getMaxVectorSize()]; + final double[] doubles = new double[factory.getVectorSizeInspector().getMaxVectorSize()]; + final float[] floats = new float[factory.getVectorSizeInspector().getMaxVectorSize()]; + Arrays.fill(longs, 2L); + Arrays.fill(doubles, 2.0); + Arrays.fill(floats, 2.0f); + return new VectorValueSelector() + { + @Override + public long[] getLongVector() + { + return longs; + } + + @Override + public float[] getFloatVector() + { + return floats; + } + + @Override + public double[] getDoubleVector() + { + return doubles; + } + + @Nullable + @Override + public boolean[] getNullVector() + { + return null; + } + + @Override + public int getMaxVectorSize() + { + return factory.getVectorSizeInspector().getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return factory.getVectorSizeInspector().getCurrentVectorSize(); + } + }; + } + + @Override + public VectorObjectSelector makeVectorObjectSelector( + String columnName, VectorColumnSelectorFactory factory + ) + { + final Object[] objects = new Object[factory.getVectorSizeInspector().getMaxVectorSize()]; + Arrays.fill(objects, "2"); + return new VectorObjectSelector() + { + @Override + public int getMaxVectorSize() + { + return factory.getVectorSizeInspector().getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return factory.getVectorSizeInspector().getCurrentVectorSize(); + } + + @Override + public Object[] getObjectVector() + { + return objects; + } + }; + } + + @Override + public ColumnCapabilities capabilities(String columnName) + { + return capabilities; + } + + @Override + public List requiredColumns() + { + return ImmutableList.of(); + } + + @Override + public boolean usesDotNotation() + { + return false; + } + + @Override + public byte[] getCacheKey() + { + return new byte[0]; + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java new file mode 100644 index 000000000000..041befeefb7c --- /dev/null +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.virtual; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryContexts; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.Result; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTestHelper; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.timeseries.TimeseriesQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.Segment; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.timeline.SegmentId; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class VectorizedVirtualColumnTest +{ + private static final String ALWAYS_TWO = "two"; + private static final String COUNT = "count"; + private static final Map CONTEXT = ImmutableMap.of(QueryContexts.VECTORIZE_KEY, "force"); + + @Rule + public final TemporaryFolder tmpFolder = new TemporaryFolder(); + + @Rule + public ExpectedException expectedException = ExpectedException.none(); + + private AggregationTestHelper groupByTestHelper; + private AggregationTestHelper timeseriesTestHelper; + private List segments = null; + + @Before + public void setup() throws IOException + { + groupByTestHelper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + Collections.emptyList(), + new GroupByQueryConfig(), + tmpFolder + ); + timeseriesTestHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( + Collections.emptyList(), + tmpFolder + ); + QueryableIndexSegment queryableIndexSegment = new QueryableIndexSegment( + TestIndex.getMMappedTestIndex(), + SegmentId.dummy(QueryRunnerTestHelper.DATA_SOURCE) + ); + + segments = Lists.newArrayList(queryableIndexSegment, queryableIndexSegment); + } + + @Test + public void testGroupBySingleValueString() + { + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(false) + ); + } + + @Test + public void testGroupByMultiValueString() + { + // cannot currently group by string columns that might be multi valued + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(true) + ); + } + + @Test + public void testGroupByMultiValueStringUnknown() + { + // cannot currently group by string columns that might be multi valued + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + ); + } + + @Test + public void testGroupBySingleValueStringNotDictionaryEncoded() + { + // cannot currently group by string columns that are not dictionary encoded + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(false) + ); + } + + @Test + public void testGroupByLong() + { + testGroupBy(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)); + } + + @Test + public void testGroupByDouble() + { + testGroupBy(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE)); + } + + @Test + public void testGroupByFloat() + { + testGroupBy(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT)); + } + + @Test + public void testTimeseriesSingleValueString() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(false) + ); + } + + @Test + public void testTimeseriesMultiValueString() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + .setHasMultipleValues(true) + ); + } + + @Test + public void testTimeseriesMultiValueStringUnknown() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(true) + .setDictionaryValuesUnique(true) + ); + } + + @Test + public void testTimeseriesSingleValueStringNotDictionaryEncoded() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(false) + ); + } + + @Test + public void testTimeseriesLong() + { + testTimeseries(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)); + } + + @Test + public void testTimeseriesDouble() + { + testTimeseries(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE)); + } + + @Test + public void testTimeseriesFloat() + { + testTimeseries(ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT)); + } + + private void testTimeseries(ColumnCapabilities capabilities) + { + TimeseriesQuery query = Druids.newTimeseriesQueryBuilder() + .intervals("2000/2030") + .dataSource(QueryRunnerTestHelper.DATA_SOURCE) + .granularity(Granularities.ALL) + .virtualColumns(new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO,capabilities)) + .aggregators(new AlwaysTwoCounterAggregatorFactory(COUNT, ALWAYS_TWO)) + .context(CONTEXT) + .build(); + + Sequence seq = timeseriesTestHelper.runQueryOnSegmentsObjs(segments, query); + + List> expectedResults = ImmutableList.of( + new Result<>( + DateTimes.of("2011-01-12T00:00:00.000Z"), + new TimeseriesResultValue( + ImmutableMap.of(COUNT, getCount(capabilities)) + ) + ) + ); + + TestHelper.assertExpectedObjects(expectedResults, seq.toList(), "failed"); + } + + private void testGroupBy(ColumnCapabilities capabilities) + { + GroupByQuery query = new GroupByQuery.Builder() + .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO,capabilities) + ) + .addDimension(new DefaultDimensionSpec(ALWAYS_TWO, ALWAYS_TWO, capabilities.getType())) + .setAggregatorSpecs(new AlwaysTwoCounterAggregatorFactory(COUNT, ALWAYS_TWO)) + .setInterval("2000/2030") + .setContext(CONTEXT) + .addOrderByColumn(ALWAYS_TWO) + .build(); + + List rows = groupByTestHelper.runQueryOnSegmentsObjs(segments, query).toList(); + + List expectedRows = Collections.singletonList( + GroupByQueryRunnerTestHelper.createExpectedRow(query, "2000", COUNT, getCount(capabilities), ALWAYS_TWO, getTwo(capabilities)) + ); + + TestHelper.assertExpectedObjects(expectedRows, rows, "failed"); + } + + private long getCount(ColumnCapabilities capabilities) + { + long modifier = 1L; + if (capabilities.hasMultipleValues().isTrue()) { + modifier = 2L; + } + return 2418L * modifier; + } + + private Object getTwo(ColumnCapabilities capabilities) + { + switch (capabilities.getType()) { + case LONG: + return 2L; + case DOUBLE: + return 2.0; + case FLOAT: + return 2.0f; + case STRING: + default: + if (capabilities.hasMultipleValues().isTrue()) { + return ImmutableList.of("2", "2"); + } + return "2"; + } + } + + private void cannotVectorize() + { + expectedException.expect(RuntimeException.class); + expectedException.expectMessage("Cannot vectorize!"); + } +} From b88bd174add261800c2a34ef10553c78fd391034 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 11 Sep 2020 19:17:06 -0700 Subject: [PATCH 2/7] javadoc, refactor, checkstyle --- .../apache/druid/segment/VirtualColumn.java | 1 - .../apache/druid/segment/VirtualColumns.java | 226 +++++++++--------- .../AlwaysTwoCounterAggregatorFactory.java | 6 +- .../AlwaysTwoVectorizedVirtualColumn.java | 21 +- .../virtual/VectorizedVirtualColumnTest.java | 13 +- 5 files changed, 141 insertions(+), 126 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java index a59212f537c4..611a3ec6ea81 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java @@ -283,5 +283,4 @@ default BitmapIndex getBitmapIndex(String columnName, ColumnSelector selector) { throw new UnsupportedOperationException("not supported"); } - } diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 5cfc2912aab4..59ae9f752dae 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -161,39 +161,33 @@ public VirtualColumn getVirtualColumn(String columnName) @Nullable public BitmapIndex getBitmapIndex(String columnName, ColumnSelector columnSelector) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - return virtualColumn.capabilities(columnName).hasBitmapIndexes() ? virtualColumn.getBitmapIndex( - columnName, - columnSelector - ) : null; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.capabilities(columnName).hasBitmapIndexes() ? virtualColumn.getBitmapIndex( + columnName, + columnSelector + ) : null; } /** * Create a dimension (string) selector. * - * @param dimensionSpec the dimensionSpec for this selector - * @param factory base column selector factory - * - * @return selector - * * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} */ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - final DimensionSelector selector = virtualColumn.makeDimensionSelector(dimensionSpec, factory); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + final DimensionSelector selector = virtualColumn.makeDimensionSelector(dimensionSpec, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; } + /** + * Try to create an optimized dimension (string) selector directly from a {@link ColumnSelector}. If this method + * returns null, callers should try to fallback to + * {@link #makeDimensionSelector(DimensionSpec, ColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public DimensionSelector makeDimensionSelector( DimensionSpec dimensionSpec, @@ -201,36 +195,29 @@ public DimensionSelector makeDimensionSelector( ReadableOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - return virtualColumn.makeDimensionSelector(dimensionSpec, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + return virtualColumn.makeDimensionSelector(dimensionSpec, columnSelector, offset); } /** * Create a column value selector. * - * @param columnName column mame - * @param factory base column selector factory - * - * @return selector - * * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} */ public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - final ColumnValueSelector selector = virtualColumn.makeColumnValueSelector(columnName, factory); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + final ColumnValueSelector selector = virtualColumn.makeColumnValueSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; } + /** + * Try to create an optimized value selector directly from a {@link ColumnSelector}. If this method returns null, + * callers should try to fallback to {@link #makeColumnValueSelector(String, ColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public ColumnValueSelector makeColumnValueSelector( String columnName, @@ -238,12 +225,8 @@ public ColumnValueSelector makeColumnValueSelector( ReadableOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - return virtualColumn.makeColumnValueSelector(columnName, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.makeColumnValueSelector(columnName, columnSelector, offset); } public boolean canVectorize(ColumnInspector columnInspector) @@ -251,24 +234,32 @@ public boolean canVectorize(ColumnInspector columnInspector) return virtualColumns.stream().allMatch(virtualColumn -> virtualColumn.canVectorize(columnInspector)); } + /** + * Create a single value dimension vector (string) selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( DimensionSpec dimensionSpec, VectorColumnSelectorFactory factory ) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - final SingleValueDimensionVectorSelector selector = virtualColumn.makeSingleValueVectorDimensionSelector( - dimensionSpec, - factory - ); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + final SingleValueDimensionVectorSelector selector = virtualColumn.makeSingleValueVectorDimensionSelector( + dimensionSpec, + factory + ); + Preconditions.checkNotNull(selector, "selector"); + return selector; } + /** + * Try to create an optimized single value dimension (string) vector selector, directly from a + * {@link ColumnSelector}. If this method returns null, callers should try to fallback to + * {@link #makeSingleValueDimensionVectorSelector(DimensionSpec, VectorColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector( DimensionSpec dimensionSpec, @@ -276,32 +267,36 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector ReadableVectorOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - return virtualColumn.makeSingleValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + return virtualColumn.makeSingleValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); } + /** + * Create a multi value dimension vector (string) selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector( DimensionSpec dimensionSpec, VectorColumnSelectorFactory factory ) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - final MultiValueDimensionVectorSelector selector = virtualColumn.makeMultiValueVectorDimensionSelector( - dimensionSpec, - factory - ); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + final MultiValueDimensionVectorSelector selector = virtualColumn.makeMultiValueVectorDimensionSelector( + dimensionSpec, + factory + ); + Preconditions.checkNotNull(selector, "selector"); + return selector; } + /** + * Try to create an optimized multi value dimension (string) vector selector, directly from a + * {@link ColumnSelector}. If this method returns null, callers should try to fallback to + * {@link #makeMultiValueDimensionVectorSelector(DimensionSpec, VectorColumnSelectorFactory)} instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector( DimensionSpec dimensionSpec, @@ -309,26 +304,30 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector( ReadableVectorOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(dimensionSpec.getDimension()); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", dimensionSpec.getDimension()); - } else { - return virtualColumn.makeMultiValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(dimensionSpec.getDimension()); + return virtualColumn.makeMultiValueVectorDimensionSelector(dimensionSpec, columnSelector, offset); } + /** + * Create a column vector value selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - final VectorValueSelector selector = virtualColumn.makeVectorValueSelector(columnName, factory); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + final VectorValueSelector selector = virtualColumn.makeVectorValueSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; } + /** + * Try to create an optimized vector value selector directly from a {@link ColumnSelector}. If this method returns + * null, callers should try to fallback to {@link #makeVectorValueSelector(String, VectorColumnSelectorFactory)} + * instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public VectorValueSelector makeVectorValueSelector( String columnName, @@ -336,26 +335,30 @@ public VectorValueSelector makeVectorValueSelector( ReadableVectorOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - return virtualColumn.makeVectorValueSelector(columnName, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.makeVectorValueSelector(columnName, columnSelector, offset); } + /** + * Create a column vector object selector. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ public VectorObjectSelector makeVectorObjectSelector(String columnName, VectorColumnSelectorFactory factory) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - final VectorObjectSelector selector = virtualColumn.makeVectorObjectSelector(columnName, factory); - Preconditions.checkNotNull(selector, "selector"); - return selector; - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + final VectorObjectSelector selector = virtualColumn.makeVectorObjectSelector(columnName, factory); + Preconditions.checkNotNull(selector, "selector"); + return selector; } + /** + * Try to create an optimized vector object selector directly from a {@link ColumnSelector}.If this method returns + * null, callers should try to fallback to {@link #makeVectorObjectSelector(String, VectorColumnSelectorFactory)} + * instead. + * + * @throws IllegalArgumentException if the virtual column does not exist (see {@link #exists(String)} + */ @Nullable public VectorObjectSelector makeVectorObjectSelector( String columnName, @@ -363,12 +366,8 @@ public VectorObjectSelector makeVectorObjectSelector( ReadableVectorOffset offset ) { - final VirtualColumn virtualColumn = getVirtualColumn(columnName); - if (virtualColumn == null) { - throw new IAE("No such virtual column[%s]", columnName); - } else { - return virtualColumn.makeVectorObjectSelector(columnName, columnSelector, offset); - } + final VirtualColumn virtualColumn = getVirtualColumnForSelector(columnName); + return virtualColumn.makeVectorObjectSelector(columnName, columnSelector, offset); } @Nullable @@ -425,6 +424,15 @@ public byte[] getCacheKey() return new CacheKeyBuilder((byte) 0).appendCacheablesIgnoringOrder(virtualColumns).build(); } + private VirtualColumn getVirtualColumnForSelector(String columnName) + { + VirtualColumn virtualColumn = getVirtualColumn(columnName); + if (virtualColumn == null) { + throw new IAE("No such virtual column[%s]", columnName); + } + return virtualColumn; + } + private void detectCycles(VirtualColumn virtualColumn, @Nullable Set columnNames) { // Copy columnNames to avoid modifying it diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java index ba85e43b60ea..d2b3aba68b32 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java @@ -178,7 +178,8 @@ public void aggregate(ByteBuffer buf, int position, int startRow, int endRow) long count = 0; for (int i = startRow; i < endRow; i++) { - for (int j = 0; j < rowVector[i].size(); j++){ + //noinspection SSBasedInspection + for (int j = 0; j < rowVector[i].size(); j++) { Assert.assertEquals("2", multiValueDimensionSelector.lookupName(rowVector[i].get(j))); count += 1; } @@ -236,7 +237,8 @@ public void aggregate( final IndexedInts[] rowVector = multiValueDimensionSelector.getRowVector(); for (int i = 0; i < numRows; i++) { final int position = positions[i] + positionOffset; - for (int j = 0; j < rowVector[i].size(); j++){ + //noinspection SSBasedInspection + for (int j = 0; j < rowVector[i].size(); j++) { Assert.assertEquals("2", multiValueDimensionSelector.lookupName(rowVector[i].get(j))); buf.putLong(position, buf.getLong(position) + 1); } diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java index 4acc810ea8bb..3b302f049202 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java @@ -54,7 +54,8 @@ public AlwaysTwoVectorizedVirtualColumn( { this.outputName = name; this.capabilities = capabilites; - this.dictionaryEncoded = capabilites.isDictionaryEncoded().isTrue() && capabilites.areDictionaryValuesUnique().isTrue(); + this.dictionaryEncoded = capabilites.isDictionaryEncoded().isTrue() && + capabilites.areDictionaryValuesUnique().isTrue(); } @Override @@ -70,17 +71,13 @@ public String getOutputName() } @Override - public DimensionSelector makeDimensionSelector( - DimensionSpec dimensionSpec, ColumnSelectorFactory factory - ) + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory) { throw new IllegalStateException("don't call this"); } @Override - public ColumnValueSelector makeColumnValueSelector( - String columnName, ColumnSelectorFactory factory - ) + public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) { throw new IllegalStateException("don't call this"); } @@ -91,7 +88,6 @@ public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector VectorColumnSelectorFactory factory ) { - return new SingleValueDimensionVectorSelector() { private final VectorSizeInspector inspector = factory.getVectorSizeInspector(); @@ -146,7 +142,8 @@ public int getCurrentVectorSize() @Override public MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( - DimensionSpec dimensionSpec, VectorColumnSelectorFactory factory + DimensionSpec dimensionSpec, + VectorColumnSelectorFactory factory ) { final IndexedInts[] rowVector = new IndexedInts[factory.getVectorSizeInspector().getMaxVectorSize()]; @@ -222,7 +219,8 @@ public int getCurrentVectorSize() @Override public VectorValueSelector makeVectorValueSelector( - String columnName, VectorColumnSelectorFactory factory + String columnName, + VectorColumnSelectorFactory factory ) { final long[] longs = new long[factory.getVectorSizeInspector().getMaxVectorSize()]; @@ -274,7 +272,8 @@ public int getCurrentVectorSize() @Override public VectorObjectSelector makeVectorObjectSelector( - String columnName, VectorColumnSelectorFactory factory + String columnName, + VectorColumnSelectorFactory factory ) { final Object[] objects = new Object[factory.getVectorSizeInspector().getMaxVectorSize()]; diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java index 041befeefb7c..5bc25d091b89 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java @@ -226,7 +226,7 @@ private void testTimeseries(ColumnCapabilities capabilities) .intervals("2000/2030") .dataSource(QueryRunnerTestHelper.DATA_SOURCE) .granularity(Granularities.ALL) - .virtualColumns(new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO,capabilities)) + .virtualColumns(new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO, capabilities)) .aggregators(new AlwaysTwoCounterAggregatorFactory(COUNT, ALWAYS_TWO)) .context(CONTEXT) .build(); @@ -251,7 +251,7 @@ private void testGroupBy(ColumnCapabilities capabilities) .setDataSource(QueryRunnerTestHelper.DATA_SOURCE) .setGranularity(Granularities.ALL) .setVirtualColumns( - new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO,capabilities) + new AlwaysTwoVectorizedVirtualColumn(ALWAYS_TWO, capabilities) ) .addDimension(new DefaultDimensionSpec(ALWAYS_TWO, ALWAYS_TWO, capabilities.getType())) .setAggregatorSpecs(new AlwaysTwoCounterAggregatorFactory(COUNT, ALWAYS_TWO)) @@ -263,7 +263,14 @@ private void testGroupBy(ColumnCapabilities capabilities) List rows = groupByTestHelper.runQueryOnSegmentsObjs(segments, query).toList(); List expectedRows = Collections.singletonList( - GroupByQueryRunnerTestHelper.createExpectedRow(query, "2000", COUNT, getCount(capabilities), ALWAYS_TWO, getTwo(capabilities)) + GroupByQueryRunnerTestHelper.createExpectedRow( + query, + "2000", + COUNT, + getCount(capabilities), + ALWAYS_TWO, + getTwo(capabilities) + ) ); TestHelper.assertExpectedObjects(expectedRows, rows, "failed"); From 294b58a304248a6ccecd1aed2e3c6d2d7c57332e Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 11 Sep 2020 20:32:25 -0700 Subject: [PATCH 3/7] intellij inspection and more javadoc --- .../apache/druid/segment/VirtualColumn.java | 68 +++++++------------ 1 file changed, 26 insertions(+), 42 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java index 611a3ec6ea81..a3dfdbe23d26 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java @@ -63,11 +63,6 @@ public interface VirtualColumn extends Cacheable * virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which * is useful if this column uses dot notation. The virtual column is expected to apply any * necessary decoration from the dimensionSpec. - * - * @param dimensionSpec the dimensionSpec this column was referenced with - * @param factory column selector factory - * - * @return the selector, must not be null */ DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory); @@ -77,11 +72,6 @@ public interface VirtualColumn extends Cacheable * * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. - * - * @param dimensionSpec - * @param columnSelector - * @param offset - * @return the selector */ @SuppressWarnings("unused") @Nullable @@ -93,11 +83,6 @@ default DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, Col /** * Build a selector corresponding to this virtual column. Also provides the name that the * virtual column was referenced with, which is useful if this column uses dot notation. - * - * @param columnName the name this virtual column was referenced with - * @param factory column selector factory - * - * @return the selector, must not be null */ ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory); @@ -107,11 +92,6 @@ default DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, Col * * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. - * - * @param columnName - * @param columnSelector - * @param offset - * @return the selector */ @SuppressWarnings("unused") @Nullable @@ -126,13 +106,9 @@ default boolean canVectorize(ColumnInspector inspector) } /** - * Build a vectorized dictionary indexed selector corresponding to this virtual column. Also provides the name that + * Build a {@link SingleValueDimensionVectorSelector} corresponding to this virtual column. Also provides the name that * the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful if this * column uses dot notation. The virtual column is expected to apply any necessary decoration from the dimensionSpec. - * - * @param dimensionSpec the dimensionSpec this column was referenced with - * @param factory vector column selector factory - * * @return the selector, must not be null */ default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( @@ -147,11 +123,6 @@ default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelecto * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this * method has full access to underlying column and can potentially provide a more efficient implementation. - * - * @param dimensionSpec - * @param columnSelector - * @param offset - * @return the selector */ @SuppressWarnings("unused") @Nullable @@ -165,15 +136,10 @@ default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelecto } /** - * Build a vectorized 'multi-valued' dictionary indexed selector corresponding to this virtual column. Also provides + * Build a {@link MultiValueDimensionVectorSelector} corresponding to this virtual column. Also provides * the name that the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful * if this column uses dot notation. The virtual column is expected to apply any necessary decoration from the * dimensionSpec. - * - * @param dimensionSpec the dimensionSpec this column was referenced with - * @param factory vector column selector factory - * - * @return the selector, must not be null */ default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( DimensionSpec dimensionSpec, @@ -187,11 +153,6 @@ default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this * method has full access to underlying column and can potentially provide a more efficient implementation. - * - * @param dimensionSpec - * @param columnSelector - * @param offset - * @return the selector */ @SuppressWarnings("unused") @Nullable @@ -205,11 +166,23 @@ default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( } + /** + * Build a {@link VectorValueSelector} corresponding to this virtual column. Also provides the name that the + * virtual column was referenced with, which is useful if this column uses dot notation. + */ default VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) { throw new UnsupportedOperationException("not supported"); } + /** + * Returns similar VectorValueSelector object as returned by {@link #makeVectorValueSelector(String, VectorColumnSelectorFactory)} + * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in + * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. + */ + @SuppressWarnings("unused") @Nullable default VectorValueSelector makeVectorValueSelector( String columnName, @@ -220,12 +193,23 @@ default VectorValueSelector makeVectorValueSelector( return null; } - + /** + * Build a {@link VectorObjectSelector} corresponding to this virtual column. Also provides the name that the + * virtual column was referenced with, which is useful if this column uses dot notation. + */ default VectorObjectSelector makeVectorObjectSelector(String columnName, VectorColumnSelectorFactory factory) { throw new UnsupportedOperationException("not supported"); } + /** + * Returns similar VectorObjectSelector object as returned by {@link #makeVectorObjectSelector(String, VectorColumnSelectorFactory)} + * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in + * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. + */ + @SuppressWarnings("unused") @Nullable default VectorObjectSelector makeVectorObjectSelector( String columnName, From 8f9bfb58a801f2be561f2891a33b17ee46b7948c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 11 Sep 2020 22:18:24 -0700 Subject: [PATCH 4/7] better --- .../epinephelinae/GroupByQueryEngineV2.java | 18 ++--- .../vector/VectorGroupByEngine.java | 12 ---- .../apache/druid/segment/VirtualColumn.java | 66 +++++++++++-------- .../apache/druid/segment/VirtualColumns.java | 5 -- .../AlwaysTwoVectorizedVirtualColumn.java | 12 +++- .../virtual/VectorizedVirtualColumnTest.java | 27 +++++++- 6 files changed, 83 insertions(+), 57 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index b4cda9b54d93..805606f5ceb7 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -74,7 +74,6 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; -import java.util.function.Function; import java.util.stream.Stream; /** @@ -231,7 +230,7 @@ public GroupByEngineIterator make() processingBuffer, fudgeTimestamp, dims, - isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions(), false), + isAllSingleValueDims(columnSelectorFactory, query.getDimensions()), cardinalityForArrayAggregation ); } else { @@ -242,7 +241,7 @@ public GroupByEngineIterator make() processingBuffer, fudgeTimestamp, dims, - isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions(), false) + isAllSingleValueDims(columnSelectorFactory, query.getDimensions()) ); } } @@ -319,13 +318,11 @@ public static int getCardinalityForArrayAggregation( /** * Checks whether all "dimensions" are either single-valued, or if allowed, nonexistent. Since non-existent column * selectors will show up as full of nulls they are effectively single valued, however they can also be null during - * broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonExistent' is sort of a hack to let - * the vectorized engine, which only operates on actual segments, to still work in this case for non-existent columns. + * broker merge, for example with an 'inline' datasource subquery. */ public static boolean isAllSingleValueDims( - final Function capabilitiesFunction, - final List dimensions, - final boolean missingMeansNonExistent + final ColumnSelectorFactory factory, + final List dimensions ) { return dimensions @@ -339,9 +336,8 @@ public static boolean isAllSingleValueDims( } // Now check column capabilities. - final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension()); - return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) || - (missingMeansNonExistent && columnCapabilities == null); + final ColumnCapabilities columnCapabilities = factory.getColumnCapabilities(dimension.getDimension()); + return columnCapabilities == null || columnCapabilities.hasMultipleValues().isFalse(); }); } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index be8bf9f9abfe..e2f2e756be96 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -73,18 +73,6 @@ public static boolean canVectorize( @Nullable final Filter filter ) { - // Multi-value dimensions are not yet supported. - // - // Two notes here about how we're handling this check: - // 1) After multi-value dimensions are supported, we could alter "GroupByQueryEngineV2.isAllSingleValueDims" - // to accept a ColumnSelectorFactory, which makes more sense than using a StorageAdapter (see #8013). - // 2) Technically using StorageAdapter here is bad since it only looks at real columns, but they might - // be shadowed by virtual columns (again, see #8013). But it's fine for now since adapter.canVectorize - // always returns false if there are any virtual columns. - // - // This situation should sort itself out pretty well once this engine supports multi-valued columns. Then we - // won't have to worry about having this all-single-value-dims check here. - Function capabilitiesFunction = name -> query.getVirtualColumns().getColumnCapabilitiesWithFallback(adapter, name); diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java index a3dfdbe23d26..f7299dbad01b 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumn.java @@ -67,35 +67,45 @@ public interface VirtualColumn extends Cacheable DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelectorFactory factory); /** - * Returns similar DimensionSelector object as returned by {@link #makeDimensionSelector(DimensionSpec, ColumnSelectorFactory)} - * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * Returns similar {@link DimensionSelector} object as returned by + * {@link #makeDimensionSelector(DimensionSpec, ColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. * - * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in - * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. + * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called + * in query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. */ @SuppressWarnings("unused") @Nullable - default DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec, ColumnSelector columnSelector, ReadableOffset offset) + default DimensionSelector makeDimensionSelector( + DimensionSpec dimensionSpec, + ColumnSelector columnSelector, + ReadableOffset offset + ) { return null; } /** - * Build a selector corresponding to this virtual column. Also provides the name that the + * Build a {@link ColumnValueSelector} corresponding to this virtual column. Also provides the name that the * virtual column was referenced with, which is useful if this column uses dot notation. */ ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory); /** - * Returns similar ColumnValueSelector object as returned by {@link #makeColumnValueSelector(String, ColumnSelectorFactory)} - * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * Returns similar {@link ColumnValueSelector} object as returned by + * {@link #makeColumnValueSelector(String, ColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. * - * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in - * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. + * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called + * in query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. */ @SuppressWarnings("unused") @Nullable - default ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelector columnSelector, ReadableOffset offset) + default ColumnValueSelector makeColumnValueSelector( + String columnName, + ColumnSelector columnSelector, + ReadableOffset offset + ) { return null; } @@ -106,10 +116,10 @@ default boolean canVectorize(ColumnInspector inspector) } /** - * Build a {@link SingleValueDimensionVectorSelector} corresponding to this virtual column. Also provides the name that - * the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful if this - * column uses dot notation. The virtual column is expected to apply any necessary decoration from the dimensionSpec. - * @return the selector, must not be null + * Build a {@link SingleValueDimensionVectorSelector} corresponding to this virtual column. Also provides the name + * that the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful if this + * column uses dot notation. The virtual column is expected to apply any necessary decoration from the + * {@link DimensionSpec}. */ default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector( DimensionSpec dimensionSpec, @@ -122,7 +132,9 @@ default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelecto /** * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this - * method has full access to underlying column and can potentially provide a more efficient implementation. + * method has full access to the underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. */ @SuppressWarnings("unused") @Nullable @@ -139,7 +151,7 @@ default SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelecto * Build a {@link MultiValueDimensionVectorSelector} corresponding to this virtual column. Also provides * the name that the virtual column was referenced with (through {@link DimensionSpec#getDimension()}, which is useful * if this column uses dot notation. The virtual column is expected to apply any necessary decoration from the - * dimensionSpec. + * {@link DimensionSpec}. */ default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( DimensionSpec dimensionSpec, @@ -152,7 +164,9 @@ default MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( /** * Returns similar {@link SingleValueDimensionVectorSelector} object as returned by * {@link #makeSingleValueVectorDimensionSelector(DimensionSpec, ColumnSelector, ReadableVectorOffset)} except this - * method has full access to underlying column and can potentially provide a more efficient implementation. + * method has full access to the underlying column and can potentially provide a more efficient implementation. + * + * Users of this interface must ensure to first call this method whenever possible. */ @SuppressWarnings("unused") @Nullable @@ -176,11 +190,11 @@ default VectorValueSelector makeVectorValueSelector(String columnName, VectorCol } /** - * Returns similar VectorValueSelector object as returned by {@link #makeVectorValueSelector(String, VectorColumnSelectorFactory)} - * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * Returns similar {@link VectorValueSelector} object as returned by + * {@link #makeVectorValueSelector(String, VectorColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. * - * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in - * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. + * Users of this interface must ensure to first call this method whenever possible. */ @SuppressWarnings("unused") @Nullable @@ -203,11 +217,11 @@ default VectorObjectSelector makeVectorObjectSelector(String columnName, VectorC } /** - * Returns similar VectorObjectSelector object as returned by {@link #makeVectorObjectSelector(String, VectorColumnSelectorFactory)} - * except this method has full access to underlying column and can potentially provide a more efficient implementation. + * Returns similar {@link VectorObjectSelector} object as returned by + * {@link #makeVectorObjectSelector(String, VectorColumnSelectorFactory)} except this method has full access to the + * underlying column and can potentially provide a more efficient implementation. * - * Users of this interface must ensure to first call this method whenever possible. Typically this can not be called in - * query paths on top of IncrementalIndex which doesn't have columns as in persisted segments. + * Users of this interface must ensure to first call this method whenever possible. */ @SuppressWarnings("unused") @Nullable diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 59ae9f752dae..db57bcd8ee72 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -403,11 +403,6 @@ public VirtualColumn[] getVirtualColumns() return virtualColumns.toArray(new VirtualColumn[0]); } - public int size() - { - return virtualColumns.size(); - } - public ColumnSelectorFactory wrap(final ColumnSelectorFactory baseFactory) { if (virtualColumns.isEmpty()) { diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java index 3b302f049202..227793ff66d6 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorSizeInspector; import org.apache.druid.segment.vector.VectorValueSelector; +import org.junit.Assert; import javax.annotation.Nullable; import java.util.Arrays; @@ -61,6 +62,7 @@ public AlwaysTwoVectorizedVirtualColumn( @Override public boolean canVectorize(ColumnInspector inspector) { + Assert.assertNotNull(inspector); return true; } @@ -88,6 +90,7 @@ public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector VectorColumnSelectorFactory factory ) { + Assert.assertEquals(outputName, dimensionSpec.getOutputName()); return new SingleValueDimensionVectorSelector() { private final VectorSizeInspector inspector = factory.getVectorSizeInspector(); @@ -146,6 +149,7 @@ public MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( VectorColumnSelectorFactory factory ) { + Assert.assertEquals(outputName, dimensionSpec.getOutputName()); final IndexedInts[] rowVector = new IndexedInts[factory.getVectorSizeInspector().getMaxVectorSize()]; Arrays.fill(rowVector, new IndexedInts() { @@ -223,6 +227,7 @@ public VectorValueSelector makeVectorValueSelector( VectorColumnSelectorFactory factory ) { + Assert.assertEquals(outputName, columnName); final long[] longs = new long[factory.getVectorSizeInspector().getMaxVectorSize()]; final double[] doubles = new double[factory.getVectorSizeInspector().getMaxVectorSize()]; final float[] floats = new float[factory.getVectorSizeInspector().getMaxVectorSize()]; @@ -276,8 +281,13 @@ public VectorObjectSelector makeVectorObjectSelector( VectorColumnSelectorFactory factory ) { + Assert.assertEquals(outputName, columnName); final Object[] objects = new Object[factory.getVectorSizeInspector().getMaxVectorSize()]; - Arrays.fill(objects, "2"); + if (capabilities.hasMultipleValues().isTrue()) { + Arrays.fill(objects, ImmutableList.of("2", "2")); + } else { + Arrays.fill(objects, "2"); + } return new VectorObjectSelector() { @Override diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java index 5bc25d091b89..ed6d1b4fe8ae 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/VectorizedVirtualColumnTest.java @@ -51,7 +51,6 @@ import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; -import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -73,7 +72,7 @@ public class VectorizedVirtualColumnTest private List segments = null; @Before - public void setup() throws IOException + public void setup() { groupByTestHelper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( Collections.emptyList(), @@ -141,6 +140,19 @@ public void testGroupBySingleValueStringNotDictionaryEncoded() ); } + @Test + public void testGroupByMultiValueStringNotDictionaryEncoded() + { + // cannot currently group by string columns that might be multi valued + cannotVectorize(); + testGroupBy(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(true) + ); + } + @Test public void testGroupByLong() { @@ -202,6 +214,17 @@ public void testTimeseriesSingleValueStringNotDictionaryEncoded() ); } + @Test + public void testTimeseriesMultiValueStringNotDictionaryEncoded() + { + testTimeseries(new ColumnCapabilitiesImpl() + .setType(ValueType.STRING) + .setDictionaryEncoded(false) + .setDictionaryValuesUnique(false) + .setHasMultipleValues(true) + ); + } + @Test public void testTimeseriesLong() { From 1b706ed0201efeb9508045b2080792bbdc86568b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 11 Sep 2020 22:39:52 -0700 Subject: [PATCH 5/7] review stuffs --- .../AlwaysTwoCounterAggregatorFactory.java | 6 +++++ .../AlwaysTwoVectorizedVirtualColumn.java | 27 +++++-------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java index d2b3aba68b32..f44114300516 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoCounterAggregatorFactory.java @@ -40,6 +40,12 @@ import java.nio.ByteBuffer; import java.util.List; +/** + * Specialized aggregator factory for testing the selectors produced by {@link AlwaysTwoVectorizedVirtualColumn}, used + * for counting the number of values read so that tests can ensure the correct number of values have been processed. A + * {@link AlwaysTwoCounterVectorAggregator} will be produced, backed by a type appropriate selector for a given + * {@link ColumnCapabilities}. + */ public class AlwaysTwoCounterAggregatorFactory extends CountAggregatorFactory { private final String fieldName; diff --git a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java index 227793ff66d6..78d031bb5200 100644 --- a/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java +++ b/processing/src/test/java/org/apache/druid/segment/virtual/AlwaysTwoVectorizedVirtualColumn.java @@ -21,7 +21,6 @@ import com.google.common.collect.ImmutableList; import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; @@ -29,6 +28,7 @@ import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.data.ArrayBasedIndexedInts; import org.apache.druid.segment.data.IndexedInts; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; @@ -42,6 +42,10 @@ import java.util.Arrays; import java.util.List; +/** + * {@link VirtualColumn} which only can produce all kinds of vector selectors and report any type of + * {@link ColumnCapabilities} + */ public class AlwaysTwoVectorizedVirtualColumn implements VirtualColumn { private final String outputName; @@ -151,26 +155,7 @@ public MultiValueDimensionVectorSelector makeMultiValueVectorDimensionSelector( { Assert.assertEquals(outputName, dimensionSpec.getOutputName()); final IndexedInts[] rowVector = new IndexedInts[factory.getVectorSizeInspector().getMaxVectorSize()]; - Arrays.fill(rowVector, new IndexedInts() - { - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - // nothing - } - - @Override - public int size() - { - return 2; - } - - @Override - public int get(int index) - { - return 0; - } - }); + Arrays.fill(rowVector, new ArrayBasedIndexedInts(new int[]{0, 0})); return new MultiValueDimensionVectorSelector() { private final VectorSizeInspector inspector = factory.getVectorSizeInspector(); From 5920653d6a4f16e5b18a0b6da32809691b4863ba Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Sat, 12 Sep 2020 01:06:18 -0700 Subject: [PATCH 6/7] fix incorrect refactor, thanks tests --- .../query/groupby/epinephelinae/GroupByQueryEngineV2.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 805606f5ceb7..0808f6540415 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -335,9 +335,9 @@ public static boolean isAllSingleValueDims( return false; } - // Now check column capabilities. + // Now check column capabilities, which must be present and explicitly not multi-valued final ColumnCapabilities columnCapabilities = factory.getColumnCapabilities(dimension.getDimension()); - return columnCapabilities == null || columnCapabilities.hasMultipleValues().isFalse(); + return columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse(); }); } From ad0419ce2d4e895acb804136653adcda32b5afe5 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 14 Sep 2020 10:04:23 -0700 Subject: [PATCH 7/7] minor adjustments --- .../epinephelinae/GroupByQueryEngineV2.java | 5 +++-- ...yableIndexVectorColumnSelectorFactory.java | 20 ++++++++----------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 0808f6540415..f170550a102c 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -55,6 +55,7 @@ import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.apache.druid.query.groupby.strategy.GroupByStrategyV2; import org.apache.druid.query.ordering.StringComparator; +import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.ColumnSelectorFactory; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.Cursor; @@ -321,7 +322,7 @@ public static int getCardinalityForArrayAggregation( * broker merge, for example with an 'inline' datasource subquery. */ public static boolean isAllSingleValueDims( - final ColumnSelectorFactory factory, + final ColumnInspector inspector, final List dimensions ) { @@ -336,7 +337,7 @@ public static boolean isAllSingleValueDims( } // Now check column capabilities, which must be present and explicitly not multi-valued - final ColumnCapabilities columnCapabilities = factory.getColumnCapabilities(dimension.getDimension()); + final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); return columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse(); }); } diff --git a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java index 05981aaed324..a92f0fad91e9 100644 --- a/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java +++ b/processing/src/main/java/org/apache/druid/segment/vector/QueryableIndexVectorColumnSelectorFactory.java @@ -120,9 +120,8 @@ public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(final D return spec.decorate(selector); }; - // We cannot use dimensionSelectorCache.computeIfAbsent() here since the function being - // applied may modify the dimensionSelectorCache itself through virtual column references, - // triggering a ConcurrentModificationException in JDK 9 and above. + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. MultiValueDimensionVectorSelector selector = multiValueDimensionSelectorCache.get(dimensionSpec); if (selector == null) { selector = mappingFunction.apply(dimensionSpec); @@ -178,9 +177,8 @@ public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(final return spec.decorate(selector); }; - // We cannot use dimensionSelectorCache.computeIfAbsent() here since the function being - // applied may modify the dimensionSelectorCache itself through virtual column references, - // triggering a ConcurrentModificationException in JDK 9 and above. + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. SingleValueDimensionVectorSelector selector = singleValueDimensionSelectorCache.get(dimensionSpec); if (selector == null) { selector = mappingFunction.apply(dimensionSpec); @@ -209,9 +207,8 @@ public VectorValueSelector makeValueSelector(final String columnName) return column.makeVectorValueSelector(offset); } }; - // We cannot use valueSelectorCache.computeIfAbsent() here since the function being - // applied may modify the valueSelectorCache itself through virtual column references, - // triggering a ConcurrentModificationException in JDK 9 and above. + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. VectorValueSelector columnValueSelector = valueSelectorCache.get(columnName); if (columnValueSelector == null) { columnValueSelector = mappingFunction.apply(columnName); @@ -240,9 +237,8 @@ public VectorObjectSelector makeObjectSelector(final String columnName) return column.makeVectorObjectSelector(offset); } }; - // We cannot use valueSelectorCache.computeIfAbsent() here since the function being - // applied may modify the valueSelectorCache itself through virtual column references, - // triggering a ConcurrentModificationException in JDK 9 and above. + // We cannot use computeIfAbsent() here since the function being applied may modify the cache itself through + // virtual column references, triggering a ConcurrentModificationException in JDK 9 and above. VectorObjectSelector columnValueSelector = objectSelectorCache.get(columnName); if (columnValueSelector == null) { columnValueSelector = mappingFunction.apply(columnName);