Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.groupby.strategy.GroupByStrategyV2;
import org.apache.druid.query.ordering.StringComparator;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.Cursor;
Expand All @@ -74,7 +75,6 @@
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Function;
import java.util.stream.Stream;

/**
Expand Down Expand Up @@ -231,7 +231,7 @@ public GroupByEngineIterator make()
processingBuffer,
fudgeTimestamp,
dims,
isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions(), false),
isAllSingleValueDims(columnSelectorFactory, query.getDimensions()),
cardinalityForArrayAggregation
);
} else {
Expand All @@ -242,7 +242,7 @@ public GroupByEngineIterator make()
processingBuffer,
fudgeTimestamp,
dims,
isAllSingleValueDims(columnSelectorFactory::getColumnCapabilities, query.getDimensions(), false)
isAllSingleValueDims(columnSelectorFactory, query.getDimensions())
);
}
}
Expand Down Expand Up @@ -319,13 +319,11 @@ public static int getCardinalityForArrayAggregation(
/**
* Checks whether all "dimensions" are either single-valued, or if allowed, nonexistent. Since non-existent column
* selectors will show up as full of nulls they are effectively single valued, however they can also be null during
* broker merge, for example with an 'inline' datasource subquery. 'missingMeansNonExistent' is sort of a hack to let
* the vectorized engine, which only operates on actual segments, to still work in this case for non-existent columns.
* broker merge, for example with an 'inline' datasource subquery.
*/
public static boolean isAllSingleValueDims(
final Function<String, ColumnCapabilities> capabilitiesFunction,
final List<DimensionSpec> dimensions,
final boolean missingMeansNonExistent
final ColumnInspector inspector,
final List<DimensionSpec> dimensions
)
{
return dimensions
Expand All @@ -338,10 +336,9 @@ public static boolean isAllSingleValueDims(
return false;
}

// Now check column capabilities.
final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension());
return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) ||
(missingMeansNonExistent && columnCapabilities == null);
// Now check column capabilities, which must be present and explicitly not multi-valued
final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension());
return columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse();
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
import org.apache.druid.query.vector.VectorCursorGranularizer;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
import org.apache.druid.segment.vector.VectorCursor;
Expand All @@ -55,6 +57,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Function;
import java.util.stream.Collectors;

public class VectorGroupByEngine
Expand All @@ -70,24 +73,47 @@ public static boolean canVectorize(
@Nullable final Filter filter
)
{
// Multi-value dimensions are not yet supported.
//
// Two notes here about how we're handling this check:
// 1) After multi-value dimensions are supported, we could alter "GroupByQueryEngineV2.isAllSingleValueDims"
// to accept a ColumnSelectorFactory, which makes more sense than using a StorageAdapter (see #8013).
// 2) Technically using StorageAdapter here is bad since it only looks at real columns, but they might
// be shadowed by virtual columns (again, see #8013). But it's fine for now since adapter.canVectorize
// always returns false if there are any virtual columns.
//
// This situation should sort itself out pretty well once this engine supports multi-valued columns. Then we
// won't have to worry about having this all-single-value-dims check here.

return GroupByQueryEngineV2.isAllSingleValueDims(adapter::getColumnCapabilities, query.getDimensions(), true)
Function<String, ColumnCapabilities> capabilitiesFunction = name ->
query.getVirtualColumns().getColumnCapabilitiesWithFallback(adapter, name);

return canVectorizeDimensions(capabilitiesFunction, query.getDimensions())
&& query.getDimensions().stream().allMatch(DimensionSpec::canVectorize)
&& query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(adapter))
&& adapter.canVectorize(filter, query.getVirtualColumns(), false);
}

public static boolean canVectorizeDimensions(
final Function<String, ColumnCapabilities> capabilitiesFunction,
final List<DimensionSpec> dimensions
)
{
return dimensions
.stream()
.allMatch(
dimension -> {
if (dimension.mustDecorate()) {
// group by on multi value dimensions are not currently supported
// DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors.
// To be safe, we must return false here.
return false;
}

// Now check column capabilities.
final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension());
// null here currently means the column does not exist, nil columns can be vectorized
if (columnCapabilities == null) {
return true;
}
// strings must be single valued, dictionary encoded, and have unique dictionary entries
if (ValueType.STRING.equals(columnCapabilities.getType())) {
return columnCapabilities.hasMultipleValues().isFalse() &&
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you decide between columnCapabilities.hasMultipleValues().isFalse() and !columnCapabilities.hasMultipleValues().isMaybeTrue() - I'm never sure which one to check

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I guess the example I gave is equivalent. Maybe a better way to ask that question is should UNKNOWN be treated as multi-value or single value?

columnCapabilities.isDictionaryEncoded().isTrue() &&
columnCapabilities.areDictionaryValuesUnique().isTrue();
}
return columnCapabilities.hasMultipleValues().isFalse();
});
}

public static Sequence<ResultRow> process(
final GroupByQuery query,
final StorageAdapter storageAdapter,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ public Cursor apply(final Interval inputInterval)
public VectorCursor buildVectorized(final int vectorSize)
{
// Sanity check - matches QueryableIndexStorageAdapter.canVectorize
Preconditions.checkState(virtualColumns.size() == 0, "virtualColumns.size == 0");
Preconditions.checkState(!descending, "!descending");

final Map<String, BaseColumn> columnCache = new HashMap<>();
Expand Down Expand Up @@ -229,17 +228,15 @@ public VectorCursor buildVectorized(final int vectorSize)
? new NoFilterVectorOffset(vectorSize, startOffset, endOffset)
: new BitmapVectorOffset(vectorSize, filterBitmap, startOffset, endOffset);

// baseColumnSelectorFactory using baseOffset is the column selector for filtering.
final VectorColumnSelectorFactory baseColumnSelectorFactory = makeVectorColumnSelectorFactoryForOffset(
columnCache,
baseOffset,
closer
);
if (postFilter == null) {
return new QueryableIndexVectorCursor(index, baseOffset, closer, columnCache, vectorSize);
return new QueryableIndexVectorCursor(baseColumnSelectorFactory, baseOffset, vectorSize, closer);
} else {
// baseColumnSelectorFactory using baseOffset is the column selector for filtering.
final VectorColumnSelectorFactory baseColumnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory(
index,
baseOffset,
closer,
columnCache
);

final VectorOffset filteredOffset = FilteredVectorOffset.create(
baseOffset,
baseColumnSelectorFactory,
Expand All @@ -254,10 +251,31 @@ public VectorCursor buildVectorized(final int vectorSize)
// object will get hit twice for some of the values (anything that matched the filter). This is probably most
// noticeable if it causes thrashing of decompression buffers due to out-of-order reads. I haven't observed
// this directly but it seems possible in principle.
return new QueryableIndexVectorCursor(index, filteredOffset, closer, columnCache, vectorSize);
// baseColumnSelectorFactory using baseOffset is the column selector for filtering.
final VectorColumnSelectorFactory filteredColumnSelectorFactory = makeVectorColumnSelectorFactoryForOffset(
columnCache,
filteredOffset,
closer
);
return new QueryableIndexVectorCursor(filteredColumnSelectorFactory, filteredOffset, vectorSize, closer);
}
}

/**
 * Creates a {@link VectorColumnSelectorFactory} reading at the given {@link VectorOffset}.
 *
 * The factory is backed by this cursor builder's {@code index}, and is passed {@code virtualColumns}
 * so selectors for virtual columns can be created alongside physical ones. The shared
 * {@code columnCache} lets multiple factories (e.g. the filtering factory and the post-filter
 * factory) reuse already-opened {@link BaseColumn} instances; opened resources are registered
 * with {@code closer} for cleanup.
 *
 * @param columnCache shared cache of opened physical columns, keyed by column name
 * @param baseOffset  the offset the returned factory's selectors will read from
 * @param closer      registry for resources that must be released when the cursor closes
 */
VectorColumnSelectorFactory makeVectorColumnSelectorFactoryForOffset(
Map<String, BaseColumn> columnCache,
VectorOffset baseOffset,
Closer closer
)
{
return new QueryableIndexVectorColumnSelectorFactory(
index,
baseOffset,
closer,
columnCache,
virtualColumns
);
}

/**
* Search the time column using binary search. Benchmarks on various other approaches (linear search, binary
* search that switches to linear at various closeness thresholds) indicated that a pure binary search worked best.
Expand Down Expand Up @@ -318,17 +336,16 @@ private static class QueryableIndexVectorCursor implements VectorCursor
private final VectorColumnSelectorFactory columnSelectorFactory;

public QueryableIndexVectorCursor(
final QueryableIndex index,
final VectorColumnSelectorFactory vectorColumnSelectorFactory,
final VectorOffset offset,
final Closer closer,
final Map<String, BaseColumn> columnCache,
final int vectorSize
final int vectorSize,
final Closer closer
)
{
this.columnSelectorFactory = vectorColumnSelectorFactory;
this.vectorSize = vectorSize;
this.offset = offset;
this.closer = closer;
this.vectorSize = vectorSize;
this.columnSelectorFactory = new QueryableIndexVectorColumnSelectorFactory(index, offset, closer, columnCache);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,8 @@ public boolean canVectorize(
}
}

// 1) Virtual columns can't vectorize yet
// 2) Vector cursors can't iterate backwards yet
return virtualColumns.size() == 0 && !descending;
// vector cursors can't iterate backwards yet
return virtualColumns.canVectorize(this) && !descending;
}

@Override
Expand Down
Loading