Merged
37 changes: 31 additions & 6 deletions docs/content/querying/segmentmetadataquery.md
@@ -31,6 +31,7 @@ There are several main parts to a segment metadata query:
|merge|Merge all individual segment metadata results into a single result|no|
|context|See [Context](../querying/query-context.html)|no|
|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "size", "interval"]. See section [analysisTypes](#analysistypes) for more details.|no|
|lenientAggregatorMerge|If true, and if the "aggregators" analysisType is enabled, aggregators will be merged leniently. See below for details.|no|

The format of the result is:

@@ -44,6 +45,9 @@ The format of the result is:
"dim2" : { "type" : "STRING", "hasMultipleValues" : true, "size" : 100000, "cardinality" : 1504, "errorMessage" : null },
"metric1" : { "type" : "FLOAT", "hasMultipleValues" : false, "size" : 100000, "cardinality" : null, "errorMessage" : null }
},
"aggregators" : {
"metric1" : { "type" : "longSum", "name" : "metric1", "fieldName" : "metric1" }
},
"size" : 300000,
"numRows" : 5000000
} ]
@@ -99,18 +103,39 @@ This is a list of properties that determines the amount of information returned

By default, all analysis types will be used. If a property is not needed, omitting it from this list will result in a more efficient query.
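As a rough illustration of why omitting analysis types makes the query cheaper, each requested type gates its own computation. This is a standalone sketch with placeholder values, not Druid's actual implementation; the enum names simply mirror the documented analysis types:

```java
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.Map;

public class AnalysisTypesSketch {
    // Mirrors the documented analysis types.
    enum AnalysisType { CARDINALITY, SIZE, INTERVAL, AGGREGATORS }

    // Only the requested analyses run; omitted ones are skipped entirely,
    // which is what makes a narrower query more efficient.
    static Map<String, Object> analyze(EnumSet<AnalysisType> requested) {
        Map<String, Object> result = new LinkedHashMap<>();
        if (requested.contains(AnalysisType.CARDINALITY)) {
            result.put("cardinality", 1504L); // placeholder estimate
        }
        if (requested.contains(AnalysisType.SIZE)) {
            result.put("size", 100000L); // placeholder byte size
        }
        if (requested.contains(AnalysisType.INTERVAL)) {
            result.put("intervals", "2013-01-01/2013-01-02"); // placeholder
        }
        if (requested.contains(AnalysisType.AGGREGATORS)) {
            result.put("aggregators", "longSum"); // placeholder
        }
        return result;
    }

    public static void main(String[] args) {
        // Requesting only cardinality and size skips interval and aggregator work.
        System.out.println(analyze(EnumSet.of(AnalysisType.CARDINALITY, AnalysisType.SIZE)).keySet());
        // prints [cardinality, size]
    }
}
```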

- There are 3 types of column analyses:
+ There are four types of column analyses:

#### cardinality

- * Estimated floor of cardinality for each column. Only relevant for dimension columns.
+ * `cardinality` in the result will return the estimated floor of cardinality for each column. Only relevant for
+ dimension columns.

#### size

- * Estimated byte size for the segment columns if they were stored in a flat format
- * Estimated total segment byte size if it was stored in a flat format
+ * `size` in the result will contain the estimated total segment byte size as if the data were stored in text format

Contributor: I have no idea what this means. Well, I know what it means, but anyone not super familiar with Druid reading this would not.

Contributor Author: It kind of means nothing, and we should probably remove it in the future (I think nobody depends on it).

#### interval

- * If present, the SegmentMetadataQuery will return the list of intervals associated with the queried segments.
+ * `intervals` in the result will contain the list of intervals associated with the queried segments.

#### aggregators

* `aggregators` in the result will contain the list of aggregators usable for querying metric columns. This may be
null if the aggregators are unknown or unmergeable (if merging is enabled).

* Merging can be strict or lenient. See *lenientAggregatorMerge* below for details.

* The form of the result is a map of column name to aggregator.

### lenientAggregatorMerge

Conflicts between aggregator metadata across segments can occur if some segments have unknown aggregators, or if
two segments use incompatible aggregators for the same column (e.g. longSum changed to doubleSum).

Aggregators can be merged strictly (the default) or leniently. With strict merging, if there are any segments
with unknown aggregators, or any conflicts of any kind, the merged aggregators list will be `null`. With lenient
merging, segments with unknown aggregators will be ignored, and conflicts between aggregators will only null out
the aggregator for that particular column.

In particular, with lenient merging, it is possible for an individual column's aggregator to be `null`. This will not
occur with strict merging.
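The strict and lenient behaviors described above can be sketched as a self-contained Java example. This is a simplified model: aggregators are represented by their type names rather than real `AggregatorFactory` objects, and `mergeLenient`/`mergeStrict` are hypothetical helper names, not Druid APIs.

```java
import java.util.LinkedHashMap;
import java.util.Map;

public class AggregatorMergeSketch {
    // Lenient merge: a conflict nulls out only the conflicting column.
    static Map<String, String> mergeLenient(Map<String, String> a, Map<String, String> b) {
        Map<String, String> merged = new LinkedHashMap<>(a);
        for (Map.Entry<String, String> e : b.entrySet()) {
            if (!merged.containsKey(e.getKey())) {
                merged.put(e.getKey(), e.getValue());
            } else {
                String existing = merged.get(e.getKey());
                if (existing == null || !existing.equals(e.getValue())) {
                    merged.put(e.getKey(), null); // conflict: null out this column only
                }
            }
        }
        return merged;
    }

    // Strict merge: any conflict makes the whole merged aggregators list null.
    static Map<String, String> mergeStrict(Map<String, String> a, Map<String, String> b) {
        Map<String, String> merged = mergeLenient(a, b);
        return merged.containsValue(null) ? null : merged;
    }

    public static void main(String[] args) {
        Map<String, String> seg1 = new LinkedHashMap<>();
        seg1.put("metric1", "longSum");
        Map<String, String> seg2 = new LinkedHashMap<>();
        seg2.put("metric1", "doubleSum"); // incompatible type for the same column

        System.out.println(mergeLenient(seg1, seg2)); // {metric1=null}
        System.out.println(mergeStrict(seg1, seg2));  // null
    }
}
```

With strict merging, a single conflicting column discards the whole aggregators map; with lenient merging, only the conflicting entry is nulled out while compatible columns survive.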
12 changes: 11 additions & 1 deletion processing/src/main/java/io/druid/query/Druids.java
@@ -938,6 +938,7 @@ public static class SegmentMetadataQueryBuilder
private ColumnIncluderator toInclude;
private EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes;
private Boolean merge;
private Boolean lenientAggregatorMerge;
private Map<String, Object> context;

public SegmentMetadataQueryBuilder()
@@ -948,6 +949,7 @@ public SegmentMetadataQueryBuilder()
analysisTypes = null;
merge = null;
context = null;
lenientAggregatorMerge = null;
}

public SegmentMetadataQuery build()
@@ -959,7 +961,8 @@ public SegmentMetadataQuery build()
merge,
context,
analysisTypes,
- false
+ false,
+ lenientAggregatorMerge
);
}

@@ -975,6 +978,7 @@ public SegmentMetadataQueryBuilder copy(SegmentMetadataQueryBuilder builder)
.toInclude(toInclude)
.analysisTypes(analysisTypesArray)
.merge(merge)
.lenientAggregatorMerge(lenientAggregatorMerge)
.context(builder.context);
}

@@ -1032,6 +1036,12 @@ public SegmentMetadataQueryBuilder merge(boolean merge)
return this;
}

public SegmentMetadataQueryBuilder lenientAggregatorMerge(boolean lenientAggregatorMerge)
{
this.lenientAggregatorMerge = lenientAggregatorMerge;
return this;
}

public SegmentMetadataQueryBuilder context(Map<String, Object> c)
{
context = c;
@@ -20,9 +20,11 @@
package io.druid.query.metadata;

import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -41,6 +43,8 @@
import io.druid.query.QueryRunner;
import io.druid.query.QueryToolChest;
import io.druid.query.ResultMergeQueryRunner;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.AggregatorFactoryNotMergeableException;
import io.druid.query.aggregation.MetricManipulationFn;
import io.druid.query.metadata.metadata.ColumnAnalysis;
import io.druid.query.metadata.metadata.SegmentAnalysis;
@@ -51,7 +55,7 @@

import javax.annotation.Nullable;
import java.nio.ByteBuffer;
- import java.util.ArrayList;
+ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -67,13 +71,7 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest<SegmentAn
@Override
public SegmentAnalysis apply(SegmentAnalysis analysis)
{
- return new SegmentAnalysis(
-     analysis.getId(),
-     analysis.getIntervals() != null ? JodaUtils.condenseIntervals(analysis.getIntervals()) : null,
-     analysis.getColumns(),
-     analysis.getSize(),
-     analysis.getNumRows()
- );
+ return finalizeAnalysis(analysis);
}
};

@@ -139,44 +137,7 @@ protected BinaryFn<SegmentAnalysis, SegmentAnalysis, SegmentAnalysis> createMerg
@Override
public SegmentAnalysis apply(SegmentAnalysis arg1, SegmentAnalysis arg2)
{
- if (arg1 == null) {
-   return arg2;
- }
-
- if (arg2 == null) {
-   return arg1;
- }
-
- List<Interval> newIntervals = null;
- if (query.analyzingInterval()) {
-   //List returned by arg1.getIntervals() is immutable, so a new list needs to
-   //be created.
-   newIntervals = new ArrayList<>(arg1.getIntervals());
-   newIntervals.addAll(arg2.getIntervals());
- }
-
- final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
- final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
- Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
-
- Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
- for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
-   final String columnName = entry.getKey();
-   columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
-   rightColumnNames.remove(columnName);
- }
-
- for (String columnName : rightColumnNames) {
-   columns.put(columnName, rightColumns.get(columnName));
- }
-
- return new SegmentAnalysis(
-   "merged",
-   newIntervals,
-   columns,
-   arg1.getSize() + arg2.getSize(),
-   arg1.getNumRows() + arg2.getNumRows()
- );
+ return mergeAnalyses(arg1, arg2, query.isLenientAggregatorMerge());
}
};
}
@@ -284,4 +245,110 @@ public boolean apply(T input)
)
);
}

@VisibleForTesting
public static SegmentAnalysis mergeAnalyses(
final SegmentAnalysis arg1,
final SegmentAnalysis arg2,
boolean lenientAggregatorMerge
)
{
if (arg1 == null) {
return arg2;
}

if (arg2 == null) {
return arg1;
}

List<Interval> newIntervals = null;

Contributor: Why not just make this an empty list? It would be easier to read the following code.

Contributor: Oh, you might rely on newIntervals being null for a check later on, I guess.

if (arg1.getIntervals() != null) {
newIntervals = Lists.newArrayList();
newIntervals.addAll(arg1.getIntervals());
}
if (arg2.getIntervals() != null) {
if (newIntervals == null) {
newIntervals = Lists.newArrayList();
}
newIntervals.addAll(arg2.getIntervals());
}

final Map<String, ColumnAnalysis> leftColumns = arg1.getColumns();
final Map<String, ColumnAnalysis> rightColumns = arg2.getColumns();
Map<String, ColumnAnalysis> columns = Maps.newTreeMap();

Set<String> rightColumnNames = Sets.newHashSet(rightColumns.keySet());
for (Map.Entry<String, ColumnAnalysis> entry : leftColumns.entrySet()) {
final String columnName = entry.getKey();
columns.put(columnName, entry.getValue().fold(rightColumns.get(columnName)));
rightColumnNames.remove(columnName);
}

for (String columnName : rightColumnNames) {
columns.put(columnName, rightColumns.get(columnName));
}

final Map<String, AggregatorFactory> aggregators = Maps.newHashMap();

if (lenientAggregatorMerge) {
// Merge each aggregator individually, ignoring nulls
for (SegmentAnalysis analysis : ImmutableList.of(arg1, arg2)) {
if (analysis.getAggregators() != null) {
for (AggregatorFactory aggregator : analysis.getAggregators().values()) {
AggregatorFactory merged = aggregators.get(aggregator.getName());
if (merged != null) {
try {
merged = merged.getMergingFactory(aggregator);
}
catch (AggregatorFactoryNotMergeableException e) {
merged = null;
}
} else {
merged = aggregator;
}
aggregators.put(aggregator.getName(), merged);
}
}
}
} else {
final AggregatorFactory[] aggs1 = arg1.getAggregators() != null
? arg1.getAggregators()
.values()
.toArray(new AggregatorFactory[arg1.getAggregators().size()])
: null;
final AggregatorFactory[] aggs2 = arg2.getAggregators() != null
? arg2.getAggregators()
.values()
.toArray(new AggregatorFactory[arg2.getAggregators().size()])
: null;
final AggregatorFactory[] merged = AggregatorFactory.mergeAggregators(Arrays.asList(aggs1, aggs2));
if (merged != null) {
for (AggregatorFactory aggregator : merged) {
aggregators.put(aggregator.getName(), aggregator);
}
}
}

return new SegmentAnalysis(
"merged",
newIntervals,
columns,
arg1.getSize() + arg2.getSize(),

Contributor: Same question as below.

arg1.getNumRows() + arg2.getNumRows(),

Contributor: Is it never possible to have any overlap here?

Contributor Author: Different rows in different segments count as two rows, so there can't be any overlap.

Contributor: Never mind, this is just copied from deleted code.

aggregators.isEmpty() ? null : aggregators
);
}

@VisibleForTesting
public static SegmentAnalysis finalizeAnalysis(SegmentAnalysis analysis)
{
return new SegmentAnalysis(
analysis.getId(),
analysis.getIntervals() != null ? JodaUtils.condenseIntervals(analysis.getIntervals()) : null,
analysis.getColumns(),
analysis.getSize(),
analysis.getNumRows(),
analysis.getAggregators()
);
}
}
@@ -39,10 +39,12 @@
import io.druid.query.QueryRunnerFactory;
import io.druid.query.QueryToolChest;
import io.druid.query.QueryWatcher;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.metadata.metadata.ColumnAnalysis;
import io.druid.query.metadata.metadata.ColumnIncluderator;
import io.druid.query.metadata.metadata.SegmentAnalysis;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.segment.Metadata;
import io.druid.segment.Segment;
import org.joda.time.Interval;

@@ -108,14 +110,30 @@ public Sequence<SegmentAnalysis> run(Query<SegmentAnalysis> inQ, Map<String, Obj
}
List<Interval> retIntervals = query.analyzingInterval() ? Arrays.asList(segment.getDataInterval()) : null;

final Map<String, AggregatorFactory> aggregators;
if (query.hasAggregators()) {
final Metadata metadata = segment.asStorageAdapter().getMetadata();
if (metadata != null && metadata.getAggregators() != null) {
aggregators = Maps.newHashMap();
for (AggregatorFactory aggregator : metadata.getAggregators()) {
aggregators.put(aggregator.getName(), aggregator);
}
} else {
aggregators = null;

Contributor: Can't we just set aggregators to null when it is declared?

Contributor Author: I sort of prefer final things.

}
} else {
aggregators = null;
}

return Sequences.simple(
Arrays.asList(
new SegmentAnalysis(
segment.getIdentifier(),
retIntervals,
columns,
totalSize,
- numRows
+ numRows,
+ aggregators
)
)
);
@@ -168,10 +186,10 @@ public Sequence<SegmentAnalysis> call() throws Exception
future.cancel(true);
throw new QueryInterruptedException("Query interrupted");
}
- catch(CancellationException e) {
+ catch (CancellationException e) {
throw new QueryInterruptedException("Query cancelled");
}
- catch(TimeoutException e) {
+ catch (TimeoutException e) {
log.info("Query timeout, cancelling pending results for query id [%s]", query.getId());
future.cancel(true);
throw new QueryInterruptedException("Query timeout");