apache · nishantmonu51 · Mar 3, 2017 · Mar 3, 2017 · nishantmonu51 · Mar 3, 2017
diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md
@@ -106,15 +106,16 @@ The following SQL queries and features may be executed using approximate algorit
 
 - `COUNT(DISTINCT col)` and `APPROX_COUNT_DISTINCT(col)` aggregations use
 [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf), a fast approximate distinct counting
-algorithm. If you need exact distinct counts, you can instead use
-`SELECT COUNT(*) FROM (SELECT DISTINCT col FROM data_source)`, which will use a slower and more resource intensive exact
-algorithm.
+algorithm. If you need exact distinct counts, set "useApproximateCountDistinct" to "false", either through query
+context or through broker configuration.
 - TopN-style queries with a single grouping column, like
 `SELECT col1, SUM(col2) FROM data_source GROUP BY col1 ORDER BY SUM(col2) DESC LIMIT 100`, by default will be executed
 as [TopN queries](topnquery.html), which use an approximate algorithm. To disable this behavior, and use exact
 algorithms for topN-style queries, set "useApproximateTopN" to "false", either through query context or through broker
 configuration.
 
+In both cases, the exact algorithms are generally slower and more resource intensive.
+
 ### Time functions
 
 Druid's SQL language supports a number of time operations, including:

diff --git a/sql/src/main/java/io/druid/sql/calcite/planner/Rules.java b/sql/src/main/java/io/druid/sql/calcite/planner/Rules.java
@@ -31,6 +31,7 @@
 import org.apache.calcite.interpreter.Bindables;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.volcano.AbstractConverter;
+import org.apache.calcite.rel.rules.AggregateExpandDistinctAggregatesRule;
 import org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
 import org.apache.calcite.rel.rules.AggregateProjectMergeRule;
 import org.apache.calcite.rel.rules.AggregateProjectPullUpConstantsRule;
@@ -202,6 +203,12 @@ private static List<RelOptRule> baseRuleSet(
     rules.addAll(VOLCANO_ABSTRACT_RULES);
     rules.addAll(RELOPTUTIL_ABSTRACT_RULES);
 
+    if (!plannerConfig.isUseApproximateCountDistinct()) {
+      // We'll need this to expand COUNT DISTINCTs.
+      // Avoid AggregateExpandDistinctAggregatesRule.INSTANCE; it uses grouping sets and we don't support those.
+      rules.add(AggregateExpandDistinctAggregatesRule.JOIN);
+    }
+
     if (plannerConfig.isUseFallback()) {
       rules.add(DruidRelToBindableRule.instance());
     }

diff --git a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java
@@ -122,6 +122,14 @@ public int getMaxTopNLimit()
       return 0;
     }
   };
+  private static final PlannerConfig PLANNER_CONFIG_NO_HLL = new PlannerConfig()
+  {
+    @Override
+    public boolean isUseApproximateCountDistinct()
+    {
+      return false;
+    }
+  };
   private static final PlannerConfig PLANNER_CONFIG_SELECT_PAGING = new PlannerConfig()
   {
     @Override
@@ -721,6 +729,22 @@ public void testUnplannableQueries() throws Exception
     }
   }
 
+  @Test
+  public void testUnplannableExactCountDistinctQueries() throws Exception
+  {
+    // All of these queries are unplannable in exact COUNT DISTINCT mode.
+
+    final List<String> queries = ImmutableList.of(
+        "SELECT COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo", // two COUNT DISTINCTs, same query
+        "SELECT dim1, COUNT(distinct dim1), COUNT(distinct dim2) FROM druid.foo GROUP BY dim1", // two COUNT DISTINCTs
+        "SELECT COUNT(distinct unique_dim1) FROM druid.foo" // COUNT DISTINCT on sketch cannot be exact
+    );
+
+    for (final String query : queries) {
+      assertQueryIsUnplannable(PLANNER_CONFIG_NO_HLL, query);
+    }
+  }
+
   private void assertQueryIsUnplannable(final String sql)
   {
     assertQueryIsUnplannable(PLANNER_CONFIG_DEFAULT, sql);
@@ -1869,6 +1893,84 @@ public void testCountDistinct() throws Exception
     );
   }
 
+  @Test
+  public void testExactCountDistinct() throws Exception
+  {
+    // When HLL is disabled, do exact count distinct through a nested query.
+
+    testQuery(
+        PLANNER_CONFIG_NO_HLL,
+        "SELECT COUNT(distinct dim2) FROM druid.foo",
+        ImmutableList.<Query>of(
+            GroupByQuery.builder()
+                        .setDataSource(
+                            new QueryDataSource(
+                                GroupByQuery.builder()
+                                            .setDataSource(CalciteTests.DATASOURCE1)
+                                            .setInterval(QSS(Filtration.eternity()))
+                                            .setGranularity(Granularities.ALL)
+                                            .setDimensions(DIMS(new DefaultDimensionSpec("dim2", "d0")))
+                                            .setContext(QUERY_CONTEXT_DEFAULT)
+                                            .build()
+                            )
+                        )
+                        .setInterval(QSS(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setAggregatorSpecs(AGGS(
+                            new CountAggregatorFactory("a0")
+                        ))
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{3L}
+        )
+    );
+  }
+
+  @Test
+  public void testExactCountDistinctWithGroupingAndOtherAggregators() throws Exception
+  {
+    // When HLL is disabled, do exact count distinct through a nested query.
+
+    testQuery(
+        PLANNER_CONFIG_NO_HLL,
+        "SELECT dim2, SUM(cnt), COUNT(distinct dim1) FROM druid.foo GROUP BY dim2",
+        ImmutableList.<Query>of(
+            GroupByQuery.builder()
+                        .setDataSource(
+                            new QueryDataSource(
+                                GroupByQuery.builder()
+                                            .setDataSource(CalciteTests.DATASOURCE1)
+                                            .setInterval(QSS(Filtration.eternity()))
+                                            .setGranularity(Granularities.ALL)
+                                            .setDimensions(DIMS(
+                                                new DefaultDimensionSpec("dim2", "d0"),
+                                                new DefaultDimensionSpec("dim1", "d1")
+                                            ))
+                                            .setAggregatorSpecs(AGGS(new LongSumAggregatorFactory("a0", "cnt")))
+                                            .setContext(QUERY_CONTEXT_DEFAULT)
+                                            .build()
+                            )
+                        )
+                        .setInterval(QSS(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setDimensions(DIMS(new DefaultDimensionSpec("d0", "d0")))
+                        .setAggregatorSpecs(AGGS(
+                            new LongSumAggregatorFactory("a0", "a0"),
+                            new CountAggregatorFactory("a1")
+                        ))
+                        .setContext(QUERY_CONTEXT_DEFAULT)
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"", 3L, 3L},
+            new Object[]{"a", 2L, 2L},
+            new Object[]{"abc", 1L, 1L}
+        )
+    );
+  }
+
   @Test
   public void testApproxCountDistinct() throws Exception
   {
@@ -2844,7 +2946,12 @@ public void testGroupByFloorWithOrderBy() throws Exception
                         .setGranularity(Granularities.ALL)
                         .setDimensions(
                             DIMS(
-                                new ExtractionDimensionSpec("dim1", "d0", ValueType.FLOAT, new BucketExtractionFn(1.0, 0.0))
+                                new ExtractionDimensionSpec(
+                                    "dim1",
+                                    "d0",
+                                    ValueType.FLOAT,
+                                    new BucketExtractionFn(1.0, 0.0)
+                                )
                             )
                         )
                         .setAggregatorSpecs(AGGS(new CountAggregatorFactory("a0")))