apache · xvrl · Feb 2, 2015 · Dec 24, 2014 · fjy · Dec 24, 2014
diff --git a/NOTICE b/NOTICE
@@ -0,0 +1,2 @@
+Druid - a distributed column store.
+Copyright 2012-2015 Metamarkets Group Inc.
diff --git a/common/src/main/java/io/druid/timeline/VersionedIntervalTimeline.java b/common/src/main/java/io/druid/timeline/VersionedIntervalTimeline.java
@@ -499,9 +499,4 @@ public PartitionHolder<ObjectType> getPartitionHolder()
       return partitionHolder;
     }
   }
-
-  public static void main(String[] args)
-  {
-    System.out.println(new Interval(new DateTime(), (DateTime) null));
-  }
 }
diff --git a/pom.xml b/pom.xml
@@ -587,6 +587,8 @@
                         <argLine>-Duser.language=en -Duser.country=US</argLine>
                         <systemPropertyVariables>
                             <user.timezone>UTC</user.timezone>
+                            <user.country>US</user.country>
+                            <user.language>en</user.language>
                         </systemPropertyVariables>
                     </configuration>
                 </plugin>

diff --git a/processing/pom.xml b/processing/pom.xml
@@ -81,7 +81,6 @@
       <artifactId>mapdb</artifactId>
     </dependency>
 
-
     <!-- Tests -->
     <dependency>
       <groupId>junit</groupId>

diff --git a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java
@@ -26,15 +26,48 @@
 @JsonSubTypes(value = {
     @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
     @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
-    @JsonSubTypes.Type(name = "partial", value = PartialDimExtractionFn.class),
+    @JsonSubTypes.Type(name = "partial", value = MatchingDimExtractionFn.class),
     @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class),
     @JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class)
 })
+/**
+ * A DimExtractionFn is a function that can be used to modify the values of a dimension column.
+ *
+ * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a
+ * regular expression with a capture group.  When the regular expression matches the value of a dimension,
+ * the value captured by the group is used for grouping operations instead of the dimension value.
+ */
 public interface DimExtractionFn
 {
+  /**
+   * Returns a byte[] unique to all concrete implementations of DimExtractionFn.  This byte[] is used to
+   * generate a cache key for the specific query.
+   *
+   * @return a byte[] unique to all concrete implementations of DimExtractionFn
+   */
   public byte[] getCacheKey();
 
+  /**
+   * The "extraction" function.  This should map a dimension value into some other value.
+   *
+   * In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the
+   * empty string is considered invalid output for this method; implementations should return null
+   * instead.  This is a contract on the method rather than enforced at a lower level in order to
+   * eliminate a global check for extraction functions that do not already need one.
+   *
+   *
+   * @param dimValue the original value of the dimension
+   * @return a value that should be used instead of the original
+   */
   public String apply(String dimValue);
 
+  /**
+   * Offers information on whether the extraction will preserve the original ordering of the values.
+   *
+   * Some query optimizations are possible if ordering is preserved.  Null values *do* count towards
+   * ordering.
+   *
+   * @return true if ordering is preserved, false otherwise
+   */
   public boolean preservesOrdering();
 }
diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java
@@ -20,6 +20,9 @@
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Function;
+import com.google.common.base.Strings;
+import com.metamx.common.StringUtils;
+import com.google.common.base.Strings;
 import com.metamx.common.StringUtils;
 import org.mozilla.javascript.Context;
 import org.mozilla.javascript.ContextFactory;
@@ -89,7 +92,8 @@ public byte[] getCacheKey()
   @Override
   public String apply(String dimValue)
   {
-    return fn.apply(dimValue);
+    String retVal = fn.apply(dimValue);
+    return Strings.isNullOrEmpty(retVal) ? null : retVal;
   }
 
   @Override

diff --git a/...ry/extraction/PartialDimExtractionFn.java → ...y/extraction/MatchingDimExtractionFn.java b/...ry/extraction/PartialDimExtractionFn.java → ...y/extraction/MatchingDimExtractionFn.java
@@ -27,15 +27,15 @@
 
 /**
  */
-public class PartialDimExtractionFn implements DimExtractionFn
+public class MatchingDimExtractionFn implements DimExtractionFn
 {
   private static final byte CACHE_TYPE_ID = 0x2;
 
   private final String expr;
   private final Pattern pattern;
 
   @JsonCreator
-  public PartialDimExtractionFn(
+  public MatchingDimExtractionFn(
       @JsonProperty("expr") String expr
   )
   {
@@ -70,7 +70,7 @@ public String getExpr()
   @Override
   public boolean preservesOrdering()
   {
-    return true;
+    return false;
   }
 
   @Override

diff --git a/processing/src/main/java/io/druid/query/filter/BitmapIndexSelector.java b/processing/src/main/java/io/druid/query/filter/BitmapIndexSelector.java
@@ -30,6 +30,5 @@ public interface BitmapIndexSelector
   public int getNumRows();
   public BitmapFactory getBitmapFactory();
   public ImmutableBitmap getBitmapIndex(String dimension, String value);
-  public ImmutableBitmap getBitmapIndex(String dimension, int idx);
   public ImmutableRTree getSpatialIndex(String dimension);
 }
diff --git a/processing/src/main/java/io/druid/query/timeseries/TimeseriesQuery.java b/processing/src/main/java/io/druid/query/timeseries/TimeseriesQuery.java
@@ -101,6 +101,11 @@ public List<PostAggregator> getPostAggregatorSpecs()
     return postAggregatorSpecs;
   }
 
+  public boolean isSkipEmptyBuckets()
+  {
+    return Boolean.parseBoolean(getContextValue("skipEmptyBuckets", "false"));
+  }
+
   public TimeseriesQuery withQuerySegmentSpec(QuerySegmentSpec querySegmentSpec)
   {
     return new TimeseriesQuery(

diff --git a/processing/src/main/java/io/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/io/druid/query/timeseries/TimeseriesQueryEngine.java
@@ -49,12 +49,18 @@ public Sequence<Result<TimeseriesResultValue>> process(final TimeseriesQuery que
         query.getGranularity(),
         new Function<Cursor, Result<TimeseriesResultValue>>()
         {
+          private final boolean skipEmptyBuckets = query.isSkipEmptyBuckets();
           private final List<AggregatorFactory> aggregatorSpecs = query.getAggregatorSpecs();
 
           @Override
           public Result<TimeseriesResultValue> apply(Cursor cursor)
           {
             Aggregator[] aggregators = QueryRunnerHelper.makeAggregators(cursor, aggregatorSpecs);
+
+            if (skipEmptyBuckets && cursor.isDone()) {
+              return null;
+            }
+
             try {
               while (!cursor.isDone()) {
                 for (Aggregator aggregator : aggregators) {

diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java
@@ -103,10 +103,6 @@ public void scanAndAggregate(
         Aggregator[] theAggregators = rowSelector[dimIndex];
         if (theAggregators == null) {
           String key = query.getDimensionSpec().getDimExtractionFn().apply(dimSelector.lookupName(dimIndex));
-          if (key == null) {
-            rowSelector[dimIndex] = EMPTY_ARRAY;
-            continue;
-          }
           theAggregators = aggregatesStore.get(key);
           if (theAggregators == null) {
             theAggregators = makeAggregators(cursor, query.getAggregatorSpecs());

diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java
@@ -277,7 +277,7 @@ public Map<String, Object> apply(DimensionAndMetricValueExtractor input)
             )
         );
 
-        return new Result<TopNResultValue>(
+        return new Result<>(
             result.getTimestamp(),
             new TopNResultValue(serializedValues)
         );
@@ -400,26 +400,23 @@ public Result<TopNResultValue> apply(Object input)
               retVal.add(vals);
             }
 
-            return new Result<TopNResultValue>(timestamp, new TopNResultValue(retVal));
+            return new Result<>(timestamp, new TopNResultValue(retVal));
           }
         };
       }
 
       @Override
       public Sequence<Result<TopNResultValue>> mergeSequences(Sequence<Sequence<Result<TopNResultValue>>> seqOfSequences)
       {
-        return new MergeSequence<Result<TopNResultValue>>(getOrdering(), seqOfSequences);
+        return new MergeSequence<>(getOrdering(), seqOfSequences);
       }
     };
   }
 
   @Override
   public QueryRunner<Result<TopNResultValue>> preMergeQueryDecoration(QueryRunner<Result<TopNResultValue>> runner)
   {
-    return new IntervalChunkingQueryRunner<Result<TopNResultValue>>(
-        runner,
-        config.getChunkPeriod()
-    );
+    return new IntervalChunkingQueryRunner<>(runner, config.getChunkPeriod());
   }
 
   @Override
@@ -485,7 +482,7 @@ public Result<TopNResultValue> apply(Result<TopNResultValue> input)
                               @Override
                               public Result<TopNResultValue> apply(Result<TopNResultValue> input)
                               {
-                                return new Result<TopNResultValue>(
+                                return new Result<>(
                                     input.getTimestamp(),
                                     new TopNResultValue(
                                         Lists.<Object>newArrayList(
@@ -505,7 +502,7 @@ public Result<TopNResultValue> apply(Result<TopNResultValue> input)
                 );
               }
 
-              return new Result<TopNResultValue>(
+              return new Result<>(
                   input.getTimestamp(),
                   new TopNResultValue(
                       Lists.<Object>newArrayList(

diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryRunnerFactory.java b/processing/src/main/java/io/druid/query/topn/TopNQueryRunnerFactory.java
@@ -82,7 +82,7 @@ public QueryRunner<Result<TopNResultValue>> mergeRunners(
       ExecutorService queryExecutor, Iterable<QueryRunner<Result<TopNResultValue>>> queryRunners
   )
   {
-    return new ChainedExecutionQueryRunner<Result<TopNResultValue>>(
+    return new ChainedExecutionQueryRunner<>(
         queryExecutor, toolchest.getOrdering(), queryWatcher, queryRunners
     );
   }

diff --git a/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java b/processing/src/main/java/io/druid/segment/ColumnSelectorBitmapIndexSelector.java
@@ -17,6 +17,7 @@
 
 package io.druid.segment;
 
+import com.google.common.base.Strings;
 import com.metamx.collections.bitmap.BitmapFactory;
 import com.metamx.collections.bitmap.ImmutableBitmap;
 import com.metamx.collections.spatial.ImmutableRTree;
@@ -112,30 +113,18 @@ public ImmutableBitmap getBitmapIndex(String dimension, String value)
   {
     final Column column = index.getColumn(dimension);
     if (column == null) {
-      return bitmapFactory.makeEmptyImmutableBitmap();
-    }
-    if (!column.getCapabilities().hasBitmapIndexes()) {
-      bitmapFactory.makeEmptyImmutableBitmap();
+      if (Strings.isNullOrEmpty(value)) {
+        return bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), getNumRows());
+      } else {
+        return bitmapFactory.makeEmptyImmutableBitmap();
+      }
     }
 
-    return column.getBitmapIndex().getBitmap(value);
-  }
-
-  @Override
-  public ImmutableBitmap getBitmapIndex(String dimension, int idx)
-  {
-    final Column column = index.getColumn(dimension);
-    if (column == null || column.getCapabilities() == null) {
-      bitmapFactory.makeEmptyImmutableBitmap();
-    }
     if (!column.getCapabilities().hasBitmapIndexes()) {
       bitmapFactory.makeEmptyImmutableBitmap();
     }
 
-    // This is a workaround given the current state of indexing, I feel shame
-    final int index1 = column.getBitmapIndex().hasNulls() ? idx + 1 : idx;
-
-    return column.getBitmapIndex().getBitmap(index1);
+    return column.getBitmapIndex().getBitmap(value);
   }
 
   @Override

diff --git a/processing/src/main/java/io/druid/segment/NullDimensionSelector.java b/processing/src/main/java/io/druid/segment/NullDimensionSelector.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.segment;
+
+import com.google.common.base.Strings;
+import com.google.common.collect.Iterators;
+import io.druid.segment.data.IndexedInts;
+
+import java.util.Iterator;
+
+public class NullDimensionSelector implements DimensionSelector
+{
+
+  private static final IndexedInts SINGLETON = new IndexedInts() {
+    @Override
+    public int size() {
+      return 1;
+    }
+
+    @Override
+    public int get(int index) {
+      return 0;
+    }
+
+    @Override
+    public Iterator<Integer> iterator() {
+      return Iterators.singletonIterator(0);
+    }
+  };
+
+  @Override
+  public IndexedInts getRow()
+  {
+    return SINGLETON;
+  }
+
+  @Override
+  public int getValueCardinality()
+  {
+    return 1;
+  }
+
+  @Override
+  public String lookupName(int id)
+  {
+    return null;
+  }
+
+  @Override
+  public int lookupId(String name)
+  {
+    return Strings.isNullOrEmpty(name) ? 0 : -1;
+  }
+}
diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java
@@ -49,6 +49,8 @@
  */
 public class QueryableIndexStorageAdapter implements StorageAdapter
 {
+  private static final NullDimensionSelector NULL_DIMENSION_SELECTOR = new NullDimensionSelector();
+
   private final QueryableIndex index;
 
   public QueryableIndexStorageAdapter(
@@ -274,17 +276,21 @@ public DimensionSelector makeDimensionSelector(String dimension)
                     {
                       DictionaryEncodedColumn cachedColumn = dictionaryColumnCache.get(dimension);
                       final Column columnDesc = index.getColumn(dimension);
+                      if (columnDesc == null) {
+                        return NULL_DIMENSION_SELECTOR;
+                      }
 
-                      if (cachedColumn == null && columnDesc != null) {
+                      if (cachedColumn == null) {
                         cachedColumn = columnDesc.getDictionaryEncoding();
                         dictionaryColumnCache.put(dimension, cachedColumn);
                       }
 
                       final DictionaryEncodedColumn column = cachedColumn;
 
                       if (column == null) {
-                        return null;
-                      } else if (columnDesc.getCapabilities().hasMultipleValues()) {
+                        return NULL_DIMENSION_SELECTOR;
+                      }
+                      else if (columnDesc.getCapabilities().hasMultipleValues()) {
                         return new DimensionSelector()
                         {
                           @Override
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Druid - a distributed column store.
		Copyright 2012-2015 Metamarkets Group Inc.