Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Druid - a distributed column store.
Copyright 2012-2015 Metamarkets Group Inc.
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,4 @@ public PartitionHolder<ObjectType> getPartitionHolder()
return partitionHolder;
}
}

public static void main(String[] args)
{
System.out.println(new Interval(new DateTime(), (DateTime) null));
}
}
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,8 @@
<argLine>-Duser.language=en -Duser.country=US</argLine>
<systemPropertyVariables>
<user.timezone>UTC</user.timezone>
<user.country>US</user.country>
<user.language>en</user.language>
</systemPropertyVariables>
</configuration>
</plugin>
Expand Down
1 change: 0 additions & 1 deletion processing/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@
<artifactId>mapdb</artifactId>
</dependency>


<!-- Tests -->
<dependency>
<groupId>junit</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,48 @@
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class),
@JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class),
@JsonSubTypes.Type(name = "partial", value = PartialDimExtractionFn.class),
@JsonSubTypes.Type(name = "partial", value = MatchingDimExtractionFn.class),
@JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class),
@JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class)
})
/**
* A DimExtractionFn is a function that can be used to modify the values of a dimension column.
*
* A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a
* regular expression with a capture group. When the regular expression matches the value of a dimension,
* the value captured by the group is used for grouping operations instead of the dimension value.
*/
public interface DimExtractionFn
{
  /**
   * Produces the bytes that identify this extraction function when a cache key is generated for a
   * specific query.
   *
   * @return a byte[] that is unique across all concrete implementations of DimExtractionFn
   */
  byte[] getCacheKey();

  /**
   * Performs the extraction itself: maps a dimension value to the value that should replace it.
   *
   * Druid treats null and the empty string as equivalent. To keep those semantics, an implementation
   * must not return the empty string; it returns null instead. This is a contract on implementations
   * rather than a check enforced at a lower level, so that extraction functions which do not already
   * need such a check are not forced through a global one.
   *
   * @param dimValue the original value of the dimension
   * @return the value to use in place of the original
   */
  String apply(String dimValue);

  /**
   * Reports whether applying this extraction keeps the values in their original order.
   *
   * Some query optimizations are possible only if ordering is preserved. Null values *do* count
   * towards ordering.
   *
   * @return true if ordering is preserved, false otherwise
   */
  boolean preservesOrdering();
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Function;
import com.google.common.base.Strings;
import com.metamx.common.StringUtils;
import com.google.common.base.Strings;
import com.metamx.common.StringUtils;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.ContextFactory;
Expand Down Expand Up @@ -89,7 +92,8 @@ public byte[] getCacheKey()
@Override
public String apply(String dimValue)
{
return fn.apply(dimValue);
String retVal = fn.apply(dimValue);
return Strings.isNullOrEmpty(retVal) ? null : retVal;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@

/**
*/
public class PartialDimExtractionFn implements DimExtractionFn
public class MatchingDimExtractionFn implements DimExtractionFn
{
private static final byte CACHE_TYPE_ID = 0x2;

private final String expr;
private final Pattern pattern;

@JsonCreator
public PartialDimExtractionFn(
public MatchingDimExtractionFn(
@JsonProperty("expr") String expr
)
{
Expand Down Expand Up @@ -70,7 +70,7 @@ public String getExpr()
@Override
public boolean preservesOrdering()
{
return true;
return false;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,5 @@ public interface BitmapIndexSelector
public int getNumRows();
public BitmapFactory getBitmapFactory();
public ImmutableBitmap getBitmapIndex(String dimension, String value);
public ImmutableBitmap getBitmapIndex(String dimension, int idx);
public ImmutableRTree getSpatialIndex(String dimension);
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ public List<PostAggregator> getPostAggregatorSpecs()
return postAggregatorSpecs;
}

public boolean isSkipEmptyBuckets()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this only on timeseries query and not other query types?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe the other queries generate empty data entries for time buckets that don't have data, has that changed?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the behavior of groupBy with no dimensions?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only creates an entry for each time bucket that actually exists, iirc

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gianm and I fixed some inconsistencies related to empty time buckets when buckets at query interval boundaries and maxtime don't line up, see #705.
It is still not consistent with groupBy, which confuses users (#701).

Do we need this flag for topN as well?

I believe things would be more consistent overall if we skipped empty buckets by default, since if data is missing for an entire segment granularity, those buckets will be missing anyway, and I don't believe results should depend on segment granularity.

I would be in favor of skipping empty buckets by default in 0.7, but we may want to make that change as part of a separate PR.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xvrl re: topN

I don't believe the other queries generate empty data entries for time buckets that don't have data, has that changed?

And yes, skipping empty buckets by default is what timeseries initially did, then I had it auto-generate empty values as an indirect mechanism of figuring out if a segment exists or not (i.e. it will generate 0's if the segment exists and there just is nothing there, where if the segment doesn't exist then it won't generate anything). This proved to be not enough to determine that a segment isn't actually there, though. So, while I agree with you in principle that switching back to the original "never generate empty values" behavior is more correct, the fact is that there might be people who are expecting those values to be generated for them and making this change in a backwards-incompatible manner could make it very difficult for them to actually move forward.

If we want to make the change to timeseries defaulting to not generating anything, that should be done in a subsequent version. That allows people using the system some time to set this parameter first and rework their systems before changing the default.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, we don't have to make those changes in 0.7, only if we felt strongly about making things more consistent and did not want to wait for 0.8 to make that change.

{
return Boolean.parseBoolean(getContextValue("skipEmptyBuckets", "false"));
}

public TimeseriesQuery withQuerySegmentSpec(QuerySegmentSpec querySegmentSpec)
{
return new TimeseriesQuery(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,18 @@ public Sequence<Result<TimeseriesResultValue>> process(final TimeseriesQuery que
query.getGranularity(),
new Function<Cursor, Result<TimeseriesResultValue>>()
{
private final boolean skipEmptyBuckets = query.isSkipEmptyBuckets();
private final List<AggregatorFactory> aggregatorSpecs = query.getAggregatorSpecs();

@Override
public Result<TimeseriesResultValue> apply(Cursor cursor)
{
Aggregator[] aggregators = QueryRunnerHelper.makeAggregators(cursor, aggregatorSpecs);

if (skipEmptyBuckets && cursor.isDone()) {
return null;
}

try {
while (!cursor.isDone()) {
for (Aggregator aggregator : aggregators) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,6 @@ public void scanAndAggregate(
Aggregator[] theAggregators = rowSelector[dimIndex];
if (theAggregators == null) {
String key = query.getDimensionSpec().getDimExtractionFn().apply(dimSelector.lookupName(dimIndex));
if (key == null) {
rowSelector[dimIndex] = EMPTY_ARRAY;
continue;
}
theAggregators = aggregatesStore.get(key);
if (theAggregators == null) {
theAggregators = makeAggregators(cursor, query.getAggregatorSpecs());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ public Map<String, Object> apply(DimensionAndMetricValueExtractor input)
)
);

return new Result<TopNResultValue>(
return new Result<>(
result.getTimestamp(),
new TopNResultValue(serializedValues)
);
Expand Down Expand Up @@ -400,26 +400,23 @@ public Result<TopNResultValue> apply(Object input)
retVal.add(vals);
}

return new Result<TopNResultValue>(timestamp, new TopNResultValue(retVal));
return new Result<>(timestamp, new TopNResultValue(retVal));
}
};
}

@Override
public Sequence<Result<TopNResultValue>> mergeSequences(Sequence<Sequence<Result<TopNResultValue>>> seqOfSequences)
{
return new MergeSequence<Result<TopNResultValue>>(getOrdering(), seqOfSequences);
return new MergeSequence<>(getOrdering(), seqOfSequences);
}
};
}

@Override
public QueryRunner<Result<TopNResultValue>> preMergeQueryDecoration(QueryRunner<Result<TopNResultValue>> runner)
{
return new IntervalChunkingQueryRunner<Result<TopNResultValue>>(
runner,
config.getChunkPeriod()
);
return new IntervalChunkingQueryRunner<>(runner, config.getChunkPeriod());
}

@Override
Expand Down Expand Up @@ -485,7 +482,7 @@ public Result<TopNResultValue> apply(Result<TopNResultValue> input)
@Override
public Result<TopNResultValue> apply(Result<TopNResultValue> input)
{
return new Result<TopNResultValue>(
return new Result<>(
input.getTimestamp(),
new TopNResultValue(
Lists.<Object>newArrayList(
Expand All @@ -505,7 +502,7 @@ public Result<TopNResultValue> apply(Result<TopNResultValue> input)
);
}

return new Result<TopNResultValue>(
return new Result<>(
input.getTimestamp(),
new TopNResultValue(
Lists.<Object>newArrayList(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public QueryRunner<Result<TopNResultValue>> mergeRunners(
ExecutorService queryExecutor, Iterable<QueryRunner<Result<TopNResultValue>>> queryRunners
)
{
return new ChainedExecutionQueryRunner<Result<TopNResultValue>>(
return new ChainedExecutionQueryRunner<>(
queryExecutor, toolchest.getOrdering(), queryWatcher, queryRunners
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package io.druid.segment;

import com.google.common.base.Strings;
import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
Expand Down Expand Up @@ -112,30 +113,18 @@ public ImmutableBitmap getBitmapIndex(String dimension, String value)
{
final Column column = index.getColumn(dimension);
if (column == null) {
return bitmapFactory.makeEmptyImmutableBitmap();
}
if (!column.getCapabilities().hasBitmapIndexes()) {
bitmapFactory.makeEmptyImmutableBitmap();
if (Strings.isNullOrEmpty(value)) {
return bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), getNumRows());
} else {
return bitmapFactory.makeEmptyImmutableBitmap();
}
}

return column.getBitmapIndex().getBitmap(value);
}

@Override
public ImmutableBitmap getBitmapIndex(String dimension, int idx)
{
final Column column = index.getColumn(dimension);
if (column == null || column.getCapabilities() == null) {
bitmapFactory.makeEmptyImmutableBitmap();
}
if (!column.getCapabilities().hasBitmapIndexes()) {
bitmapFactory.makeEmptyImmutableBitmap();
}

// This is a workaround given the current state of indexing, I feel shame
final int index1 = column.getBitmapIndex().hasNulls() ? idx + 1 : idx;

return column.getBitmapIndex().getBitmap(index1);
return column.getBitmapIndex().getBitmap(value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.segment;

import com.google.common.base.Strings;
import com.google.common.collect.Iterators;
import io.druid.segment.data.IndexedInts;

import java.util.Iterator;

public class NullDimensionSelector implements DimensionSelector
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Class is lacking tests.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, I think I found them as part of Timeseries query runner test.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no direct tests, that's true. It is tested through the various queries, but some direct tests would probably also be meaningful.

{

/**
 * Shared, constant row: an IndexedInts that always reports exactly one entry, and that entry is 0.
 */
private static final IndexedInts SINGLETON = new IndexedInts() {
@Override
public int size() {
return 1;
}

// The index argument is ignored; every position reads as 0.
@Override
public int get(int index) {
return 0;
}

// Iterates over the single value 0.
@Override
public Iterator<Integer> iterator() {
return Iterators.singletonIterator(0);
}
};

/**
 * Returns the shared SINGLETON row, which holds the single value 0.
 *
 * @return the constant one-element row
 */
@Override
public IndexedInts getRow()
{
return SINGLETON;
}

/**
 * Reports the number of distinct values this selector exposes, which is always 1.
 *
 * @return 1
 */
@Override
public int getValueCardinality()
{
return 1;
}

/**
 * Looks up the value for an id. The id is ignored: this selector answers null for every id.
 *
 * @param id the id to look up (unused)
 * @return always null
 */
@Override
public String lookupName(int id)
{
return null;
}

/**
 * Maps a value to its id. Null and the empty string are both answered with id 0; any other
 * name is answered with -1.
 *
 * @param name the value to look up; may be null
 * @return 0 when name is null or empty, -1 otherwise
 */
@Override
public int lookupId(String name)
{
  if (Strings.isNullOrEmpty(name)) {
    return 0;
  }
  return -1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
*/
public class QueryableIndexStorageAdapter implements StorageAdapter
{
private static final NullDimensionSelector NULL_DIMENSION_SELECTOR = new NullDimensionSelector();

private final QueryableIndex index;

public QueryableIndexStorageAdapter(
Expand Down Expand Up @@ -274,17 +276,21 @@ public DimensionSelector makeDimensionSelector(String dimension)
{
DictionaryEncodedColumn cachedColumn = dictionaryColumnCache.get(dimension);
final Column columnDesc = index.getColumn(dimension);
if (columnDesc == null) {
return NULL_DIMENSION_SELECTOR;
}

if (cachedColumn == null && columnDesc != null) {
if (cachedColumn == null) {
cachedColumn = columnDesc.getDictionaryEncoding();
dictionaryColumnCache.put(dimension, cachedColumn);
}

final DictionaryEncodedColumn column = cachedColumn;

if (column == null) {
return null;
} else if (columnDesc.getCapabilities().hasMultipleValues()) {
return NULL_DIMENSION_SELECTOR;
}
else if (columnDesc.getCapabilities().hasMultipleValues()) {
return new DimensionSelector()
{
@Override
Expand Down
Loading