Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion docs/content/querying/dimensionspecs.md
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,39 @@ This allows distinguishing between a null dimension and a lookup resulting in a
For example, specifying `{"":"bar","bat":"baz"}` with dimension values `[null, "foo", "bat"]` and replacing missing values with `"oof"` will yield results of `["bar", "oof", "baz"]`.
Omitting the empty string key will cause the missing value to take over. For example, specifying `{"bat":"baz"}` with dimension values `[null, "foo", "bat"]` and replacing missing values with `"oof"` will yield results of `["oof", "oof", "baz"]`.

### Cascade Extraction Function

Provides chained execution of extraction functions.

The `extractionFns` property contains an array of extraction functions, which are executed in array index order.

The following example chains a [regular expression extraction function](#regular-expression-extraction-function), a [javascript extraction function](#javascript-extraction-function), and a [substring extraction function](#substring-extraction-function).

```json
{
"type" : "cascade",
"extractionFns": [
{
"type" : "regex",
"expr" : "/([^/]+)/",
"replaceMissingValues": false,
"replaceMissingValuesWith": null
},
{
"type" : "javascript",
"function" : "function(str) { return \"the \".concat(str) }"
},
{
"type" : "substring",
"index" : 0, "length" : 7
}
]
}
```

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it would be really cool if the example showed the dimension value and the final transformed value

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fjy I added an example.

It will transform dimension values with the specified extraction functions in the order given.
For example, `'/druid/prod/historical'` is transformed to `'the dru'`: the regular expression extraction function first transforms it to `'druid'`, then the javascript extraction function transforms it to `'the druid'`, and lastly the substring extraction function transforms it to `'the dru'`.

### Filtering DimensionSpecs

These are only valid for multi-valued dimensions. If you have a row in Druid that has a multi-valued dimension with values ["v1", "v2", "v3"] and you send a groupBy/topN query grouping by that dimension with a [query filter](filter.html) for value "v1", the response will contain 3 rows containing "v1", "v2" and "v3". This behavior might be unintuitive for some use cases.
Expand All @@ -317,7 +350,7 @@ Then groupBy/topN processing pipeline "explodes" all multi-valued dimensions res

In addition to "query filter" which efficiently selects the rows to be processed, you can use the filtering dimension spec to filter for specific values within the values of a multi-valued dimension. These dimensionSpecs take a delegate DimensionSpec and a filtering criteria. From the "exploded" rows, only rows matching the given filtering criteria are returned in the query result.

The following filtered dimension spec acts as a whiltelist or blacklist for values as per the "isWhitelist" attribute value.
The following filtered dimension spec acts as a whitelist or blacklist for values as per the "isWhitelist" attribute value.
```json
{ "type" : "listFiltered", "delegate" : <dimensionSpec>, "values": <array of strings>, "isWhitelist": <optional attribute for true/false, default is true> }
```
Expand Down
2 changes: 1 addition & 1 deletion docs/content/querying/lookups.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ The `simpleJson` lookupParseSpec does not take any parameters. It is simply a li

```json
"namespaceParseSpec":{
"type": "simpleJson"
"format": "simpleJson"
}
```

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.query.extraction;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Bytes;

import java.util.Arrays;

/**
 * An {@link ExtractionFn} that applies an array of extraction functions one after another.
 *
 * <p>The functions are executed in array index order: the input value is given to
 * {@code extractionFns[0]}, its result is given to {@code extractionFns[1]}, and so on. The result
 * of the last function is the result of the cascade.
 *
 * <p>If the array is empty, the cascade maps every value to {@code null}.
 */
public class CascadeExtractionFn implements ExtractionFn
{
  private static final byte CACHE_TYPE_ID = 0x9;

  /**
   * Chain used when the cascade is configured with an empty array. It is a shared singleton so that
   * all empty cascades compare equal and hash identically; a per-instance default (whose function
   * only has identity equality) would make two empty cascades unequal.
   */
  private static final ChainedExtractionFn DEFAULT_CHAINED_EXTRACTION_FN = new ChainedExtractionFn(
      new ExtractionFn()
      {
        @Override
        public byte[] getCacheKey()
        {
          return new byte[0];
        }

        @Override
        public String apply(Object value)
        {
          return null;
        }

        @Override
        public String apply(String value)
        {
          return null;
        }

        @Override
        public String apply(long value)
        {
          return null;
        }

        @Override
        public boolean preservesOrdering()
        {
          return false;
        }

        @Override
        public ExtractionType getExtractionType()
        {
          return ExtractionType.MANY_TO_ONE;
        }

        @Override
        public String toString()
        {
          return "nullExtractionFn{}";
        }
      },
      null
  );

  private final ExtractionFn[] extractionFns;
  private final ChainedExtractionFn chainedExtractionFn;

  /**
   * Builds the cascade.
   *
   * @param extractionFn functions to apply, in execution order; must not be null and must not
   *                     contain null elements. An empty array yields a cascade that always
   *                     returns null.
   *
   * @throws IllegalArgumentException if the array or any of its elements is null
   */
  @JsonCreator
  public CascadeExtractionFn(
      @JsonProperty("extractionFns") ExtractionFn[] extractionFn
  )
  {
    Preconditions.checkArgument(extractionFn != null, "extractionFns should not be null");
    this.extractionFns = extractionFn;
    if (extractionFns.length == 0) {
      this.chainedExtractionFn = DEFAULT_CHAINED_EXTRACTION_FN;
    } else {
      // Each new link wraps the chain built so far, so the last array element ends up as the
      // outermost link and the first array element is the innermost one (applied first).
      ChainedExtractionFn root = null;
      for (ExtractionFn fn : extractionFns) {
        Preconditions.checkArgument(fn != null, "empty function is not allowed");
        root = new ChainedExtractionFn(fn, root);
      }
      this.chainedExtractionFn = root;
    }
  }

  /**
   * @return the configured extraction functions, in execution order (the array passed to the
   * constructor, not a copy)
   */
  @JsonProperty
  public ExtractionFn[] getExtractionFns()
  {
    return extractionFns;
  }

  /**
   * @return the cascade type id followed by the cache keys of the chained functions, concatenated
   * from the last function to the first
   */
  @Override
  public byte[] getCacheKey()
  {
    byte[] cacheKey = new byte[]{CACHE_TYPE_ID};

    return Bytes.concat(cacheKey, chainedExtractionFn.getCacheKey());
  }

  @Override
  public String apply(Object value)
  {
    return chainedExtractionFn.apply(value);
  }

  @Override
  public String apply(String value)
  {
    return chainedExtractionFn.apply(value);
  }

  @Override
  public String apply(long value)
  {
    return chainedExtractionFn.apply(value);
  }

  /**
   * @return true only if every function in the chain preserves ordering
   */
  @Override
  public boolean preservesOrdering()
  {
    return chainedExtractionFn.preservesOrdering();
  }

  /**
   * @return {@code MANY_TO_ONE} if any function applied before the last one is many-to-one,
   * otherwise the extraction type of the last function
   */
  @Override
  public ExtractionType getExtractionType()
  {
    return chainedExtractionFn.getExtractionType();
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    CascadeExtractionFn that = (CascadeExtractionFn) o;

    return Arrays.equals(extractionFns, that.extractionFns)
           && chainedExtractionFn.equals(that.chainedExtractionFn);
  }

  @Override
  public int hashCode()
  {
    return chainedExtractionFn.hashCode();
  }

  @Override
  public String toString()
  {
    return "CascadeExtractionFn{" +
           "extractionFns=[" + chainedExtractionFn.toString() + "]}";
  }

  /**
   * One link of the cascade: applies {@code child} (the functions that come earlier in the array,
   * if any) and then {@code fn} to the result.
   *
   * <p>Declared static because it does not use any state of the enclosing instance; this also
   * allows the shared {@link #DEFAULT_CHAINED_EXTRACTION_FN} singleton.
   */
  private static class ChainedExtractionFn
  {
    private final ExtractionFn fn;
    private final ChainedExtractionFn child;

    /**
     * @param fn    function applied by this link
     * @param child chain applied before {@code fn}, or null if {@code fn} is the first function
     */
    public ChainedExtractionFn(ExtractionFn fn, ChainedExtractionFn child)
    {
      this.fn = fn;
      this.child = child;
    }

    public byte[] getCacheKey()
    {
      byte[] fnCacheKey = fn.getCacheKey();

      return (child != null) ? Bytes.concat(fnCacheKey, child.getCacheKey()) : fnCacheKey;
    }

    public String apply(Object value)
    {
      if (child == null) {
        return fn.apply(value);
      }
      // The intermediate result is a String, so dispatch to the String overload.
      return fn.apply(child.apply(value));
    }

    public String apply(String value)
    {
      if (child == null) {
        return fn.apply(value);
      }
      return fn.apply(child.apply(value));
    }

    public String apply(long value)
    {
      // Deliberately not a ternary: "cond ? String : long" is typed as a reference type, which
      // would box the long and silently call fn.apply(Object) instead of fn.apply(long).
      if (child == null) {
        return fn.apply(value);
      }
      return fn.apply(child.apply(value));
    }

    public boolean preservesOrdering()
    {
      boolean childPreservesOrdering = (child == null) || child.preservesOrdering();
      return fn.preservesOrdering() && childPreservesOrdering;
    }

    public ExtractionType getExtractionType()
    {
      // Once any earlier step collapses values, the chain as a whole is many-to-one.
      if (child != null && child.getExtractionType() == ExtractionType.MANY_TO_ONE) {
        return ExtractionType.MANY_TO_ONE;
      } else {
        return fn.getExtractionType();
      }
    }

    @Override
    public boolean equals(Object o)
    {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }

      ChainedExtractionFn that = (ChainedExtractionFn) o;

      if (!fn.equals(that.fn)) {
        return false;
      }
      // Null-safe on both sides so that equality stays symmetric when only one chain has a child.
      return (child == null) ? (that.child == null) : child.equals(that.child);
    }

    @Override
    public int hashCode()
    {
      int result = fn.hashCode();
      if (child != null) {
        result = 31 * result + child.hashCode();
      }
      return result;
    }

    /**
     * @return the string forms of the chained functions in execution order, separated by commas
     */
    @Override
    public String toString()
    {
      return (child != null)
             ? Joiner.on(",").join(child.toString(), fn.toString())
             : fn.toString();
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
@JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class),
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class)
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class),
@JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
Expand Down
Loading