Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/content/querying/dimensionspecs.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ matches, otherwise returns null.
{ "type" : "searchQuery", "query" : <search_query_spec> }
```

### Substring Extraction Function

Returns a substring of the dimension value starting from the supplied zero-based index and of the desired length.
The length, if supplied, must be strictly positive. If fewer than length characters remain after index, the remainder
of the string starting at index will be returned. If index is greater than or equal to the length of the dimension
value, null will be returned.

```json
{ "type" : "substring", "index" : 1, "length" : 4 }
```

The length may be omitted, in which case substring returns the remainder of the dimension value starting from index,
or null if index is greater than or equal to the length of the dimension value.

```json
{ "type" : "substring", "index" : 3 }
```


### Time Format Extraction Function

Returns the dimension value formatted according to the given format string, time zone, and locale.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
@JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class),
@JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class),
@JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class)
@JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
@JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.query.extraction;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;

import javax.annotation.Nullable;
import java.nio.ByteBuffer;

/**
 * Extraction function that maps a dimension value to one of its substrings.
 *
 * <p>The substring starts at the zero-based {@code index} and spans at most {@code length} characters. When
 * {@code length} is omitted, the substring runs to the end of the value. Null or empty values, and values that
 * have no character at {@code index}, are mapped to null.
 */
public class SubstringDimExtractionFn extends DimExtractionFn
{
  // Leading byte of the cache key produced by getCacheKey().
  private static final byte CACHE_TYPE_ID = 0x8;

  // Value stored in "end" when no length was supplied, i.e. the substring runs to the end of the value.
  private static final int NO_END = -1;

  private final int index;

  // Exclusive end offset (index + length), or NO_END when no length was supplied.
  private final int end;

  /**
   * @param index  zero-based offset of the first character to keep; must not be negative
   * @param length maximum number of characters to keep, or null to keep everything from {@code index} onwards;
   *               must be strictly positive when supplied
   *
   * @throws IllegalArgumentException if {@code index} is negative or {@code length} is not strictly positive
   */
  @JsonCreator
  public SubstringDimExtractionFn(
      @JsonProperty("index") int index,
      @Nullable
      @JsonProperty("length") Integer length
  )
  {
    // Fail fast: a negative index would otherwise only surface as a StringIndexOutOfBoundsException in apply().
    Preconditions.checkArgument(index >= 0, "index must be non-negative");
    Preconditions.checkArgument(length == null || length > 0, "length must be strictly positive");

    this.index = index;
    if (length == null) {
      this.end = NO_END;
    } else {
      // Sum in long and clamp so that a very large length cannot overflow into a negative end offset,
      // which would be indistinguishable from the "no length" case.
      this.end = (int) Math.min((long) index + length, Integer.MAX_VALUE);
    }
  }

  /**
   * Builds the cache key: the type id byte followed by {@code index} and the end offset as two ints.
   */
  @Override
  public byte[] getCacheKey()
  {
    // 1 byte for the type id + 2 ints of 4 bytes each
    return ByteBuffer.allocate(1 + 8)
                     .put(CACHE_TYPE_ID)
                     .putInt(this.index)
                     .putInt(this.end)
                     .array();
  }

  /**
   * Extracts the configured substring from {@code dimValue}.
   *
   * @param dimValue the dimension value, may be null
   *
   * @return the substring starting at {@code index}, truncated to the end of the value when fewer than
   * {@code length} characters remain; null when {@code dimValue} is null or empty or when {@code index} is at or
   * beyond the end of the value
   */
  @Override
  public String apply(String dimValue)
  {
    if (Strings.isNullOrEmpty(dimValue)) {
      return null;
    }

    final int len = dimValue.length();
    if (index >= len) {
      return null;
    }

    if (end == NO_END) {
      return dimValue.substring(index);
    }
    return dimValue.substring(index, Math.min(end, len));
  }

  @JsonProperty
  public int getIndex()
  {
    return index;
  }

  /**
   * @return the configured length, or null when none was supplied. A length so large that {@code index + length}
   * exceeded {@link Integer#MAX_VALUE} is reported as {@code Integer.MAX_VALUE - index}, which selects the same
   * substrings.
   */
  @Nullable
  @JsonProperty
  public Integer getLength()
  {
    if (end == NO_END) {
      return null;
    }
    return end - index;
  }

  /**
   * Reports that ordering is preserved only when the substring is taken from the start of the value.
   */
  @Override
  public boolean preservesOrdering()
  {
    return index == 0;
  }

  @Override
  public ExtractionType getExtractionType()
  {
    return ExtractionType.MANY_TO_ONE;
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    SubstringDimExtractionFn that = (SubstringDimExtractionFn) o;

    return index == that.index && end == that.end;
  }

  @Override
  public int hashCode()
  {
    int result = index;
    result = 31 * result + end;
    return result;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.query.extraction;
import com.google.common.collect.Sets;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.Set;

/**
*/
public class SubstringDimExtractionFnTest
{
@Test
public void testSubstrings()
{
ExtractionFn extractionFn = new SubstringDimExtractionFn(1, 3);

Assert.assertEquals("ppl", extractionFn.apply("apple"));
Assert.assertEquals("e", extractionFn.apply("be"));
Assert.assertEquals("ool", extractionFn.apply("cool"));
Assert.assertEquals(null, extractionFn.apply("a"));
}

@Test (expected = IllegalArgumentException.class)
public void testZeroLength() {
ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1,0);
}

@Test
public void testNoLength()
{
ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1,null);

// 0 length substring returns remainder
Assert.assertEquals("abcdef", extractionFnNoLength.apply("/abcdef"));

// 0 length substring empty result is null
Assert.assertEquals(null, extractionFnNoLength.apply("/"));
}

@Test
public void testGetCacheKey()
{
ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2,4);
ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2,4);
ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1,4);

Assert.assertArrayEquals(extractionFn1.getCacheKey(), extractionFn2.getCacheKey());

Assert.assertFalse(Arrays.equals(extractionFn1.getCacheKey(), extractionFn3.getCacheKey()));
}

@Test
public void testHashCode()
{
ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2,4);
ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2,4);
ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1,4);

Assert.assertEquals(extractionFn1.hashCode(), extractionFn2.hashCode());

Assert.assertNotEquals(extractionFn1.hashCode(), extractionFn3.hashCode());
}

@Test
public void testNullAndEmpty()
{
ExtractionFn extractionFn = new SubstringDimExtractionFn(2,4);
// no match, map empty input value to null
Assert.assertEquals(null, extractionFn.apply(""));
// null value, returns null
Assert.assertEquals(null, extractionFn.apply(null));
// empty match, map empty result to null
Assert.assertEquals(null, extractionFn.apply("/a"));
}

@Test
public void testSerde() throws Exception
{
final ObjectMapper objectMapper = new DefaultObjectMapper();

final String json = "{ \"type\" : \"substring\", \"index\" : 1, \"length\" : 3 }";
final String jsonNoLength = "{ \"type\" : \"substring\", \"index\" : 1 }";

SubstringDimExtractionFn extractionFn = (SubstringDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
SubstringDimExtractionFn extractionFnNoLength = (SubstringDimExtractionFn) objectMapper.readValue(jsonNoLength, ExtractionFn.class);

Assert.assertEquals(1, extractionFn.getIndex());
Assert.assertEquals(new Integer(3), extractionFn.getLength());
Assert.assertEquals(1, extractionFnNoLength.getIndex());
Assert.assertEquals(null, extractionFnNoLength.getLength());

// round trip
Assert.assertEquals(
extractionFn,
objectMapper.readValue(
objectMapper.writeValueAsBytes(extractionFn),
ExtractionFn.class
)
);

Assert.assertEquals(
extractionFnNoLength,
objectMapper.readValue(
objectMapper.writeValueAsBytes(extractionFnNoLength),
ExtractionFn.class
)
);
}
}