From 68ef5f437a455a75f4b688b36a659985efcbc5de Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 17 Nov 2015 19:11:37 -0800 Subject: [PATCH] fixes #1874 adding a substring extraction function, tests, and documentation --- docs/content/querying/dimensionspecs.md | 18 +++ .../druid/query/extraction/ExtractionFn.java | 3 +- .../extraction/SubstringDimExtractionFn.java | 133 ++++++++++++++++++ .../SubstringDimExtractionFnTest.java | 131 +++++++++++++++++ 4 files changed, 284 insertions(+), 1 deletion(-) create mode 100644 processing/src/main/java/io/druid/query/extraction/SubstringDimExtractionFn.java create mode 100644 processing/src/test/java/io/druid/query/extraction/SubstringDimExtractionFnTest.java diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index c350161d9cc9..8d16083ce3b1 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -72,6 +72,24 @@ matches, otherwise returns null. { "type" : "searchQuery", "query" : } ``` +### Substring Extraction Function + +Returns a substring of the dimension value starting from the supplied zero-based index and of the desired length. If the desired +length exceeds the remaining length of the dimension value, the remainder of the string starting at index will be returned. +If index is greater than or equal to the length of the dimension value, null will be returned. + +```json +{ "type" : "substring", "index" : 1, "length" : 4 } +``` + +The length may be omitted for substring to return the remainder of the dimension value starting from index, +or null if index is greater than or equal to the length of the dimension value. + +```json +{ "type" : "substring", "index" : 3 } +``` + + ### Time Format Extraction Function Returns the dimension value formatted according to the given format string, time zone, and locale. 
diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java
index bec928f9d413..537a309690d2 100644
--- a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java
+++ b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java
@@ -31,7 +31,8 @@
     @JsonSubTypes.Type(name = "javascript", value = JavascriptExtractionFn.class),
     @JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class),
     @JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class),
-    @JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class)
+    @JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class),
+    @JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class)
 })
 /**
  * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
diff --git a/processing/src/main/java/io/druid/query/extraction/SubstringDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/SubstringDimExtractionFn.java
new file mode 100644
index 000000000000..0cb0c27803c3
--- /dev/null
+++ b/processing/src/main/java/io/druid/query/extraction/SubstringDimExtractionFn.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.query.extraction;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+
+import javax.annotation.Nullable;
+import java.nio.ByteBuffer;
+
+/**
+ * Extraction function that maps a dimension value to a substring of itself.
+ *
+ * The substring starts at the zero-based {@code index} and spans at most {@code length} characters; it is cut
+ * short at the end of the value when fewer characters remain. When no {@code length} is configured, the remainder
+ * of the value from {@code index} onwards is returned. Null or empty values, and values with no character at
+ * {@code index}, are mapped to null.
+ */
+public class SubstringDimExtractionFn extends DimExtractionFn
+{
+  private static final byte CACHE_TYPE_ID = 0x8;
+
+  // Value stored in "end" when no length was supplied.
+  private static final int NO_END = -1;
+
+  private final int index;
+
+  // Exclusive end offset (index + length), or NO_END when the substring runs to the end of the value.
+  private final int end;
+
+  /**
+   * @param index  zero-based offset of the first character to keep; must be non-negative
+   * @param length maximum number of characters to keep, or null to keep the remainder of the value; must be
+   *               strictly positive when present
+   *
+   * @throws IllegalArgumentException if index is negative or length is present but not strictly positive
+   */
+  @JsonCreator
+  public SubstringDimExtractionFn(
+      @JsonProperty("index") int index,
+      @Nullable
+      @JsonProperty("length") Integer length
+  )
+  {
+    // Fail fast: with a negative index, String.substring would throw for every non-empty value at query time.
+    Preconditions.checkArgument(index >= 0, "index must be non-negative");
+    Preconditions.checkArgument(length == null || length > 0, "length must be strictly positive");
+
+    this.index = index;
+    // Saturate rather than overflow, so that a very large length still means "up to the end of the value".
+    this.end = length != null ? (int) Math.min((long) index + length, Integer.MAX_VALUE) : NO_END;
+  }
+
+  /**
+   * @return one type-id byte followed by the index and the exclusive end offset, each written as a 4-byte int
+   */
+  @Override
+  public byte[] getCacheKey()
+  {
+    return ByteBuffer.allocate(1 + 8)
+                     .put(CACHE_TYPE_ID)
+                     .putInt(this.index)
+                     .putInt(this.end)
+                     .array();
+  }
+
+  /**
+   * @param dimValue dimension value to extract from, may be null
+   *
+   * @return the configured substring of dimValue, or null if dimValue is null, empty, or has no character at index
+   */
+  @Override
+  public String apply(String dimValue)
+  {
+    if (Strings.isNullOrEmpty(dimValue)) {
+      return null;
+    }
+
+    int len = dimValue.length();
+    if (index >= len) {
+      return null;
+    }
+
+    // Clamp the end offset so that a length reaching past the value yields the remainder instead of throwing.
+    return end != NO_END ? dimValue.substring(index, Math.min(end, len)) : dimValue.substring(index);
+  }
+
+  @JsonProperty
+  public int getIndex()
+  {
+    return index;
+  }
+
+  /**
+   * @return the number of characters between index and the end offset, or null if no length was supplied
+   */
+  @JsonProperty
+  public Integer getLength()
+  {
+    return end != NO_END ? end - index : null;
+  }
+
+  /**
+   * @return true only when the substring starts at index 0
+   */
+  @Override
+  public boolean preservesOrdering()
+  {
+    return index == 0;
+  }
+
+  @Override
+  public ExtractionType getExtractionType()
+  {
+    return ExtractionType.MANY_TO_ONE;
+  }
+
+  @Override
+  public boolean equals(Object o)
+  {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+
+    SubstringDimExtractionFn that = (SubstringDimExtractionFn) o;
+    return index == that.index && end == that.end;
+  }
+
+  @Override
+  public int hashCode()
+  {
+    return 31 * index + end;
+  }
+}
diff --git a/processing/src/test/java/io/druid/query/extraction/SubstringDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/SubstringDimExtractionFnTest.java
new file mode 100644
index 000000000000..2f83c3f23e3c
--- /dev/null
+++ b/processing/src/test/java/io/druid/query/extraction/SubstringDimExtractionFnTest.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.query.extraction;
+import com.google.common.collect.Sets;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import io.druid.jackson.DefaultObjectMapper;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Set;
+
+/**
+ * Unit tests for SubstringDimExtractionFn: extraction results, argument validation, cache key, hashCode and serde.
+ */
+public class SubstringDimExtractionFnTest
+{
+  @Test
+  public void testSubstrings()
+  {
+    ExtractionFn extractionFn = new SubstringDimExtractionFn(1, 3);
+
+    Assert.assertEquals("ppl", extractionFn.apply("apple"));
+    Assert.assertEquals("e", extractionFn.apply("be"));
+    Assert.assertEquals("ool", extractionFn.apply("cool"));
+    Assert.assertNull(extractionFn.apply("a"));
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testZeroLength()
+  {
+    new SubstringDimExtractionFn(1, 0);
+  }
+
+  @Test
+  public void testNoLength()
+  {
+    ExtractionFn extractionFnNoLength = new SubstringDimExtractionFn(1, null);
+
+    // no length supplied: the remainder of the value starting at index is returned
+    Assert.assertEquals("abcdef", extractionFnNoLength.apply("/abcdef"));
+
+    // no length supplied and nothing remains at index: result is null
+    Assert.assertNull(extractionFnNoLength.apply("/"));
+  }
+
+  @Test
+  public void testGetCacheKey()
+  {
+    ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2, 4);
+    ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2, 4);
+    ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1, 4);
+
+    Assert.assertArrayEquals(extractionFn1.getCacheKey(), extractionFn2.getCacheKey());
+    Assert.assertFalse(Arrays.equals(extractionFn1.getCacheKey(), extractionFn3.getCacheKey()));
+  }
+
+  @Test
+  public void testHashCode()
+  {
+    ExtractionFn extractionFn1 = new SubstringDimExtractionFn(2, 4);
+    ExtractionFn extractionFn2 = new SubstringDimExtractionFn(2, 4);
+    ExtractionFn extractionFn3 = new SubstringDimExtractionFn(1, 4);
+
+    Assert.assertEquals(extractionFn1.hashCode(), extractionFn2.hashCode());
+    Assert.assertNotEquals(extractionFn1.hashCode(), extractionFn3.hashCode());
+  }
+
+  @Test
+  public void testNullAndEmpty()
+  {
+    ExtractionFn extractionFn = new SubstringDimExtractionFn(2, 4);
+    // empty input value maps to null
+    Assert.assertNull(extractionFn.apply(""));
+    // null value, returns null
+    Assert.assertNull(extractionFn.apply(null));
+    // index equals the value length: nothing to extract, maps to null
+    Assert.assertNull(extractionFn.apply("/a"));
+  }
+
+  @Test
+  public void testSerde() throws Exception
+  {
+    final ObjectMapper objectMapper = new DefaultObjectMapper();
+
+    final String json = "{ \"type\" : \"substring\", \"index\" : 1, \"length\" : 3 }";
+    final String jsonNoLength = "{ \"type\" : \"substring\", \"index\" : 1 }";
+
+    SubstringDimExtractionFn extractionFn = (SubstringDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
+    SubstringDimExtractionFn extractionFnNoLength = (SubstringDimExtractionFn) objectMapper.readValue(jsonNoLength, ExtractionFn.class);
+
+    Assert.assertEquals(1, extractionFn.getIndex());
+    Assert.assertEquals(Integer.valueOf(3), extractionFn.getLength());
+    Assert.assertEquals(1, extractionFnNoLength.getIndex());
+    Assert.assertNull(extractionFnNoLength.getLength());
+
+    // round trip
+    Assert.assertEquals(
+        extractionFn,
+        objectMapper.readValue(
+            objectMapper.writeValueAsBytes(extractionFn),
+            ExtractionFn.class
+        )
+    );
+
+    Assert.assertEquals(
+        extractionFnNoLength,
+        objectMapper.readValue(
+            objectMapper.writeValueAsBytes(extractionFnNoLength),
+            ExtractionFn.class
+        )
+    );
+  }
+}