From b4436e5a89ebf73f0b6210184bbd6364eee97b20 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 2 Dec 2016 10:53:07 -0800 Subject: [PATCH] Add "strlen" extractionFn. --- docs/content/querying/dimensionspecs.md | 21 +++++- .../extraction/ExtractionCacheHelper.java | 1 + .../druid/query/extraction/ExtractionFn.java | 3 +- .../query/extraction/StrlenExtractionFn.java | 61 +++++++++++++++++ .../extraction/StrlenExtractionFnTest.java | 65 +++++++++++++++++++ 5 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 processing/src/main/java/io/druid/query/extraction/StrlenExtractionFn.java create mode 100644 processing/src/test/java/io/druid/query/extraction/StrlenExtractionFnTest.java diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index 7e1334b173bc..b226b008c988 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -150,9 +150,13 @@ matches, otherwise returns null. ### Substring Extraction Function -Returns a substring of the dimension value starting from the supplied index and of the desired length. If the desired -length exceeds the length of the dimension value, the remainder of the string starting at index will be returned. -If index is greater than the length of the dimension value, null will be returned. +Returns a substring of the dimension value starting from the supplied index and of the desired length. Both index +and length are measured in the number of Unicode code units present in the string as if it were encoded in UTF-16. +Note that some Unicode characters may be represented by two code units. This is the same behavior as the Java String +class's "substring" method. + +If the desired length exceeds the length of the dimension value, the remainder of the string starting at index will +be returned. If index is greater than the length of the dimension value, null will be returned. ```json { "type" : "substring", "index" : 1, "length" : 4 } @@ -165,6 +169,17 @@ or null if index greater than the length of the dimension value. { "type" : "substring", "index" : 3 } ``` +### Strlen Extraction Function + +Returns the length of dimension values, as measured in the number of Unicode code units present in the string as if it +were encoded in UTF-16. Note that some Unicode characters may be represented by two code units. This is the same +behavior as the Java String class's "length" method. + +null strings are considered as having zero length. + +```json +{ "type" : "strlen" } +``` ### Time Format Extraction Function diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java b/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java index dd422f62882c..b96f0177818e 100644 --- a/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java +++ b/processing/src/main/java/io/druid/query/extraction/ExtractionCacheHelper.java @@ -38,4 +38,5 @@ public class ExtractionCacheHelper public static final byte CACHE_TYPE_ID_UPPER = 0xB; public static final byte CACHE_TYPE_ID_LOWER = 0xC; public static final byte CACHE_TYPE_ID_BUCKET = 0xD; + public static final byte CACHE_TYPE_ID_STRLEN = 0xE; } diff --git a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java index e8b1cbfb8209..33b8f6c7d374 100644 --- a/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/ExtractionFn.java @@ -42,7 +42,8 @@ @JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class), @JsonSubTypes.Type(name = "upper", value = UpperExtractionFn.class), @JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class), - @JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class) + @JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class), + @JsonSubTypes.Type(name = "strlen", value = StrlenExtractionFn.class) }) /** * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension) diff --git a/processing/src/main/java/io/druid/query/extraction/StrlenExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/StrlenExtractionFn.java new file mode 100644 index 000000000000..d863a6dff760 --- /dev/null +++ b/processing/src/main/java/io/druid/query/extraction/StrlenExtractionFn.java @@ -0,0 +1,61 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.extraction; + +import com.fasterxml.jackson.annotation.JsonCreator; + +public class StrlenExtractionFn extends DimExtractionFn +{ + private static final StrlenExtractionFn INSTANCE = new StrlenExtractionFn(); + + private StrlenExtractionFn() + { + } + + @JsonCreator + public static StrlenExtractionFn instance() + { + return INSTANCE; + } + + @Override + public String apply(String value) + { + return String.valueOf(value == null ? 0 : value.length()); + } + + @Override + public boolean preservesOrdering() + { + return false; + } + + @Override + public ExtractionType getExtractionType() + { + return ExtractionType.MANY_TO_ONE; + } + + @Override + public byte[] getCacheKey() + { + return new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_STRLEN}; + } +} diff --git a/processing/src/test/java/io/druid/query/extraction/StrlenExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/StrlenExtractionFnTest.java new file mode 100644 index 000000000000..08b102c6cfb2 --- /dev/null +++ b/processing/src/test/java/io/druid/query/extraction/StrlenExtractionFnTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.extraction; + +import com.fasterxml.jackson.databind.ObjectMapper; +import io.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +public class StrlenExtractionFnTest +{ + @Test + public void testApply() + { + Assert.assertEquals("0", StrlenExtractionFn.instance().apply(null)); + Assert.assertEquals("0", StrlenExtractionFn.instance().apply("")); + Assert.assertEquals("1", StrlenExtractionFn.instance().apply("x")); + Assert.assertEquals("3", StrlenExtractionFn.instance().apply("foo")); + Assert.assertEquals("3", StrlenExtractionFn.instance().apply("föo")); + Assert.assertEquals("2", StrlenExtractionFn.instance().apply("\uD83D\uDE02")); + Assert.assertEquals("1", StrlenExtractionFn.instance().apply(1)); + Assert.assertEquals("2", StrlenExtractionFn.instance().apply(-1)); + } + + @Test + public void testGetCacheKey() + { + Assert.assertArrayEquals(StrlenExtractionFn.instance().getCacheKey(), StrlenExtractionFn.instance().getCacheKey()); + } + + @Test + public void testSerde() throws Exception + { + final ObjectMapper objectMapper = new DefaultObjectMapper(); + + final String json = "{ \"type\" : \"strlen\" }"; + + StrlenExtractionFn extractionFn = (StrlenExtractionFn) objectMapper.readValue(json, ExtractionFn.class); + StrlenExtractionFn extractionFnRoundTrip = (StrlenExtractionFn) objectMapper.readValue( + objectMapper.writeValueAsString(extractionFn), + ExtractionFn.class + ); + + // Should all actually be the same instance. + Assert.assertTrue(extractionFn == extractionFnRoundTrip); + Assert.assertTrue(extractionFn == StrlenExtractionFn.instance()); + } +}