Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,8 @@ This product contains a modified version of Andrew Duffy's java-alphanum library
* https://github.com/amjjd/java-alphanum/blob/5c036e2e492cc7f3b7bcdebd46b8f9e2a87927e5/LICENSE.txt (Apache License, Version 2.0)
* HOMEPAGE:
* https://github.com/amjjd/java-alphanum

-------------------------------------------------------------------------------

This product contains code from Apache Hive, licensed under the Apache License, Version 2.0.

27 changes: 27 additions & 0 deletions common/src/main/java/io/druid/cache/Cacheable.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.cache;

/**
 * Implemented by objects that can describe themselves as a byte-array cache key.
 */
public interface Cacheable
{
  /**
   * Returns the bytes that identify this object when it is made part of a cache key.
   *
   * NOTE(review): SerializerUtils.serializeUTFs(Cacheable...) substitutes an empty array when this
   * returns null, so null appears to be tolerated by that caller - confirm whether implementations
   * are actually allowed to return null.
   *
   * @return the cache key bytes for this object
   */
  byte[] getCacheKey();
}
100 changes: 100 additions & 0 deletions common/src/main/java/io/druid/collections/IterableUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.collections;

import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Iterator;

/**
 * Static helpers for combining {@link Iterable}s.
 */
public class IterableUtils
{
  /**
   * Untyped convenience overload of {@link #cartesian(Class, Iterable[])} that produces
   * {@code Object[]} tuples.
   *
   * @param iterables the inputs to combine, one per tuple position
   *
   * @return a lazily evaluated cartesian product of the inputs
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  public static Iterable<Object[]> cartesian(final Iterable... iterables)
  {
    return cartesian(Object.class, iterables);
  }

  /**
   * Returns the cartesian product of the given iterables as a lazily evaluated {@link Iterable}.
   *
   * <p>Tuples are produced in "odometer" order: the last iterable varies fastest and the first one
   * slowest. Every tuple handed out is a fresh array, so callers may keep or modify it. Each call
   * to {@code iterator()} on the result starts an independent traversal.
   *
   * <p>Every input except the first is iterated once per combination of the inputs before it, so
   * the inputs must support repeated calls to {@code iterator()}.
   *
   * <p>Edge cases:
   * <ul>
   * <li>if any input is empty the product is empty;</li>
   * <li>if no inputs are given the product contains exactly one zero-length tuple.</li>
   * </ul>
   *
   * <p>The returned iterators fetch the elements of the upcoming tuple inside {@code hasNext()},
   * honour the {@link Iterator} contract ({@code hasNext()} is idempotent and stays {@code false}
   * once the product is exhausted, {@code next()} may be called without {@code hasNext()} and
   * throws {@link java.util.NoSuchElementException} at the end) and do not support {@code remove()}.
   *
   * @param clazz     component type of the produced arrays
   * @param iterables the inputs to combine, one per tuple position
   * @param <T>       element type
   *
   * @return a lazily evaluated cartesian product of the inputs
   */
  @SafeVarargs
  public static <T> Iterable<T[]> cartesian(final Class<T> clazz, final Iterable<T>... iterables)
  {
    return new Iterable<T[]>()
    {
      @Override
      public Iterator<T[]> iterator()
      {
        return new Iterator<T[]>()
        {
          // One live iterator per tuple position.
          @SuppressWarnings("unchecked")
          private final Iterator<T>[] iterators = new Iterator[iterables.length];

          // The tuple that the next call to next() hands out (as a copy).
          @SuppressWarnings("unchecked")
          private final T[] current = (T[]) Array.newInstance(clazz, iterables.length);

          // false until the very first tuple has been looked for
          private boolean started = false;
          // true while 'current' holds a tuple that has not been handed out yet
          private boolean ready = false;
          // true once the product is known to be finished; never reset, so the iterator cannot restart
          private boolean exhausted = false;

          @Override
          public boolean hasNext()
          {
            if (ready) {
              return true;
            }
            if (exhausted) {
              return false;
            }
            ready = started ? advance() : fill(0);
            started = true;
            exhausted = !ready;
            return ready;
          }

          /**
           * (Re)opens the iterators for positions {@code from..end} and loads their first elements.
           * Returns false if one of them is empty, in which case no further tuple exists.
           */
          private boolean fill(int from)
          {
            for (int index = from; index < iterables.length; index++) {
              iterators[index] = iterables[index].iterator();
              if (!iterators[index].hasNext()) {
                return false;
              }
              current[index] = iterators[index].next();
            }
            return true;
          }

          /**
           * Moves to the following tuple: steps the right-most position that still has elements and
           * rewinds every position after it. Returns false when all positions are used up.
           */
          private boolean advance()
          {
            for (int index = iterables.length - 1; index >= 0; index--) {
              if (iterators[index].hasNext()) {
                current[index] = iterators[index].next();
                return fill(index + 1);
              }
            }
            return false;
          }

          @Override
          public T[] next()
          {
            if (!hasNext()) {
              throw new java.util.NoSuchElementException();
            }
            ready = false;
            // copy, so later advancing does not change arrays already handed out
            return Arrays.copyOf(current, current.length);
          }

          @Override
          public void remove()
          {
            throw new UnsupportedOperationException("remove");
          }
        };
      }
    };
  }
}
44 changes: 43 additions & 1 deletion common/src/main/java/io/druid/common/utils/SerializerUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import com.google.common.io.ByteStreams;
import com.google.common.io.OutputSupplier;
import com.google.common.primitives.Ints;
import com.metamx.common.Pair;
import com.metamx.common.StringUtils;
import io.druid.cache.Cacheable;
import io.druid.collections.IntList;

import java.io.IOException;
Expand All @@ -37,6 +39,8 @@
public class SerializerUtils
{
private static final Charset UTF8 = Charset.forName("UTF-8");
private static final byte[] EMPTY_BYTES = new byte[0];
private static final byte[][] EMPTY_BYTES_ARRAY = new byte[][] {EMPTY_BYTES};

public <T extends OutputStream> void writeString(T out, String name) throws IOException
{
Expand Down Expand Up @@ -72,14 +76,52 @@ public String readString(ByteBuffer in) throws IOException
final int length = in.getInt();
return StringUtils.fromUtf8(readBytes(in, length));
}

/**
 * Copies the next {@code length} bytes out of the buffer into a newly allocated array,
 * moving the buffer's position forward by that many bytes.
 *
 * @param in     buffer to read from
 * @param length number of bytes to read
 *
 * @return a new array holding the bytes that were read
 */
public byte[] readBytes(ByteBuffer in, int length) throws IOException
{
  final byte[] chunk = new byte[length];
  in.get(chunk, 0, chunk.length);
  return chunk;
}

/**
 * Encodes each of the given strings as UTF-8.
 *
 * A null element is represented by an empty byte array and adds nothing to the total length.
 * When the argument array itself is null, the result is {@code (0, EMPTY_BYTES_ARRAY)}, i.e. a
 * shared one-element array holding a single empty byte array.
 *
 * @param values strings to encode; the array and its elements may be null
 *
 * @return a pair of the summed length of all encoded values (lhs) and the encoded values in
 * input order (rhs)
 */
public static Pair<Integer, byte[][]> serializeUTFs(String... values)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we set some UTs for this function

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

{
  if (values == null) {
    return Pair.of(0, EMPTY_BYTES_ARRAY);
  }
  int totalLength = 0;
  final byte[][] bytes = new byte[values.length][];

  for (int idx = 0; idx < values.length; idx++) {
    // Check for null before encoding, so the null -> empty mapping below is reachable
    // whether or not StringUtils.toUtf8 tolerates a null argument.
    final byte[] encoded = values[idx] == null ? null : StringUtils.toUtf8(values[idx]);
    if (encoded == null) {
      bytes[idx] = EMPTY_BYTES;
    } else {
      bytes[idx] = encoded;
      totalLength += encoded.length;
    }
  }
  return Pair.of(totalLength, bytes);
}

/**
 * Collects the cache keys of the given {@link Cacheable}s.
 *
 * A null element, or an element whose {@code getCacheKey()} returns null, is represented by an
 * empty byte array and adds nothing to the total length. When the argument array itself is null,
 * the result is {@code (0, EMPTY_BYTES_ARRAY)}, i.e. a shared one-element array holding a single
 * empty byte array.
 *
 * @param values objects whose cache keys are collected; the array and its elements may be null
 *
 * @return a pair of the summed length of all cache keys (lhs) and the cache keys in input
 * order (rhs)
 */
public static Pair<Integer, byte[][]> serializeUTFs(Cacheable... values)
{
  if (values == null) {
    return Pair.of(0, EMPTY_BYTES_ARRAY);
  }
  int totalLength = 0;
  final byte[][] bytes = new byte[values.length][];

  for (int idx = 0; idx < values.length; idx++) {
    // A null element is handled like a null cache key instead of failing with an NPE,
    // matching how the String overload treats null values.
    final byte[] key = values[idx] == null ? null : values[idx].getCacheKey();
    if (key == null) {
      bytes[idx] = EMPTY_BYTES;
    } else {
      bytes[idx] = key;
      totalLength += key.length;
    }
  }
  return Pair.of(totalLength, bytes);
}

public void writeStrings(OutputStream out, String[] names) throws IOException
{
writeStrings(out, Arrays.asList(names));
Expand Down
58 changes: 58 additions & 0 deletions common/src/test/java/io/druid/collections/IterableUtilsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.collections;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/**
 * Unit test for {@link IterableUtils}.
 */
public class IterableUtilsTest
{
  /**
   * Compares the output of {@code IterableUtils.cartesian} with Guava's
   * {@code Sets.cartesianProduct} for three small sets, element by element and in order.
   */
  @Test
  public void testBasic() throws Exception
  {
    final Set[] inputs = new Set[]{
        ImmutableSet.of("A", "B"),
        ImmutableSet.of("1", "2", "3"),
        ImmutableSet.of("x", "y")
    };

    final Iterable<List<String>> reference = Sets.cartesianProduct(Arrays.<Set<String>>asList(inputs));
    final Iterable<String[]> actual = IterableUtils.cartesian(String.class, inputs);

    // dump every produced tuple once
    final Iterator<String[]> printer = actual.iterator();
    while (printer.hasNext()) {
      final String[] tuple = printer.next();
      System.out.println("> " + Arrays.toString(tuple));
    }

    // walk both products in lock-step and compare their textual forms
    final Iterator<List<String>> wanted = reference.iterator();
    final Iterator<String[]> got = actual.iterator();
    while (wanted.hasNext() && got.hasNext()) {
      final List<String> wantedTuple = wanted.next();
      final String[] gotTuple = got.next();
      Assert.assertEquals(String.valueOf(wantedTuple), Arrays.toString(gotTuple));
    }

    // neither side may have elements left over
    Assert.assertFalse(wanted.hasNext() || got.hasNext());
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package io.druid.common.utils;

import com.metamx.common.Pair;
import io.druid.collections.IntList;
import org.junit.After;
import org.junit.Assert;
Expand Down Expand Up @@ -227,6 +228,16 @@ public void testByteBufferReadStrings() throws IOException
Assert.assertArrayEquals(strings, actuals);
}

/**
 * Checks the total length and the first three encoded values returned by
 * {@code SerializerUtils.serializeUTFs} for the shared {@code strings} fixture.
 */
@Test
public void testSerializeUTF() throws Exception {
  final Pair<Integer, byte[][]> serialized = SerializerUtils.serializeUTFs(strings);

  Assert.assertEquals(4, serialized.lhs.intValue());

  final byte[][] encoded = serialized.rhs;
  Assert.assertEquals(strings.length, encoded.length);
  for (int i = 0; i < 3; i++) {
    Assert.assertArrayEquals(strings[i].getBytes("UTF-8"), encoded[i]);
  }
}

@After
public void tearDown() throws IOException
{
Expand Down
28 changes: 24 additions & 4 deletions docs/content/querying/filters.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,28 @@ The filter specified at field can be any other filter defined on this page.

### JavaScript filter

The JavaScript filter matches a dimension against the specified JavaScript function predicate. The filter matches values for which the function returns true.
The JavaScript filter matches dimensions against the specified JavaScript function predicate. The filter matches values for which the function returns true.

The function takes a single argument, the dimension value, and returns either true or false.
The function takes the same number of arguments as there are dimensions (one value per dimension), and returns either true or false.

```json
{
"type" : "javascript",
"dimension" : <dimension_string>,
"function" : "function(value) { <...> }"
"dimension" : <a dimension_string>,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if this goes into 0.9.0 (which I think it should), we can make the backwards incompatible API change

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need to maintain backwards compatibility to allow for rolling upgrades.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xvrl is correct. I didn't think about rolling upgrades. But we can remove "dimension" from the docs and let new people use "dimensions" everywhere

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@navis let's remove "dimension" from the docs but keep it in the code

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for removing dimension from docs. also, let us explicitly call it deprecated in 0.9.0 release notes so that we can remove it in later release from the code.

"dimensions" : <array of dimension_strings>,
"function" : "function(value1, value2, ...) { <...> }"
"byRow" : "false"
}
```

|property|description|required?|
|--------|-----------|---------|
|type|This String should always be "javascript".|yes|
|dimension|The dimension to perform the search over.|one of `dimension` or `dimensions` is required|
|dimensions|The dimensions to perform the search over.|one of `dimension` or `dimensions` is required|
|function|A JavaScript function to be applied.|yes|
|byRow|If true, a multi-valued column is passed to the function as a single object array. In this case, the bitmap index cannot be used. Default: false.|no|

**Example**
The following matches any dimension values for the dimension `name` between `'bar'` and `'foo'`

Expand All @@ -86,6 +96,16 @@ The following matches any dimension values for the dimension `name` between `'ba
}
```

The following matches rows where the values for the given two dimensions dim1 and dim2 are the same.
```json
{
"type" : "javascript",
"dimensions" : ["dim1","dim2"],
"function" : "function(x, y) { return x === y }",
"byRow" : "true"
}
```

The JavaScript filter supports the use of extraction functions, see [Filtering with Extraction Functions](#filtering-with-extraction-functions) for details.


Expand Down
Loading