-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Add DimensionSelector id -> X caches. #5106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,28 +23,42 @@ | |
| import io.druid.hll.HyperLogLogCollector; | ||
| import io.druid.query.aggregation.cardinality.CardinalityAggregator; | ||
| import io.druid.segment.DimensionSelector; | ||
| import io.druid.segment.DimensionSelectorUtils; | ||
| import io.druid.segment.data.IndexedInts; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.function.IntFunction; | ||
|
|
||
| public class StringCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy<DimensionSelector> | ||
| public class StringCardinalityAggregatorColumnSelectorStrategy implements CardinalityAggregatorColumnSelectorStrategy | ||
| { | ||
| public static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; | ||
| public static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; | ||
| private static final String CARDINALITY_AGG_NULL_STRING = "\u0000"; | ||
| private static final char CARDINALITY_AGG_SEPARATOR = '\u0001'; | ||
|
|
||
| // Number of entries to cache. Each one is a 128 bit hash, so with overhead, 12500 entries occupy about 250KB | ||
| private static final int CACHE_SIZE = 12500; | ||
|
|
||
| private final DimensionSelector selector; | ||
| private final IntFunction<byte[]> hashFunction; | ||
|
|
||
| public StringCardinalityAggregatorColumnSelectorStrategy(final DimensionSelector selector, final int numRows) | ||
| { | ||
| this.selector = selector; | ||
| this.hashFunction = DimensionSelectorUtils.cacheIfPossible(selector, this::hashOneValue, numRows, CACHE_SIZE); | ||
| } | ||
|
|
||
| @Override | ||
| public void hashRow(DimensionSelector dimSelector, Hasher hasher) | ||
| public void hashRow(final Hasher hasher) | ||
| { | ||
| final IndexedInts row = dimSelector.getRow(); | ||
| final IndexedInts row = selector.getRow(); | ||
| final int size = row.size(); | ||
| // nothing to add to hasher if size == 0, only handle size == 1 and size != 0 cases. | ||
| if (size == 1) { | ||
| final String value = dimSelector.lookupName(row.get(0)); | ||
| final String value = selector.lookupName(row.get(0)); | ||
| hasher.putUnencodedChars(nullToSpecial(value)); | ||
| } else if (size != 0) { | ||
| final String[] values = new String[size]; | ||
| for (int i = 0; i < size; ++i) { | ||
| final String value = dimSelector.lookupName(row.get(i)); | ||
| final String value = selector.lookupName(row.get(i)); | ||
| values[i] = nullToSpecial(value); | ||
| } | ||
| // Values need to be sorted to ensure consistent multi-value ordering across different segments | ||
|
|
@@ -59,17 +73,21 @@ public void hashRow(DimensionSelector dimSelector, Hasher hasher) | |
| } | ||
|
|
||
| @Override | ||
| public void hashValues(DimensionSelector dimSelector, HyperLogLogCollector collector) | ||
| public void hashValues(final HyperLogLogCollector collector) | ||
| { | ||
| IndexedInts row = dimSelector.getRow(); | ||
| IndexedInts row = selector.getRow(); | ||
| for (int i = 0; i < row.size(); i++) { | ||
| int index = row.get(i); | ||
| final String value = dimSelector.lookupName(index); | ||
| collector.add(CardinalityAggregator.hashFn.hashUnencodedChars(nullToSpecial(value)).asBytes()); | ||
| collector.add(hashFunction.apply(row.get(i))); | ||
| } | ||
| } | ||
|
|
||
| private String nullToSpecial(String value) | ||
| private byte[] hashOneValue(final int id) | ||
| { | ||
| final String value = selector.lookupName(id); | ||
| return CardinalityAggregator.hashFn.hashUnencodedChars(nullToSpecial(value)).asBytes(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Better the field called |
||
| } | ||
|
|
||
| private static String nullToSpecial(String value) | ||
| { | ||
| return value == null ? CARDINALITY_AGG_NULL_STRING : value; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| /* | ||
| * Licensed to Metamarkets Group Inc. (Metamarkets) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. Metamarkets licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package io.druid.query.dimension; | ||
|
|
||
| import java.util.function.IntFunction; | ||
|
|
||
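| /** | ||
| * Array cache for an IntFunction, intended for use with DimensionSelectors. | ||
| * | ||
| * This class performs no synchronization, so a single instance must not be shared between threads without | ||
| * external synchronization. | ||
| * | ||
| * @see io.druid.segment.DimensionSelectorUtils#cacheIfPossible | ||
| */ | ||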
| public class ArrayCacheIntFunction<T> implements IntFunction<T> | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add a note that this class is unsafe for concurrent use.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe just call it "ArrayDimensionCache" (and "LruDimensionCache") with a method like "getOrCompute()", and then return method reference in |
||
| { | ||
| private final IntFunction<T> function; | ||
| private final Object[] cache; | ||
|
|
||
| public ArrayCacheIntFunction(final IntFunction<T> function, final int cacheSize) | ||
| { | ||
| this.function = function; | ||
| this.cache = new Object[cacheSize]; | ||
| } | ||
|
|
||
| @Override | ||
| public T apply(final int id) | ||
| { | ||
| // Returns the cached value for "id", computing it with "function" and storing it on a miss. A null result from | ||
| // "function" looks the same as an empty slot, so it is never cached and is recomputed on every call; caching nulls | ||
| // would need an extra data structure. "id" must be in [0, cacheSize), otherwise the array access below throws. | ||
|
|
||
| if (cache[id] == null) { | ||
| final T value = function.apply(id); | ||
| cache[id] = value; | ||
| return value; | ||
| } else { | ||
| //noinspection unchecked | ||
| return (T) cache[id]; | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Field could be final