-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Use binary search to improve DimensionRangeShardSpec lookup #12417
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.druid.timeline.partition; | ||
|
|
||
| import com.google.common.collect.Ordering; | ||
| import org.apache.druid.data.input.InputRow; | ||
| import org.apache.druid.data.input.StringTuple; | ||
| import org.apache.druid.java.util.common.ISE; | ||
| import org.apache.druid.java.util.common.guava.Comparators; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import java.util.Arrays; | ||
| import java.util.Comparator; | ||
| import java.util.List; | ||
|
|
||
| public abstract class BaseDimensionRangeShardSpec implements ShardSpec | ||
| { | ||
| protected final List<String> dimensions; | ||
| @Nullable | ||
| protected final StringTuple start; | ||
| @Nullable | ||
| protected final StringTuple end; | ||
|
|
||
| /** Stores the partitioning dimensions and this shard's start and end tuples exactly as given (no copying, no validation). */ protected BaseDimensionRangeShardSpec( | ||
| List<String> dimensions, | ||
| @Nullable StringTuple start, | ||
| @Nullable StringTuple end | ||
| ) | ||
| { | ||
| /* kept by reference; getLookup later passes this list to createLookup */ this.dimensions = dimensions; | ||
| /* may be null; the lookup's start comparator orders a null start before every non-null tuple */ this.start = start; | ||
| /* may be null; the lookup's end comparator orders a null end after every non-null tuple */ this.end = end; | ||
| } | ||
|
|
||
| /** Returns a lookup over the given shard specs, built by createLookup using this spec's dimensions. */ @Override | ||
| public ShardSpecLookup getLookup(final List<? extends ShardSpec> shardSpecs) | ||
| { | ||
| /* createLookup casts every element to BaseDimensionRangeShardSpec, so the list must contain only that type */ return createLookup(dimensions, shardSpecs); | ||
| } | ||
|
|
||
| private static ShardSpecLookup createLookup(List<String> dimensions, List<? extends ShardSpec> shardSpecs) | ||
| { | ||
| BaseDimensionRangeShardSpec[] rangeShardSpecs = new BaseDimensionRangeShardSpec[shardSpecs.size()]; | ||
| for (int i = 0; i < shardSpecs.size(); i++) { | ||
| rangeShardSpecs[i] = (BaseDimensionRangeShardSpec) shardSpecs.get(i); | ||
| } | ||
| final Comparator<StringTuple> startComparator = Comparators.naturalNullsFirst(); | ||
| final Comparator<StringTuple> endComparator = Ordering.natural().nullsLast(); | ||
|
|
||
| final Comparator<BaseDimensionRangeShardSpec> shardSpecComparator = Comparator | ||
| .comparing((BaseDimensionRangeShardSpec spec) -> spec.start, startComparator) | ||
| .thenComparing(spec -> spec.end, endComparator); | ||
|
|
||
| Arrays.sort(rangeShardSpecs, shardSpecComparator); | ||
|
|
||
| return (long timestamp, InputRow row) -> { | ||
| StringTuple inputRowTuple = getInputRowTuple(dimensions, row); | ||
| int startIndex = 0; | ||
| int endIndex = shardSpecs.size() - 1; | ||
| while (startIndex <= endIndex) { | ||
| int mid = (startIndex + endIndex) >>> 1; | ||
| BaseDimensionRangeShardSpec rangeShardSpec = rangeShardSpecs[mid]; | ||
| if (startComparator.compare(inputRowTuple, rangeShardSpec.start) < 0) { | ||
| endIndex = mid - 1; | ||
| } else if (endComparator.compare(inputRowTuple, rangeShardSpec.end) < 0) { | ||
| return rangeShardSpec; | ||
| } else { | ||
| startIndex = mid + 1; | ||
| } | ||
| } | ||
|
Comment on lines
+73
to
+87
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You could probably simplify this using `Arrays.binarySearch`.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The array's component type is `BaseDimensionRangeShardSpec` while the search key is a `StringTuple`, so I cannot call `Arrays.binarySearch` directly.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You could use a dummy key. It would still be cleaner than writing the binary search logic yourself:
```java
final StringTuple searchTuple = getInputRowTuple(dimensions, row);
final BaseDimensionRangeShardSpec searchKey = new DimensionRangeShardSpec(dimensions, searchTuple, searchTuple, 0, 1);
final int searchResult = Arrays.binarySearch(rangeShardSpecs, searchKey, shardSpecComparator);
if (searchResult < 0) {
  throw new ISE("row[%s] doesn't fit in any shard[%s]", row, shardSpecs);
} else {
  return rangeShardSpecs[searchResult];
}
```
Please let me know if this seems cleaner.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `Arrays.binarySearch` requires the search key to equal one of the array elements, whereas we actually need to find the element whose range equals or contains the search key. For example, given the input (the example values are missing from this copy of the comment), the expected result is [null, "c"], but `Arrays.binarySearch` will return -2, not 0. |
||
| throw new ISE("row[%s] doesn't fit in any shard[%s]", row, shardSpecs); | ||
| }; | ||
| } | ||
|
|
||
| protected static StringTuple getInputRowTuple(List<String> dimensions, InputRow inputRow) | ||
| { | ||
| final String[] inputDimensionValues = new String[dimensions.size()]; | ||
| for (int i = 0; i < dimensions.size(); ++i) { | ||
| // Get the values of this dimension, treat multiple values as null | ||
| List<String> values = inputRow.getDimension(dimensions.get(i)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: please add the comment originally present in this method.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added back |
||
| inputDimensionValues[i] = values != null && values.size() == 1 ? values.get(0) : null; | ||
| } | ||
|
|
||
| return StringTuple.create(inputDimensionValues); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: You could use
`shardSpecs.toArray()` for cleaner code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Calling `toArray` directly causes a compile error because the list's component type is not `BaseDimensionRangeShardSpec`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, that makes sense. I had missed the cast.