-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Attempt to coerce COMPLEX to number in numeric aggregators. #16564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,8 +20,14 @@ | |
| package org.apache.druid.spectator.histogram; | ||
|
|
||
| import com.fasterxml.jackson.databind.ObjectMapper; | ||
| import com.google.common.collect.ImmutableList; | ||
| import com.google.common.collect.ImmutableMap; | ||
| import com.netflix.spectator.api.histogram.PercentileBuckets; | ||
| import org.apache.druid.data.input.InputRow; | ||
| import org.apache.druid.data.input.MapBasedInputRow; | ||
| import org.apache.druid.data.input.impl.NoopInputRowParser; | ||
| import org.apache.druid.jackson.DefaultObjectMapper; | ||
| import org.apache.druid.java.util.common.DateTimes; | ||
| import org.apache.druid.java.util.common.granularity.Granularities; | ||
| import org.apache.druid.java.util.common.guava.Sequence; | ||
| import org.apache.druid.query.Druids; | ||
|
|
@@ -32,6 +38,9 @@ | |
| import org.apache.druid.query.aggregation.AggregationTestHelper; | ||
| import org.apache.druid.query.aggregation.AggregatorFactory; | ||
| import org.apache.druid.query.aggregation.AggregatorUtil; | ||
| import org.apache.druid.query.aggregation.CountAggregatorFactory; | ||
| import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; | ||
| import org.apache.druid.query.groupby.GroupByQuery; | ||
| import org.apache.druid.query.groupby.GroupByQueryConfig; | ||
| import org.apache.druid.query.groupby.GroupByQueryRunnerTest; | ||
| import org.apache.druid.query.groupby.ResultRow; | ||
|
|
@@ -42,13 +51,17 @@ | |
| import org.apache.druid.query.metadata.metadata.SegmentAnalysis; | ||
| import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; | ||
| import org.apache.druid.query.timeseries.TimeseriesResultValue; | ||
| import org.apache.druid.segment.IncrementalIndexSegment; | ||
| import org.apache.druid.segment.IndexIO; | ||
| import org.apache.druid.segment.QueryableIndex; | ||
| import org.apache.druid.segment.QueryableIndexSegment; | ||
| import org.apache.druid.segment.Segment; | ||
| import org.apache.druid.segment.TestHelper; | ||
| import org.apache.druid.segment.column.ColumnConfig; | ||
| import org.apache.druid.segment.incremental.IncrementalIndex; | ||
| import org.apache.druid.testing.InitializedNullHandlingTest; | ||
| import org.apache.druid.timeline.SegmentId; | ||
| import org.joda.time.DateTime; | ||
| import org.junit.Assert; | ||
| import org.junit.Rule; | ||
| import org.junit.Test; | ||
|
|
@@ -59,6 +72,7 @@ | |
| import java.io.File; | ||
| import java.util.ArrayList; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
|
|
@@ -716,6 +730,59 @@ public void testPercentilePostAggregator() throws Exception | |
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testBuildingAndCountingHistogramsIncrementalIndex() throws Exception | ||
| { | ||
| List<String> dimensions = Collections.singletonList("d"); | ||
| int n = 10; | ||
| DateTime startOfDay = DateTimes.of("2000-01-01"); | ||
| List<InputRow> inputRows = new ArrayList<>(n); | ||
| for (int i = 1; i <= n; i++) { | ||
| String val = String.valueOf(i * 1.0d); | ||
|
|
||
| inputRows.add(new MapBasedInputRow( | ||
| startOfDay.plusMinutes(i), | ||
| dimensions, | ||
| ImmutableMap.of("x", i, "d", val) | ||
| )); | ||
| } | ||
|
|
||
| IncrementalIndex index = AggregationTestHelper.createIncrementalIndex( | ||
| inputRows.iterator(), | ||
| new NoopInputRowParser(null), | ||
| new AggregatorFactory[]{ | ||
| new CountAggregatorFactory("count"), | ||
| new SpectatorHistogramAggregatorFactory("histogram", "x") | ||
| }, | ||
| 0, | ||
| Granularities.NONE, | ||
| 100, | ||
| false | ||
| ); | ||
|
|
||
| ImmutableList<Segment> segments = ImmutableList.of( | ||
| new IncrementalIndexSegment(index, SegmentId.dummy("test")), | ||
| helper.persistIncrementalIndex(index, null) | ||
| ); | ||
|
|
||
| GroupByQuery query = new GroupByQuery.Builder() | ||
| .setDataSource("test") | ||
| .setGranularity(Granularities.HOUR) | ||
| .setInterval("1970/2050") | ||
| .setAggregatorSpecs( | ||
| new DoubleSumAggregatorFactory("doubleSum", "histogram") | ||
| ).build(); | ||
|
|
||
| Sequence<ResultRow> seq = helper.runQueryOnSegmentsObjs(segments, query); | ||
|
|
||
| List<ResultRow> results = seq.toList(); | ||
| Assert.assertEquals(1, results.size()); | ||
| // Check timestamp | ||
| Assert.assertEquals(startOfDay.getMillis(), results.get(0).get(0)); | ||
| // Check doubleSum | ||
| Assert.assertEquals(n * segments.size(), (Double) results.get(0).get(1), 0.001); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this should be just
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, the Since
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oops, that's my bad then. Do we need both for this test to be meaningful?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's fine to have both the in-memory and the persisted indexes in the test. This way we can test when the query hits both types of indexes. |
||
| } | ||
|
|
||
| private static void assertResultsMatch(List<ResultRow> results, int rowNum, String expectedProduct) | ||
| { | ||
| ResultRow row = results.get(rowNum); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.druid.spectator.histogram; | ||
|
|
||
| import org.easymock.EasyMock; | ||
| import org.junit.Assert; | ||
| import org.junit.Test; | ||
|
|
||
| public class SpectatorHistogramIndexBasedComplexColumnTest | ||
| { | ||
| @Test | ||
| public void testComplexColumn() | ||
| { | ||
| final SpectatorHistogramIndexed mockIndexed = EasyMock.createMock(SpectatorHistogramIndexed.class); | ||
| EasyMock.replay(mockIndexed); | ||
|
|
||
| final String typeName = "type"; | ||
| final SpectatorHistogramIndexBasedComplexColumn column = | ||
| new SpectatorHistogramIndexBasedComplexColumn("type", mockIndexed); | ||
| Assert.assertEquals(typeName, column.getTypeName()); | ||
| Assert.assertEquals(-1, column.getLength()); | ||
|
|
||
| EasyMock.verify(mockIndexed); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we want to claim a length of -1 here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The method is specced as returning the serialized size of the column in bytes, or -1 if unknown.
`index.size()` returns a row count, which doesn't match the specced behavior. The `SpectatorHistogramIndexed` doesn't seem to know its own serialized size, and the `getLength()` method doesn't seem to be used anywhere important, so I figured changing this to -1 was a good idea. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it's not important then -1 seems fine.
We can very likely compute (or estimate an upper bound) of the size of the column if it will optimize something elsewhere.