-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Deserialize complex dimensions in group by queries to their respective types when reading from spilled files and cached results #16620
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
57e8512
12f1d08
7fd9fa8
f1fc861
5590ad6
94b38a8
9e36e42
f647c3f
1c8561c
3dc54d3
33d55ad
c57c5ff
1f3ff61
c2724b9
e9eec6a
6f868d1
abd8274
c0594cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| package org.apache.druid.query.datasourcemetadata; | ||
|
|
||
| import com.fasterxml.jackson.core.type.TypeReference; | ||
| import com.fasterxml.jackson.databind.ObjectMapper; | ||
| import com.google.common.base.Function; | ||
| import com.google.common.base.Functions; | ||
| import com.google.inject.Inject; | ||
|
|
@@ -38,6 +39,7 @@ | |
| import org.apache.druid.query.context.ResponseContext; | ||
| import org.apache.druid.timeline.LogicalSegment; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import java.util.List; | ||
| import java.util.stream.Collectors; | ||
|
|
||
|
|
@@ -119,4 +121,10 @@ public CacheStrategy getCacheStrategy(DataSourceMetadataQuery query) | |
| { | ||
| return null; | ||
| } | ||
|
|
||
| @Override | ||
| public CacheStrategy getCacheStrategy(DataSourceMetadataQuery query, @Nullable ObjectMapper mapper) | ||
| { | ||
| return null; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: if this is returning null is the override really needed?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added for clarity, will remove it.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah maybe is safer to leave since default is to not cache I guess? I don't feel super strongly either way |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,8 +78,10 @@ | |
| import org.apache.druid.segment.column.NullableTypeStrategy; | ||
| import org.apache.druid.segment.column.RowSignature; | ||
| import org.apache.druid.segment.column.ValueType; | ||
| import org.apache.druid.segment.nested.StructuredData; | ||
| import org.joda.time.DateTime; | ||
|
|
||
| import javax.annotation.Nullable; | ||
| import java.io.Closeable; | ||
| import java.io.IOException; | ||
| import java.util.ArrayList; | ||
|
|
@@ -472,7 +474,7 @@ public void serialize( | |
| // Deserializer that can deserialize either array- or map-based rows. | ||
| final JsonDeserializer<ResultRow> deserializer = new JsonDeserializer<ResultRow>() | ||
| { | ||
| final Class<?>[] dimensionClasses = createDimensionClasses(); | ||
| final Class<?>[] dimensionClasses = createDimensionClasses(query); | ||
| boolean containsComplexDimensions = query.getDimensions() | ||
| .stream() | ||
| .anyMatch( | ||
|
|
@@ -525,30 +527,6 @@ public ResultRow deserialize(final JsonParser jp, final DeserializationContext c | |
| return ResultRow.of(objectArray); | ||
| } | ||
| } | ||
|
|
||
| private Class<?>[] createDimensionClasses() | ||
| { | ||
| final List<DimensionSpec> queryDimensions = query.getDimensions(); | ||
| final Class<?>[] classes = new Class[queryDimensions.size()]; | ||
| for (int i = 0; i < queryDimensions.size(); ++i) { | ||
| final ColumnType dimensionOutputType = queryDimensions.get(i).getOutputType(); | ||
| if (dimensionOutputType.is(ValueType.COMPLEX)) { | ||
| NullableTypeStrategy nullableTypeStrategy = dimensionOutputType.getNullableStrategy(); | ||
| if (!nullableTypeStrategy.groupable()) { | ||
| throw DruidException.defensive( | ||
| "Ungroupable dimension [%s] with type [%s] found in the query.", | ||
| queryDimensions.get(i).getDimension(), | ||
| dimensionOutputType | ||
| ); | ||
| } | ||
| classes[i] = nullableTypeStrategy.getClazz(); | ||
| } else { | ||
| classes[i] = Object.class; | ||
| } | ||
| } | ||
| return classes; | ||
| } | ||
|
|
||
| }; | ||
|
|
||
| class GroupByResultRowModule extends SimpleModule | ||
|
|
@@ -598,9 +576,32 @@ public Sequence<ResultRow> run(QueryPlus<ResultRow> queryPlus, ResponseContext r | |
| ); | ||
| } | ||
|
|
||
| @Nullable | ||
| @Override | ||
| public CacheStrategy<ResultRow, Object, GroupByQuery> getCacheStrategy(final GroupByQuery query) | ||
| public CacheStrategy<ResultRow, Object, GroupByQuery> getCacheStrategy(GroupByQuery query) | ||
| { | ||
| return getCacheStrategy(query, null); | ||
| } | ||
|
|
||
| @Override | ||
| public CacheStrategy<ResultRow, Object, GroupByQuery> getCacheStrategy( | ||
| final GroupByQuery query, | ||
| @Nullable final ObjectMapper mapper | ||
| ) | ||
| { | ||
|
|
||
| for (DimensionSpec dimension : query.getDimensions()) { | ||
| if (dimension.getOutputType().is(ValueType.COMPLEX) && !dimension.getOutputType().equals(ColumnType.NESTED_DATA)) { | ||
| if (mapper == null) { | ||
| throw DruidException.defensive( | ||
| "Cannot deserialize complex dimension of type[%s] from result cache if object mapper is not provided", | ||
| dimension.getOutputType().getComplexTypeName() | ||
| ); | ||
| } | ||
| } | ||
| } | ||
| final Class<?>[] dimensionClasses = createDimensionClasses(query); | ||
|
|
||
| return new CacheStrategy<ResultRow, Object, GroupByQuery>() | ||
| { | ||
| private static final byte CACHE_STRATEGY_VERSION = 0x1; | ||
|
|
@@ -727,13 +728,29 @@ public ResultRow apply(Object input) | |
| int dimPos = 0; | ||
| while (dimsIter.hasNext() && results.hasNext()) { | ||
| final DimensionSpec dimensionSpec = dimsIter.next(); | ||
|
|
||
| // Must convert generic Jackson-deserialized type into the proper type. | ||
| resultRow.set( | ||
| dimensionStart + dimPos, | ||
| DimensionHandlerUtils.convertObjectToType(results.next(), dimensionSpec.getOutputType()) | ||
| ); | ||
|
|
||
| final Object dimensionObject = results.next(); | ||
| final Object dimensionObjectCasted; | ||
|
|
||
| final ColumnType outputType = dimensionSpec.getOutputType(); | ||
|
|
||
| // Must convert generic Jackson-deserialized type into the proper type. The downstream functions expect the | ||
| // dimensions to be of appropriate types for further processing like merging and comparing. | ||
| if (outputType.is(ValueType.COMPLEX)) { | ||
| // Json columns can interpret generic data objects appropriately, hence they are wrapped as is in StructuredData. | ||
| // They don't need to converted them from Object.class to StructuredData.class using object mapper as that is an | ||
| // expensive operation that will be wasteful. | ||
| if (outputType.equals(ColumnType.NESTED_DATA)) { | ||
| dimensionObjectCasted = StructuredData.wrap(dimensionObject); | ||
| } else { | ||
| dimensionObjectCasted = mapper.convertValue(dimensionObject, dimensionClasses[dimPos]); | ||
| } | ||
| } else { | ||
| dimensionObjectCasted = DimensionHandlerUtils.convertObjectToType( | ||
| dimensionObject, | ||
| dimensionSpec.getOutputType() | ||
| ); | ||
| } | ||
| resultRow.set(dimensionStart + dimPos, dimensionObjectCasted); | ||
| dimPos++; | ||
| } | ||
|
|
||
|
|
@@ -861,4 +878,27 @@ private static BitSet extractionsToRewrite(GroupByQuery query) | |
|
|
||
| return retVal; | ||
| } | ||
|
|
||
| private static Class<?>[] createDimensionClasses(final GroupByQuery query) | ||
| { | ||
| final List<DimensionSpec> queryDimensions = query.getDimensions(); | ||
| final Class<?>[] classes = new Class[queryDimensions.size()]; | ||
| for (int i = 0; i < queryDimensions.size(); ++i) { | ||
| final ColumnType dimensionOutputType = queryDimensions.get(i).getOutputType(); | ||
| if (dimensionOutputType.is(ValueType.COMPLEX)) { | ||
| NullableTypeStrategy nullableTypeStrategy = dimensionOutputType.getNullableStrategy(); | ||
| if (!nullableTypeStrategy.groupable()) { | ||
| throw DruidException.defensive( | ||
| "Ungroupable dimension [%s] with type [%s] found in the query.", | ||
| queryDimensions.get(i).getDimension(), | ||
| dimensionOutputType | ||
| ); | ||
| } | ||
| classes[i] = nullableTypeStrategy.getClazz(); | ||
| } else { | ||
| classes[i] = Object.class; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any reason not to use the real class here? |
||
| } | ||
| } | ||
| return classes; | ||
| } | ||
| } | ||
Check notice
Code scanning / CodeQL
Deprecated method or constructor invocation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not relevant, the deprecated method is used to maintain backward compatibility.