From bff926e4f41172e7622427aa54d7e1538f6e115f Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 8 Nov 2021 16:54:08 -0800 Subject: [PATCH 01/10] revert ColumnAnalysis type, add typeSignature and use it for DruidSchema --- docs/querying/segmentmetadataquery.md | 8 +- .../druid/query/metadata/SegmentAnalyzer.java | 17 +- .../metadata/metadata/ColumnAnalysis.java | 33 ++- .../apache/druid/query/DoubleStorageTest.java | 19 +- ...egmentMetadataQueryQueryToolChestTest.java | 4 +- .../metadata/SegmentMetadataQueryTest.java | 27 ++- .../SegmentMetadataUnionQueryTest.java | 2 + .../metadata/metadata/ColumnAnalysisTest.java | 204 ++++++++++++++++-- .../druid/sql/calcite/schema/DruidSchema.java | 10 +- 9 files changed, 277 insertions(+), 47 deletions(-) diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index 779d51cd95ca..db3e873ff0d9 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -87,9 +87,11 @@ The format of the result is: } ] ``` -Dimension columns will have type `STRING`, `FLOAT`, `DOUBLE`, or `LONG`. -Metric columns will have type `FLOAT`, `DOUBLE`, or `LONG`, or the name of the underlying complex type such as `hyperUnique` in case of COMPLEX metric. -Timestamp column will have type `LONG`. +All columns will contain a `typeSignature` which is the Druid internal representation of the type information for this column, is what is show in [`INFORMATION_SCHEMA.COLUMNS`](../querying/sql.md#columns-table) table in SQL, and is typically the value used to supply Druid with JSON type information at query or ingest time. This value will be `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX` (e.g. `COMPLEX`). + +Additionally, columns will have a legacy friendly `type` name. This might match `typeSignature` for some column types (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`) but for COMPLEX columns will only contain the name of the underlying complex type such as `hyperUnique`. 
+ +The timestamp column will always have `typeSignature` and `type` as `LONG`. If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are undefined. diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 6e96cd971c33..2a9d705aec66 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -192,7 +192,8 @@ private ColumnAnalysis analyzeNumericColumn( } return new ColumnAnalysis( - capabilities.asTypeString(), + capabilities.toColumnType(), + capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, @@ -248,7 +249,8 @@ private ColumnAnalysis analyzeStringColumn( } return new ColumnAnalysis( - capabilities.asTypeString(), + capabilities.toColumnType(), + capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, @@ -326,7 +328,8 @@ public Long accumulate(Long accumulated, Cursor cursor) } return new ColumnAnalysis( - capabilities.asTypeString(), + capabilities.toColumnType(), + capabilities.getType().name(), capabilities.hasMultipleValues().isTrue(), capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls size, @@ -343,8 +346,6 @@ private ColumnAnalysis analyzeComplexColumn( final String typeName ) { - // serialize using asTypeString (which is also used for JSON so can easily round-trip complex type info back into ColumnType) - final String serdeTypeName = ColumnType.ofComplex(typeName).asTypeString(); try (final ComplexColumn complexColumn = columnHolder != null ? 
(ComplexColumn) columnHolder.getColumn() : null) { final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue(); final boolean hasNulls = capabilities != null && capabilities.hasNulls().isMaybeTrue(); @@ -359,7 +360,8 @@ private ColumnAnalysis analyzeComplexColumn( final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { return new ColumnAnalysis( - serdeTypeName, + capabilities.toColumnType(), + typeName, hasMultipleValues, hasNulls, 0, @@ -377,7 +379,8 @@ private ColumnAnalysis analyzeComplexColumn( } return new ColumnAnalysis( - serdeTypeName, + capabilities.toColumnType(), + typeName, hasMultipleValues, hasNulls, size, diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java index 4edb4b7517dc..87de1d56fb88 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java @@ -22,6 +22,8 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.column.ColumnType; import java.util.Objects; @@ -33,10 +35,11 @@ public class ColumnAnalysis public static ColumnAnalysis error(String reason) { - return new ColumnAnalysis("STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason); + return new ColumnAnalysis(ColumnType.STRING, "STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason); } private final String type; + private final ColumnType typeSignature; private final boolean hasMultipleValues; private final boolean hasNulls; private final long size; @@ -47,6 +50,7 @@ public static ColumnAnalysis error(String reason) @JsonCreator public 
ColumnAnalysis( + @JsonProperty("typeSignature") ColumnType typeSignature, @JsonProperty("type") String type, @JsonProperty("hasMultipleValues") boolean hasMultipleValues, @JsonProperty("hasNulls") boolean hasNulls, @@ -57,6 +61,7 @@ public ColumnAnalysis( @JsonProperty("errorMessage") String errorMessage ) { + this.typeSignature = typeSignature; this.type = type; this.hasMultipleValues = hasMultipleValues; this.hasNulls = hasNulls; @@ -73,6 +78,12 @@ public String getType() return type; } + @JsonProperty + public ColumnType getTypeSignature() + { + return typeSignature; + } + @JsonProperty public boolean isHasMultipleValues() { @@ -136,7 +147,19 @@ public ColumnAnalysis fold(ColumnAnalysis rhs) } if (!type.equals(rhs.getType())) { - return ColumnAnalysis.error("cannot_merge_diff_types"); + return ColumnAnalysis.error( + StringUtils.format("cannot_merge_diff_types: [%s] and [%s]", type, rhs.getType()) + ); + } + + if (!typeSignature.equals(rhs.getTypeSignature())) { + return ColumnAnalysis.error( + StringUtils.format( + "cannot_merge_diff_types: [%s] and [%s]", + typeSignature.asTypeString(), + rhs.getTypeSignature().asTypeString() + ) + ); } Integer cardinality = getCardinality(); @@ -153,6 +176,7 @@ public ColumnAnalysis fold(ColumnAnalysis rhs) Comparable newMax = choose(maxValue, rhs.maxValue, true); return new ColumnAnalysis( + typeSignature, type, multipleValues, hasNulls || rhs.hasNulls, @@ -181,6 +205,7 @@ public String toString() { return "ColumnAnalysis{" + "type='" + type + '\'' + + ", columnType=" + typeSignature + ", hasMultipleValues=" + hasMultipleValues + ", hasNulls=" + hasNulls + ", size=" + size + @@ -205,6 +230,7 @@ public boolean equals(Object o) hasNulls == that.hasNulls && size == that.size && Objects.equals(type, that.type) && + Objects.equals(typeSignature, that.typeSignature) && Objects.equals(cardinality, that.cardinality) && Objects.equals(minValue, that.minValue) && Objects.equals(maxValue, that.maxValue) && @@ -214,6 +240,7 @@ public 
boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(type, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage); + return Objects.hash(type, + typeSignature, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage); } } diff --git a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java index ee58422828e1..d7f2aaa8495d 100644 --- a/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java +++ b/processing/src/test/java/org/apache/druid/query/DoubleStorageTest.java @@ -51,6 +51,7 @@ import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; @@ -155,7 +156,8 @@ public static Collection dataFeeder() ImmutableMap.of( TIME_COLUMN, new ColumnAnalysis( - ValueType.LONG.toString(), + ColumnType.LONG, + ValueType.LONG.name(), false, false, 100, @@ -166,7 +168,8 @@ public static Collection dataFeeder() ), DIM_NAME, new ColumnAnalysis( - ValueType.STRING.toString(), + ColumnType.STRING, + ValueType.STRING.name(), false, false, 120, @@ -177,7 +180,8 @@ public static Collection dataFeeder() ), DIM_FLOAT_NAME, new ColumnAnalysis( - ValueType.DOUBLE.toString(), + ColumnType.DOUBLE, + ValueType.DOUBLE.name(), false, false, 80, @@ -200,7 +204,8 @@ public static Collection dataFeeder() ImmutableMap.of( TIME_COLUMN, new ColumnAnalysis( - ValueType.LONG.toString(), + ColumnType.LONG, + ValueType.LONG.name(), false, false, 100, @@ -211,7 +216,8 @@ public static Collection dataFeeder() ), DIM_NAME, new ColumnAnalysis( - ValueType.STRING.toString(), + ColumnType.STRING, + 
ValueType.STRING.name(), false, false, 120, @@ -222,7 +228,8 @@ public static Collection dataFeeder() ), DIM_FLOAT_NAME, new ColumnAnalysis( - ValueType.FLOAT.toString(), + ColumnType.FLOAT, + ValueType.FLOAT.name(), false, false, 80, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index 752db63d9832..934b433e52f5 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -37,6 +37,7 @@ import org.apache.druid.query.metadata.metadata.SegmentAnalysis; import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.druid.query.spec.LegacySegmentSpec; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.timeline.LogicalSegment; import org.joda.time.Interval; @@ -79,7 +80,8 @@ public void testCacheStrategy() throws Exception ImmutableMap.of( "placement", new ColumnAnalysis( - ValueType.STRING.toString(), + ColumnType.STRING, + ValueType.STRING.name(), true, false, 10881, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index 3e6acc1b62fe..499a061b348c 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -52,6 +52,7 @@ import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import 
org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.timeline.LogicalSegment; @@ -207,6 +208,7 @@ public SegmentMetadataQueryTest( ImmutableMap.of( "__time", new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, false, @@ -218,6 +220,7 @@ public SegmentMetadataQueryTest( ), "index", new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, false, @@ -229,6 +232,7 @@ public SegmentMetadataQueryTest( ), "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -251,6 +255,7 @@ public SegmentMetadataQueryTest( ImmutableMap.of( "__time", new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, false, @@ -262,6 +267,7 @@ public SegmentMetadataQueryTest( ), "index", new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, false, @@ -273,6 +279,7 @@ public SegmentMetadataQueryTest( ), "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -310,6 +317,7 @@ public void testSegmentMetadataQueryWithRollupMerge() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -321,6 +329,7 @@ public void testSegmentMetadataQueryWithRollupMerge() ), "placementish", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), true, false, @@ -380,6 +389,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -391,6 +401,7 @@ public void testSegmentMetadataQueryWithHasMultipleValuesMerge() ), "placementish", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), true, false, @@ -450,6 +461,7 @@ public void testSegmentMetadataQueryWithComplexColumnMerge() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -461,7 +473,8 @@ public 
void testSegmentMetadataQueryWithComplexColumnMerge() ), "quality_uniques", new ColumnAnalysis( - "COMPLEX", + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", false, true, 0, @@ -521,6 +534,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge() size2 = mmap2 ? 10881 : 10764; } ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -543,6 +557,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge2() size2 = mmap2 ? 6882 : 6808; } ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -565,6 +580,7 @@ public void testSegmentMetadataQueryWithDefaultAnalysisMerge3() size2 = mmap2 ? 9765 : 9660; } ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -588,6 +604,7 @@ private void testSegmentMetadataQueryWithDefaultAnalysisMerge( ImmutableMap.of( "__time", new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, false, @@ -599,6 +616,7 @@ private void testSegmentMetadataQueryWithDefaultAnalysisMerge( ), "index", new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, false, @@ -654,6 +672,7 @@ public void testSegmentMetadataQueryWithNoAnalysisTypesMerge() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -717,6 +736,7 @@ public void testSegmentMetadataQueryWithAggregatorsMerge() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -776,6 +796,7 @@ public void testSegmentMetadataQueryWithTimestampSpecMerge() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, @@ -835,6 +856,7 @@ public void testSegmentMetadataQueryWithQueryGranularityMerge() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), 
false, false, @@ -1299,6 +1321,7 @@ public void testAnanlysisTypesBeingSet() public void testLongNullableColumn() { ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.LONG, ValueType.LONG.toString(), false, NullHandling.replaceWithDefault() ? false : true, @@ -1315,6 +1338,7 @@ public void testLongNullableColumn() public void testDoubleNullableColumn() { ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.DOUBLE, ValueType.DOUBLE.toString(), false, NullHandling.replaceWithDefault() ? false : true, @@ -1332,6 +1356,7 @@ public void testDoubleNullableColumn() public void testFloatNullableColumn() { ColumnAnalysis analysis = new ColumnAnalysis( + ColumnType.FLOAT, ValueType.FLOAT.toString(), false, NullHandling.replaceWithDefault() ? false : true, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java index 2680d4b30e25..c8862c57c67b 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataUnionQueryTest.java @@ -36,6 +36,7 @@ import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestIndex; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.Test; @@ -101,6 +102,7 @@ public void testSegmentMetadataUnionQuery() ImmutableMap.of( "placement", new ColumnAnalysis( + ColumnType.STRING, ValueType.STRING.toString(), false, false, diff --git a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java index 798d67a06fb1..9b1f75b5d763 100644 --- 
a/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/metadata/ColumnAnalysisTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnType; import org.junit.Assert; import org.junit.Test; @@ -36,13 +37,43 @@ private void assertSerDe(ColumnAnalysis analysis) throws Exception @Test public void testFoldStringColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, "aaA", "Zzz", null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", true, false, 3L, 4, "aAA", "ZZz", null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + 1L, + 2, + "aaA", + "Zzz", + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + true, + false, + 3L, + 4, + "aAA", + "ZZz", + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", true, false, 4L, 4, "aAA", "Zzz", null); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + true, + false, + 4L, + 4, + "aAA", + "Zzz", + null + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -56,7 +87,17 @@ public void testFoldStringColumns() throws Exception @Test public void testFoldWithNull() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("STRING", false, false, 1L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + 1L, + 2, + null, + null, + null + ); Assert.assertEquals(analysis1, analysis1.fold(null)); assertSerDe(analysis1); } @@ -64,13 +105,43 @@ public void testFoldWithNull() throws Exception @Test public void 
testFoldComplexColumns() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 0L, + null, + null, + null, + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 0L, + null, + null, + null, + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("hyperUnique", false, false, 0L, null, null, null, null); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 0L, + null, + null, + null, + null + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); @@ -84,26 +155,83 @@ public void testFoldComplexColumns() throws Exception @Test public void testFoldDifferentTypes() throws Exception { - final ColumnAnalysis analysis1 = new ColumnAnalysis("hyperUnique", false, false, 1L, 1, null, null, null); - final ColumnAnalysis analysis2 = new ColumnAnalysis("COMPLEX", false, false, 2L, 2, null, null, null); + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", + false, + false, + 1L, + 1, + null, + null, + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.UNKNOWN_COMPLEX, + "COMPLEX", + false, + false, + 2L, + 2, + null, + null, + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis( - "STRING", + final ColumnAnalysis expected = ColumnAnalysis.error("cannot_merge_diff_types: [hyperUnique] and [COMPLEX]"); + final ColumnAnalysis expected2 = 
ColumnAnalysis.error("cannot_merge_diff_types: [COMPLEX] and [hyperUnique]"); + ColumnAnalysis fold1 = analysis1.fold(analysis2); + ColumnAnalysis fold2 = analysis2.fold(analysis1); + Assert.assertEquals(expected, fold1); + Assert.assertEquals(expected2, fold2); + + assertSerDe(fold1); + assertSerDe(fold2); + } + + @Test + public void testFoldDifferentTypeSignatures() throws Exception + { + // not sure that this should be able to happen in real life but if messages are artifically constructed + final ColumnAnalysis analysis1 = new ColumnAnalysis( + ColumnType.ofComplex("hyperUnique"), + "hyperUnique", false, false, - -1L, + 1L, + 1, + null, null, + null + ); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.UNKNOWN_COMPLEX, + "hyperUnique", + false, + false, + 2L, + 2, null, null, - "error:cannot_merge_diff_types" + null + ); + + assertSerDe(analysis1); + assertSerDe(analysis2); + + final ColumnAnalysis expected = ColumnAnalysis.error( + "cannot_merge_diff_types: [COMPLEX] and [COMPLEX]" + ); + final ColumnAnalysis expected2 = ColumnAnalysis.error( + "cannot_merge_diff_types: [COMPLEX] and [COMPLEX]" ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); - Assert.assertEquals(expected, fold2); + Assert.assertEquals(expected2, fold2); assertSerDe(fold1); assertSerDe(fold2); @@ -118,7 +246,17 @@ public void testFoldSameErrors() throws Exception assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + -1L, + null, + null, + null, + "error:foo" + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -132,12 +270,32 @@ public void testFoldSameErrors() throws Exception 
public void testFoldErrorAndNoError() throws Exception { final ColumnAnalysis analysis1 = ColumnAnalysis.error("foo"); - final ColumnAnalysis analysis2 = new ColumnAnalysis("STRING", false, false, 2L, 2, "a", "z", null); + final ColumnAnalysis analysis2 = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + 2L, + 2, + "a", + "z", + null + ); assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:foo"); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + -1L, + null, + null, + null, + "error:foo" + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); @@ -156,7 +314,17 @@ public void testFoldDifferentErrors() throws Exception assertSerDe(analysis1); assertSerDe(analysis2); - final ColumnAnalysis expected = new ColumnAnalysis("STRING", false, false, -1L, null, null, null, "error:multiple_errors"); + final ColumnAnalysis expected = new ColumnAnalysis( + ColumnType.STRING, + "STRING", + false, + false, + -1L, + null, + null, + null, + "error:multiple_errors" + ); ColumnAnalysis fold1 = analysis1.fold(analysis2); ColumnAnalysis fold2 = analysis2.fold(analysis1); Assert.assertEquals(expected, fold1); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java index 0d01d6fa678e..0c1dffa3d0e1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java @@ -884,15 +884,9 @@ private static RowSignature analysisToRowSignature(final SegmentAnalysis analysi continue; } - ColumnType valueType = null; - try { - valueType = ColumnType.fromString(entry.getValue().getType()); - } - catch (IllegalArgumentException ignored) { - } + 
ColumnType valueType = entry.getValue().getTypeSignature(); - // Assume unrecognized types are some flavor of COMPLEX. This throws away information about exactly - // what kind of complex column it is, which we may want to preserve some day. + // this shouldn't happen, but if it does assume types are some flavor of COMPLEX. if (valueType == null) { valueType = ColumnType.UNKNOWN_COMPLEX; } From be1c2ba7e23b039fa055e08d1a45770632b45342 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 8 Nov 2021 18:25:03 -0800 Subject: [PATCH 02/10] review stuffs --- docs/querying/segmentmetadataquery.md | 6 +-- .../SegmentMetadataQueryQueryToolChest.java | 52 ++++--------------- .../metadata/metadata/ColumnAnalysis.java | 32 ++++++++---- .../metadata/metadata/ColumnIncluderator.java | 4 +- .../metadata/SegmentMetadataQuery.java | 32 ++---------- ...egmentMetadataQueryQueryToolChestTest.java | 2 +- 6 files changed, 39 insertions(+), 89 deletions(-) diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index db3e873ff0d9..4bdbd8f4e8d0 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -87,11 +87,11 @@ The format of the result is: } ] ``` -All columns will contain a `typeSignature` which is the Druid internal representation of the type information for this column, is what is show in [`INFORMATION_SCHEMA.COLUMNS`](../querying/sql.md#columns-table) table in SQL, and is typically the value used to supply Druid with JSON type information at query or ingest time. This value will be `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX` (e.g. `COMPLEX`). +All columns contain a `typeSignature` that Druid uses to represent the column type information internally. The `typeSignature` is typically the same value used to identify the JSON type information at query or ingest time. One of: `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX<typeName>`, e.g. `COMPLEX<hyperUnique>`. -Additionally, columns will have a legacy friendly `type` name. 
This might match `typeSignature` for some column types (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`) but for COMPLEX columns will only contain the name of the underlying complex type such as `hyperUnique`. +Columns will also have a legacy `type` name. For some column types, the value may match the `typeSignature` (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`). For `COMPLEX` columns, the `type` only contains the name of the underlying complex type such as `hyperUnique`. -The timestamp column will always have `typeSignature` and `type` as `LONG`. +New applications should use `typeSignature`, not `type`. If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are undefined. diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java index a40bd69ee55c..1bb24ef2e716 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java @@ -23,7 +23,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Functions; -import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -58,8 +57,6 @@ import org.joda.time.DateTime; import org.joda.time.Interval; -import javax.annotation.Nullable; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -75,16 +72,10 @@ public class SegmentMetadataQueryQueryToolChest extends QueryToolChest TYPE_REFERENCE = new TypeReference() { }; - private static final byte[] SEGMENT_METADATA_CACHE_PREFIX = new byte[]{0x4}; + private static final byte 
SEGMENT_METADATA_CACHE_PREFIX = 0x4; private static final byte SEGMENT_METADATA_QUERY = 0x16; - private static final Function MERGE_TRANSFORM_FN = new Function() - { - @Override - public SegmentAnalysis apply(SegmentAnalysis analysis) - { - return finalizeAnalysis(analysis); - } - }; + private static final Function MERGE_TRANSFORM_FN = + SegmentMetadataQueryQueryToolChest::finalizeAnalysis; private final SegmentMetadataQueryConfig config; private final GenericQueryMetricsFactory queryMetricsFactory; @@ -195,13 +186,9 @@ public boolean isCacheable(SegmentMetadataQuery query, boolean willMergeRunners) public byte[] computeCacheKey(SegmentMetadataQuery query) { SegmentMetadataQuery updatedQuery = query.withFinalizedAnalysisTypes(config); - byte[] includerBytes = updatedQuery.getToInclude().getCacheKey(); - byte[] analysisTypesBytes = updatedQuery.getAnalysisTypesCacheKey(); - return ByteBuffer.allocate(1 + includerBytes.length + analysisTypesBytes.length) - .put(SEGMENT_METADATA_CACHE_PREFIX) - .put(includerBytes) - .put(analysisTypesBytes) - .array(); + return new CacheKeyBuilder(SEGMENT_METADATA_CACHE_PREFIX).appendCacheable(updatedQuery.getToInclude()) + .appendCacheables(updatedQuery.getAnalysisTypes()) + .build(); } @Override @@ -223,27 +210,13 @@ public TypeReference getCacheObjectClazz() @Override public Function prepareForCache(boolean isResultLevelCache) { - return new Function() - { - @Override - public SegmentAnalysis apply(@Nullable SegmentAnalysis input) - { - return input; - } - }; + return input -> input; } @Override public Function pullFromCache(boolean isResultLevelCache) { - return new Function() - { - @Override - public SegmentAnalysis apply(@Nullable SegmentAnalysis input) - { - return input; - } - }; + return input -> input; } }; } @@ -266,14 +239,7 @@ public List filterSegments(SegmentMetadataQuery qu return Lists.newArrayList( Iterables.filter( segments, - new Predicate() - { - @Override - public boolean apply(T input) - { - return 
(input.getInterval().overlaps(targetInterval)); - } - } + input -> (input.getInterval().overlaps(targetInterval)) ) ); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java index 87de1d56fb88..4d68cfa32d48 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnAnalysis.java @@ -73,15 +73,16 @@ public ColumnAnalysis( } @JsonProperty - public String getType() + public ColumnType getTypeSignature() { - return type; + return typeSignature; } @JsonProperty - public ColumnType getTypeSignature() + @Deprecated + public String getType() { - return typeSignature; + return type; } @JsonProperty @@ -146,13 +147,13 @@ public ColumnAnalysis fold(ColumnAnalysis rhs) return rhs; } - if (!type.equals(rhs.getType())) { + if (!Objects.equals(type, rhs.getType())) { return ColumnAnalysis.error( StringUtils.format("cannot_merge_diff_types: [%s] and [%s]", type, rhs.getType()) ); } - if (!typeSignature.equals(rhs.getTypeSignature())) { + if (!Objects.equals(typeSignature, rhs.getTypeSignature())) { return ColumnAnalysis.error( StringUtils.format( "cannot_merge_diff_types: [%s] and [%s]", @@ -204,8 +205,8 @@ private T choose(T obj1, T obj2, boolean max) public String toString() { return "ColumnAnalysis{" + - "type='" + type + '\'' + - ", columnType=" + typeSignature + + "typeSignature='" + typeSignature + '\'' + + ", type=" + type + ", hasMultipleValues=" + hasMultipleValues + ", hasNulls=" + hasNulls + ", size=" + size + @@ -229,8 +230,8 @@ public boolean equals(Object o) return hasMultipleValues == that.hasMultipleValues && hasNulls == that.hasNulls && size == that.size && - Objects.equals(type, that.type) && Objects.equals(typeSignature, that.typeSignature) && + Objects.equals(type, that.type) && Objects.equals(cardinality, 
that.cardinality) && Objects.equals(minValue, that.minValue) && Objects.equals(maxValue, that.maxValue) && @@ -240,7 +241,16 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(type, - typeSignature, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage); + return Objects.hash( + typeSignature, + type, + hasMultipleValues, + hasNulls, + size, + cardinality, + minValue, + maxValue, + errorMessage + ); } } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java index 600b2773d2ed..cf092f458d7f 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/ColumnIncluderator.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.java.util.common.Cacheable; /** */ @@ -30,8 +31,7 @@ @JsonSubTypes.Type(name = "all", value = AllColumnIncluderator.class), @JsonSubTypes.Type(name = "list", value = ListColumnIncluderator.class) }) -public interface ColumnIncluderator +public interface ColumnIncluderator extends Cacheable { boolean include(String columnName); - byte[] getCacheKey(); } diff --git a/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java b/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java index 3888f8c989bb..746dc4f224b8 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/metadata/SegmentMetadataQuery.java @@ -23,7 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonValue; import 
com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import org.apache.druid.java.util.common.Cacheable; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.query.BaseQuery; @@ -38,7 +38,6 @@ import org.apache.druid.query.spec.QuerySegmentSpec; import org.joda.time.Interval; -import java.nio.ByteBuffer; import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -46,16 +45,9 @@ public class SegmentMetadataQuery extends BaseQuery { - /** - * The SegmentMetadataQuery cache key may contain UTF-8 column name strings. - * Prepend 0xFF before the analysisTypes as a separator to avoid - * any potential confusion with string values. - */ - public static final byte[] ANALYSIS_TYPES_CACHE_PREFIX = new byte[] {(byte) 0xFF}; - private static final QuerySegmentSpec DEFAULT_SEGMENT_SPEC = new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY); - public enum AnalysisType + public enum AnalysisType implements Cacheable { CARDINALITY, SIZE, @@ -79,6 +71,7 @@ public static AnalysisType fromString(String name) return valueOf(StringUtils.toUpperCase(name)); } + @Override public byte[] getCacheKey() { return new byte[] {(byte) this.ordinal()}; @@ -193,25 +186,6 @@ public boolean hasRollup() return analysisTypes.contains(AnalysisType.ROLLUP); } - public byte[] getAnalysisTypesCacheKey() - { - int size = 1; - List typeBytesList = Lists.newArrayListWithExpectedSize(analysisTypes.size()); - for (AnalysisType analysisType : analysisTypes) { - final byte[] bytes = analysisType.getCacheKey(); - typeBytesList.add(bytes); - size += bytes.length; - } - - final ByteBuffer bytes = ByteBuffer.allocate(size); - bytes.put(ANALYSIS_TYPES_CACHE_PREFIX); - for (byte[] typeBytes : typeBytesList) { - bytes.put(typeBytes); - } - - return bytes.array(); - } - @Override public Query withOverriddenContext(Map contextOverride) { diff --git 
a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index 934b433e52f5..b93c160b2906 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -70,7 +70,7 @@ public void testCacheStrategy() throws Exception new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()).getCacheStrategy(query); // Test cache key generation - byte[] expectedKey = {0x04, 0x01, (byte) 0xFF, 0x00, 0x02, 0x04}; + byte[] expectedKey = {0x04, 0x09, 0x01, 0x0A, 0x00, 0x00, 0x00, 0x03, 0x00, 0x02, 0x04}; byte[] actualKey = strategy.computeCacheKey(query); Assert.assertArrayEquals(expectedKey, actualKey); From 3cf5a81e39cb1959a2be1224e48a95502c04d72e Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 8 Nov 2021 19:44:31 -0800 Subject: [PATCH 03/10] maybe null --- .../java/org/apache/druid/query/metadata/SegmentAnalyzer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index 2a9d705aec66..a60f4eeb9a21 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -360,7 +360,7 @@ private ColumnAnalysis analyzeComplexColumn( final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { return new ColumnAnalysis( - capabilities.toColumnType(), + capabilities == null ? 
ColumnType.UNKNOWN_COMPLEX : capabilities.toColumnType(), typeName, hasMultipleValues, hasNulls, @@ -379,7 +379,7 @@ private ColumnAnalysis analyzeComplexColumn( } return new ColumnAnalysis( - capabilities.toColumnType(), + capabilities == null ? ColumnType.UNKNOWN_COMPLEX : capabilities.toColumnType(), typeName, hasMultipleValues, hasNulls, From d5a32c6c62f95dcd2f4896933635f298423473a2 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 8 Nov 2021 19:45:23 -0800 Subject: [PATCH 04/10] better maybe null --- .../java/org/apache/druid/query/metadata/SegmentAnalyzer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index a60f4eeb9a21..760ca7b2f979 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -360,7 +360,7 @@ private ColumnAnalysis analyzeComplexColumn( final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { return new ColumnAnalysis( - capabilities == null ? ColumnType.UNKNOWN_COMPLEX : capabilities.toColumnType(), + capabilities == null ? ColumnType.ofComplex(typeName) : capabilities.toColumnType(), typeName, hasMultipleValues, hasNulls, @@ -379,7 +379,7 @@ private ColumnAnalysis analyzeComplexColumn( } return new ColumnAnalysis( - capabilities == null ? ColumnType.UNKNOWN_COMPLEX : capabilities.toColumnType(), + capabilities == null ? 
ColumnType.ofComplex(typeName) : capabilities.toColumnType(), typeName, hasMultipleValues, hasNulls, From 17fd3dc2c4f1ead25a2a9eb69e1e756a9d3e1fb5 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 9 Nov 2021 10:35:48 -0800 Subject: [PATCH 05/10] Update docs/querying/segmentmetadataquery.md --- docs/querying/segmentmetadataquery.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index 4bdbd8f4e8d0..08757f705a5c 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -87,7 +87,7 @@ The format of the result is: } ] ``` -All columns contain a `typeSignature` that Druid uses to represent the column type information internally. The `typeSignature` is typically the same value used to identify the JSON type information at query or ingest time. One of: STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX`, e.g. `COMPLEX`. +All columns contain a `typeSignature` that Druid uses to represent the column type information internally. The `typeSignature` is typically the same value used to identify the JSON type information at query or ingest time. One of: `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX`, e.g. `COMPLEX`. Columns will also have a legacy `type` name. For some column types, the value may match the `typeSignature` (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`). For `COMPLEX` columns, the `type` only contains the name of the underlying complex type such as `hyperUnique`. 
From aba20be09853042cde56b4937ca84b6bc935041b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 9 Nov 2021 10:57:13 -0800 Subject: [PATCH 06/10] Update docs/querying/segmentmetadataquery.md Co-authored-by: Charles Smith --- docs/querying/segmentmetadataquery.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index 08757f705a5c..b7ae9377e5ae 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -89,7 +89,7 @@ The format of the result is: All columns contain a `typeSignature` that Druid uses to represent the column type information internally. The `typeSignature` is typically the same value used to identify the JSON type information at query or ingest time. One of: `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX`, e.g. `COMPLEX`. -Columns will also have a legacy `type` name. For some column types, the value may match the `typeSignature` (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`). For `COMPLEX` columns, the `type` only contains the name of the underlying complex type such as `hyperUnique`. +Columns also have a legacy `type` name. For some column types, the value may match the `typeSignature` (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`). For `COMPLEX` columns, the `type` only contains the name of the underlying complex type such as `hyperUnique`. New applications should use `typeSignature`, not `type`. 
From a2214b244305aa1fdc2f70256ce9ae20dfd79bb8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 9 Nov 2021 17:49:07 -0800 Subject: [PATCH 07/10] fix null right --- .../org/apache/druid/query/metadata/SegmentAnalyzer.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index a93412c8bf96..600b773540c8 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -43,6 +43,7 @@ import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.ColumnTypeFactory; import org.apache.druid.segment.column.ComplexColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.TypeSignature; @@ -364,7 +365,7 @@ private ColumnAnalysis analyzeComplexColumn( final Function inputSizeFn = serde.inputSizeFn(); if (inputSizeFn == null) { return new ColumnAnalysis( - capabilities.toColumnType(), + ColumnTypeFactory.ofType(typeSignature), typeName, hasMultipleValues, hasNulls, @@ -383,7 +384,7 @@ private ColumnAnalysis analyzeComplexColumn( } return new ColumnAnalysis( - capabilities.toColumnType(), + ColumnTypeFactory.ofType(typeSignature), typeName, hasMultipleValues, hasNulls, From eca1c1c5013d6d79414ef0cbc70abeba81e09ae7 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 9 Nov 2021 22:31:05 -0800 Subject: [PATCH 08/10] sad --- .../org/apache/druid/query/metadata/SegmentAnalyzerTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java 
b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java index 14ed7833f04b..9e2781f466dc 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentAnalyzerTest.java @@ -295,7 +295,8 @@ public void testAnalyzingSegmentWithNonExistentAggregator() throws IOException Map analyses = analyzer.analyze(segment); ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator); Assert.assertFalse(columnAnalysis.isError()); - Assert.assertEquals("COMPLEX", columnAnalysis.getType()); + Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType()); + Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature()); } // Persist the index. From 870fb3bf9c7033713a0803200e2a619ae99f31cd Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 10 Nov 2021 02:11:36 -0800 Subject: [PATCH 09/10] oops --- .../src/test/resources/hadoop/batch_hadoop_queries.json | 5 +++++ .../src/test/resources/queries/twitterstream_queries.json | 3 +++ .../test/resources/queries/wikipedia_editstream_queries.json | 2 ++ 3 files changed, 10 insertions(+) diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json index 2a390b2ec1f7..564d3f5c1a90 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json @@ -15,6 +15,7 @@ "id": "merged", "columns": { "location": { + "typeSignature": "STRING",, "type": "STRING", "size": 0, "hasMultipleValues": false, @@ -25,6 +26,7 @@ "errorMessage": null }, "user_id_sketch": { + "typeSignature": "COMPLEX", "type": "thetaSketch", "size": 0, "hasMultipleValues": false, @@ -35,6 +37,7 @@ "errorMessage": null }, "other_metric": { + "typeSignature": "COMPLEX", "type": "thetaSketch", "size": 0, 
"hasMultipleValues": false, @@ -45,6 +48,7 @@ "errorMessage": null }, "__time": { + "typeSignature": "LONG", "type": "LONG", "size": 0, "hasMultipleValues": false, @@ -55,6 +59,7 @@ "errorMessage": null }, "product": { + "typeSignature": "STRING", "type": "STRING", "size": 0, "hasMultipleValues": false, diff --git a/integration-tests/src/test/resources/queries/twitterstream_queries.json b/integration-tests/src/test/resources/queries/twitterstream_queries.json index a0796fd4f9b8..cdd4057eb572 100644 --- a/integration-tests/src/test/resources/queries/twitterstream_queries.json +++ b/integration-tests/src/test/resources/queries/twitterstream_queries.json @@ -596,6 +596,7 @@ "intervals":["2013-01-01T00:00:00.000Z/2013-01-02T00:00:00.000Z"], "columns":{ "has_links":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, @@ -618,6 +619,7 @@ "intervals":["2013-01-02T00:00:00.000Z/2013-01-03T00:00:00.000Z"], "columns":{ "has_links":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, @@ -640,6 +642,7 @@ "intervals":["2013-01-03T00:00:00.000Z/2013-01-04T00:00:00.000Z"], "columns":{ "has_links":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, diff --git a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json index 90cd3cbc0e16..59a5c6ca70b8 100644 --- a/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json +++ b/integration-tests/src/test/resources/queries/wikipedia_editstream_queries.json @@ -1402,6 +1402,7 @@ "intervals":["2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z"], "columns":{ "country_name":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, @@ -1412,6 +1413,7 @@ "hasNulls":true }, "language":{ + "typeSignature": "STRING", "type":"STRING", "hasMultipleValues":false, "size":0, From 
945fed671263c86b434923f85ec16e307788c283 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 10 Nov 2021 09:09:04 -0800 Subject: [PATCH 10/10] Update batch_hadoop_queries.json --- .../src/test/resources/hadoop/batch_hadoop_queries.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json index 564d3f5c1a90..50397b1d7f44 100644 --- a/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json +++ b/integration-tests/src/test/resources/hadoop/batch_hadoop_queries.json @@ -15,7 +15,7 @@ "id": "merged", "columns": { "location": { - "typeSignature": "STRING",, + "typeSignature": "STRING", "type": "STRING", "size": 0, "hasMultipleValues": false,