From afa9c5bd26e20396bcf4b492851e044969049cef Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 16 Oct 2025 15:02:33 -0700 Subject: [PATCH 1/2] fix bug with NestedCommonFormatColumnFormatSpec default value resolution (#18638) changes: * fix a bug that prevented job and system level auto column format specs from overriding `objectFieldsDictionaryEncoding` and `objectStorageEncoding` even if the user specified no format spec on the column itself --- .../org/apache/druid/segment/IndexSpec.java | 8 +- .../NestedCommonFormatColumnFormatSpec.java | 190 +++++++++--------- ...estedCommonFormatColumnFormatSpecTest.java | 48 +++++ 3 files changed, 147 insertions(+), 99 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/IndexSpec.java b/processing/src/main/java/org/apache/druid/segment/IndexSpec.java index 9726a8137288..0b287f1ff7c4 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexSpec.java @@ -267,9 +267,13 @@ public IndexSpec getEffectiveSpec() } if (autoColumnFormatSpec != null) { - bob.withAutoColumnFormatSpec(autoColumnFormatSpec.getEffectiveSpec(this)); + bob.withAutoColumnFormatSpec( + NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec(autoColumnFormatSpec, this) + ); } else if (defaultSpec.autoColumnFormatSpec != null) { - bob.withAutoColumnFormatSpec(defaultSpec.autoColumnFormatSpec.getEffectiveSpec(this)); + bob.withAutoColumnFormatSpec( + NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec(defaultSpec.autoColumnFormatSpec, this) + ); } return bob.build(); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java index 592e759b72ed..38595696c11a 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java @@ -41,10 +41,9 @@ public class NestedCommonFormatColumnFormatSpec { private static final NestedCommonFormatColumnFormatSpec DEFAULT = - NestedCommonFormatColumnFormatSpec.builder() - .setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY) - .setObjectStorageEncoding(ObjectStorageEncoding.SMILE) - .build(); + builder().setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY) + .setObjectStorageEncoding(ObjectStorageEncoding.SMILE) + .build(); public static Builder builder() { @@ -56,102 +55,27 @@ public static Builder builder(NestedCommonFormatColumnFormatSpec spec) return new Builder(spec); } + /** + * Create a {@link NestedCommonFormatColumnFormatSpec} with all fields fully populated. Values from the supplied + * column format spec take priority, any null values are then populated by checking + * {@link IndexSpec#getAutoColumnFormatSpec()}, then falling back to fields on {@link IndexSpec} itself if applicable, + * and finally resorting to hard coded defaults. + */ public static NestedCommonFormatColumnFormatSpec getEffectiveFormatSpec( @Nullable NestedCommonFormatColumnFormatSpec columnFormatSpec, IndexSpec indexSpec ) { - return Objects.requireNonNullElse(columnFormatSpec, DEFAULT).getEffectiveSpec(indexSpec); - } - - @Nullable - private final StringEncodingStrategy objectFieldsDictionaryEncoding; - @Nullable - private final ObjectStorageEncoding objectStorageEncoding; - @Nullable - private final CompressionStrategy objectStorageCompression; - @Nullable - private final StringEncodingStrategy stringDictionaryEncoding; - @Nullable - private final CompressionStrategy dictionaryEncodedColumnCompression; - @Nullable - private final CompressionFactory.LongEncodingStrategy longColumnEncoding; - @Nullable - private final CompressionStrategy longColumnCompression; - @Nullable - private final CompressionStrategy doubleColumnCompression; - @Nullable - private final BitmapSerdeFactory bitmapEncoding; - - @JsonCreator - public NestedCommonFormatColumnFormatSpec( - @JsonProperty("objectFieldsDictionaryEncoding") @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, - @JsonProperty("objectStorageEncoding") @Nullable ObjectStorageEncoding objectStorageEncoding, - @JsonProperty("objectStorageCompression") @Nullable CompressionStrategy objectStorageCompression, - @JsonProperty("stringDictionaryEncoding") @Nullable StringEncodingStrategy stringDictionaryEncoding, - @JsonProperty("dictionaryEncodedColumnCompression") @Nullable CompressionStrategy dictionaryEncodedColumnCompression, - @JsonProperty("longColumnEncoding") @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, - @JsonProperty("longColumnCompression") @Nullable CompressionStrategy longColumnCompression, - @JsonProperty("doubleColumnCompression") @Nullable CompressionStrategy doubleColumnCompression - ) - { - this( - objectFieldsDictionaryEncoding, - objectStorageEncoding, - objectStorageCompression, - stringDictionaryEncoding, - dictionaryEncodedColumnCompression, - longColumnEncoding, - longColumnCompression, - doubleColumnCompression, - null - ); - } - - /** - * Internal constructor used by {@link Builder} to set {@link #bitmapEncoding} during the process of resolving values - * for {@link #getEffectiveSpec(IndexSpec)}. {@link #bitmapEncoding} cannot vary per column, and is always set from - * {@link IndexSpec#getBitmapSerdeFactory()}. - */ - protected NestedCommonFormatColumnFormatSpec( - @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, - @Nullable ObjectStorageEncoding objectStorageEncoding, - @Nullable CompressionStrategy objectStorageCompression, - @Nullable StringEncodingStrategy stringDictionaryEncoding, - @Nullable CompressionStrategy dictionaryEncodedColumnCompression, - @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, - @Nullable CompressionStrategy longColumnCompression, - @Nullable CompressionStrategy doubleColumnCompression, - @Nullable BitmapSerdeFactory bitmapEncoding - ) - { - this.objectFieldsDictionaryEncoding = objectFieldsDictionaryEncoding; - this.objectStorageEncoding = objectStorageEncoding; - this.objectStorageCompression = objectStorageCompression; - this.stringDictionaryEncoding = stringDictionaryEncoding; - this.dictionaryEncodedColumnCompression = dictionaryEncodedColumnCompression; - this.longColumnEncoding = longColumnEncoding; - this.longColumnCompression = longColumnCompression; - this.doubleColumnCompression = doubleColumnCompression; - this.bitmapEncoding = bitmapEncoding; - } + final Builder builder = columnFormatSpec == null ? builder() : builder(columnFormatSpec); - /** - * Fully populate all fields of {@link NestedCommonFormatColumnFormatSpec}. Null values are populated first checking - * {@link IndexSpec#getAutoColumnFormatSpec()}, then falling back to fields on {@link IndexSpec} itself if applicable, - * and finally resorting to hard coded defaults. - */ - public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) - { - // this is a defensive check, the json spec can't set this, only the builder can - if (bitmapEncoding != null && !bitmapEncoding.equals(indexSpec.getBitmapSerdeFactory())) { + // this is a defensive check, the json spec of the column can't set this, only the builder can + if (builder.bitmapEncoding != null && !builder.bitmapEncoding.equals(indexSpec.getBitmapSerdeFactory())) { throw new ISE( "bitmapEncoding[%s] does not match indexSpec.bitmap[%s]", - bitmapEncoding, + builder.bitmapEncoding, indexSpec.getBitmapSerdeFactory() ); } - Builder builder = new Builder(this); builder.setBitmapEncoding(indexSpec.getBitmapSerdeFactory()); final NestedCommonFormatColumnFormatSpec defaultSpec; @@ -161,7 +85,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) defaultSpec = DEFAULT; } - if (objectFieldsDictionaryEncoding == null) { + if (builder.objectFieldsDictionaryEncoding == null) { if (defaultSpec.getObjectFieldsDictionaryEncoding() != null) { builder.setObjectFieldsDictionaryEncoding(defaultSpec.getObjectFieldsDictionaryEncoding()); } else { @@ -169,7 +93,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (objectStorageEncoding == null) { + if (builder.objectStorageEncoding == null) { if (defaultSpec.getObjectStorageEncoding() != null) { builder.setObjectStorageEncoding(defaultSpec.getObjectStorageEncoding()); } else { @@ -177,7 +101,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (objectStorageCompression == null) { + if (builder.objectStorageCompression == null) { if (defaultSpec.getObjectStorageCompression() != null) { builder.setObjectStorageCompression(defaultSpec.getObjectStorageCompression()); } else if (indexSpec.getJsonCompression() != null) { @@ -187,7 +111,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (stringDictionaryEncoding == null) { + if (builder.stringDictionaryEncoding == null) { if (defaultSpec.getStringDictionaryEncoding() != null) { builder.setStringDictionaryEncoding(defaultSpec.getStringDictionaryEncoding()); } else { @@ -195,7 +119,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (dictionaryEncodedColumnCompression == null) { + if (builder.dictionaryEncodedColumnCompression == null) { if (defaultSpec.getDictionaryEncodedColumnCompression() != null) { builder.setDictionaryEncodedColumnCompression(defaultSpec.getDictionaryEncodedColumnCompression()); } else { @@ -203,7 +127,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (longColumnEncoding == null) { + if (builder.longColumnEncoding == null) { if (defaultSpec.getLongColumnEncoding() != null) { builder.setLongColumnEncoding(defaultSpec.getLongColumnEncoding()); } else { @@ -211,7 +135,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (longColumnCompression == null) { + if (builder.longColumnCompression == null) { if (defaultSpec.getLongColumnCompression() != null) { builder.setLongColumnCompression(defaultSpec.getLongColumnCompression()); } else { @@ -219,7 +143,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (doubleColumnCompression == null) { + if (builder.doubleColumnCompression == null) { if (defaultSpec.getDoubleColumnCompression() != null) { builder.setDoubleColumnCompression(defaultSpec.getDoubleColumnCompression()); } else { @@ -230,6 +154,78 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) return builder.build(); } + @Nullable + private final StringEncodingStrategy objectFieldsDictionaryEncoding; + @Nullable + private final ObjectStorageEncoding objectStorageEncoding; + @Nullable + private final CompressionStrategy objectStorageCompression; + @Nullable + private final StringEncodingStrategy stringDictionaryEncoding; + @Nullable + private final CompressionStrategy dictionaryEncodedColumnCompression; + @Nullable + private final CompressionFactory.LongEncodingStrategy longColumnEncoding; + @Nullable + private final CompressionStrategy longColumnCompression; + @Nullable + private final CompressionStrategy doubleColumnCompression; + @Nullable + private final BitmapSerdeFactory bitmapEncoding; + + @JsonCreator + public NestedCommonFormatColumnFormatSpec( + @JsonProperty("objectFieldsDictionaryEncoding") @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, + @JsonProperty("objectStorageEncoding") @Nullable ObjectStorageEncoding objectStorageEncoding, + @JsonProperty("objectStorageCompression") @Nullable CompressionStrategy objectStorageCompression, + @JsonProperty("stringDictionaryEncoding") @Nullable StringEncodingStrategy stringDictionaryEncoding, + @JsonProperty("dictionaryEncodedColumnCompression") @Nullable CompressionStrategy dictionaryEncodedColumnCompression, + @JsonProperty("longColumnEncoding") @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, + @JsonProperty("longColumnCompression") @Nullable CompressionStrategy longColumnCompression, + @JsonProperty("doubleColumnCompression") @Nullable CompressionStrategy doubleColumnCompression + ) + { + this( + objectFieldsDictionaryEncoding, + objectStorageEncoding, + objectStorageCompression, + stringDictionaryEncoding, + dictionaryEncodedColumnCompression, + longColumnEncoding, + longColumnCompression, + doubleColumnCompression, + null + ); + } + + /** + * Internal constructor used by {@link Builder} to set {@link #bitmapEncoding} during the process of resolving values + * for {@link #getEffectiveFormatSpec(NestedCommonFormatColumnFormatSpec, IndexSpec)}. {@link #bitmapEncoding} cannot + * vary per column, and is always set from {@link IndexSpec#getBitmapSerdeFactory()}. + */ + protected NestedCommonFormatColumnFormatSpec( + @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, + @Nullable ObjectStorageEncoding objectStorageEncoding, + @Nullable CompressionStrategy objectStorageCompression, + @Nullable StringEncodingStrategy stringDictionaryEncoding, + @Nullable CompressionStrategy dictionaryEncodedColumnCompression, + @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, + @Nullable CompressionStrategy longColumnCompression, + @Nullable CompressionStrategy doubleColumnCompression, + @Nullable BitmapSerdeFactory bitmapEncoding + ) + { + this.objectFieldsDictionaryEncoding = objectFieldsDictionaryEncoding; + this.objectStorageEncoding = objectStorageEncoding; + this.objectStorageCompression = objectStorageCompression; + this.stringDictionaryEncoding = stringDictionaryEncoding; + this.dictionaryEncodedColumnCompression = dictionaryEncodedColumnCompression; + this.longColumnEncoding = longColumnEncoding; + this.longColumnCompression = longColumnCompression; + this.doubleColumnCompression = doubleColumnCompression; + this.bitmapEncoding = bitmapEncoding; + } + @Nullable @JsonProperty public StringEncodingStrategy getObjectFieldsDictionaryEncoding() diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java index f489ab11f00e..6ffb613c561f 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java @@ -94,6 +94,54 @@ public void testGetEffectiveSpecDefaults() ); } + @Test + public void testEffectiveSpecIndexSpecOverrides() + { + StringEncodingStrategy frontcoded = new StringEncodingStrategy.FrontCoded(4, FrontCodedIndexed.V1); + NestedCommonFormatColumnFormatSpec defaults = NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec( + null, + IndexSpec.builder() + .withAutoColumnFormatSpec( + NestedCommonFormatColumnFormatSpec.builder() + .setObjectFieldsDictionaryEncoding(frontcoded) + .setObjectStorageEncoding(ObjectStorageEncoding.NONE) + .build() + ) + .withMetricCompression(CompressionStrategy.LZF) + .build() + .getEffectiveSpec() + ); + + Assert.assertEquals( + frontcoded, + defaults.getObjectFieldsDictionaryEncoding() + ); + Assert.assertEquals( + ObjectStorageEncoding.NONE, + defaults.getObjectStorageEncoding() + ); + Assert.assertEquals( + CompressionStrategy.LZ4, + defaults.getObjectStorageCompression() + ); + Assert.assertEquals( + IndexSpec.getDefault().getEffectiveSpec().getDimensionCompression(), + defaults.getDictionaryEncodedColumnCompression() + ); + Assert.assertEquals( + IndexSpec.getDefault().getEffectiveSpec().getStringDictionaryEncoding(), + defaults.getStringDictionaryEncoding() + ); + Assert.assertEquals( + CompressionStrategy.LZF, + defaults.getLongColumnCompression() + ); + Assert.assertEquals( + CompressionStrategy.LZF, + defaults.getDoubleColumnCompression() + ); + } + @Test public void testGetEffectiveSpecMerge() { From 8fc82411a89f0503f3fd9d81fd0d4eea2ddff126 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 17 Oct 2025 11:31:01 -0700 Subject: [PATCH 2/2] fix test to align with 35 branch --- .../nested/NestedCommonFormatColumnFormatSpecTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java index 6ffb613c561f..15cb36befa1c 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java @@ -104,7 +104,6 @@ public void testEffectiveSpecIndexSpecOverrides() .withAutoColumnFormatSpec( NestedCommonFormatColumnFormatSpec.builder() .setObjectFieldsDictionaryEncoding(frontcoded) - .setObjectStorageEncoding(ObjectStorageEncoding.NONE) .build() ) .withMetricCompression(CompressionStrategy.LZF) @@ -116,10 +115,6 @@ public void testEffectiveSpecIndexSpecOverrides() frontcoded, defaults.getObjectFieldsDictionaryEncoding() ); - Assert.assertEquals( - ObjectStorageEncoding.NONE, - defaults.getObjectStorageEncoding() - ); Assert.assertEquals( CompressionStrategy.LZ4, defaults.getObjectStorageCompression()