From f8c75a0526e093e6c033207c5b553bf68c0de008 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 15 Oct 2025 11:01:27 -0700 Subject: [PATCH 1/2] fix bug with NestedCommonFormatColumnFormatSpec default value resolution changes: * fix a bug that prevented job and system level auto column format specs from overriding `objectFieldsDictionaryEncoding` and `objectStorageEncoding` even if the user specified no format spec on the column itself --- .../NestedCommonFormatColumnFormatSpec.java | 14 +++--- ...estedCommonFormatColumnFormatSpecTest.java | 48 +++++++++++++++++++ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java index 592e759b72ed..e199365f643b 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java @@ -40,11 +40,11 @@ */ public class NestedCommonFormatColumnFormatSpec { - private static final NestedCommonFormatColumnFormatSpec DEFAULT = - NestedCommonFormatColumnFormatSpec.builder() - .setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY) - .setObjectStorageEncoding(ObjectStorageEncoding.SMILE) - .build(); + private static final NestedCommonFormatColumnFormatSpec OPERATOR_DEFAULT = builder().build(); + private static final NestedCommonFormatColumnFormatSpec SYSTEM_DEFAULT = + builder().setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY) + .setObjectStorageEncoding(ObjectStorageEncoding.SMILE) + .build(); public static Builder builder() { @@ -61,7 +61,7 @@ public static NestedCommonFormatColumnFormatSpec getEffectiveFormatSpec( IndexSpec indexSpec ) { - return Objects.requireNonNullElse(columnFormatSpec, DEFAULT).getEffectiveSpec(indexSpec); + return Objects.requireNonNullElse(columnFormatSpec, OPERATOR_DEFAULT).getEffectiveSpec(indexSpec); } @Nullable @@ -158,7 +158,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) if (indexSpec.getAutoColumnFormatSpec() != null) { defaultSpec = indexSpec.getAutoColumnFormatSpec(); } else { - defaultSpec = DEFAULT; + defaultSpec = SYSTEM_DEFAULT; } if (objectFieldsDictionaryEncoding == null) { diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java index f489ab11f00e..6ffb613c561f 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpecTest.java @@ -94,6 +94,54 @@ public void testGetEffectiveSpecDefaults() ); } + @Test + public void testEffectiveSpecIndexSpecOverrides() + { + StringEncodingStrategy frontcoded = new StringEncodingStrategy.FrontCoded(4, FrontCodedIndexed.V1); + NestedCommonFormatColumnFormatSpec defaults = NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec( + null, + IndexSpec.builder() + .withAutoColumnFormatSpec( + NestedCommonFormatColumnFormatSpec.builder() + .setObjectFieldsDictionaryEncoding(frontcoded) + .setObjectStorageEncoding(ObjectStorageEncoding.NONE) + .build() + ) + .withMetricCompression(CompressionStrategy.LZF) + .build() + .getEffectiveSpec() + ); + + Assert.assertEquals( + frontcoded, + defaults.getObjectFieldsDictionaryEncoding() + ); + Assert.assertEquals( + ObjectStorageEncoding.NONE, + defaults.getObjectStorageEncoding() + ); + Assert.assertEquals( + CompressionStrategy.LZ4, + defaults.getObjectStorageCompression() + ); + Assert.assertEquals( + IndexSpec.getDefault().getEffectiveSpec().getDimensionCompression(), + defaults.getDictionaryEncodedColumnCompression() + ); + Assert.assertEquals( + IndexSpec.getDefault().getEffectiveSpec().getStringDictionaryEncoding(), + defaults.getStringDictionaryEncoding() + ); + Assert.assertEquals( + CompressionStrategy.LZF, + defaults.getLongColumnCompression() + ); + Assert.assertEquals( + CompressionStrategy.LZF, + defaults.getDoubleColumnCompression() + ); + } + @Test public void testGetEffectiveSpecMerge() { From e2060e2a588df9358399e4fa5b004e2dda42157c Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 16 Oct 2025 10:07:36 -0700 Subject: [PATCH 2/2] adjustments --- .../org/apache/druid/segment/IndexSpec.java | 8 +- .../NestedCommonFormatColumnFormatSpec.java | 188 +++++++++--------- 2 files changed, 98 insertions(+), 98 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/IndexSpec.java b/processing/src/main/java/org/apache/druid/segment/IndexSpec.java index 9726a8137288..0b287f1ff7c4 100644 --- a/processing/src/main/java/org/apache/druid/segment/IndexSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/IndexSpec.java @@ -267,9 +267,13 @@ public IndexSpec getEffectiveSpec() } if (autoColumnFormatSpec != null) { - bob.withAutoColumnFormatSpec(autoColumnFormatSpec.getEffectiveSpec(this)); + bob.withAutoColumnFormatSpec( + NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec(autoColumnFormatSpec, this) + ); } else if (defaultSpec.autoColumnFormatSpec != null) { - bob.withAutoColumnFormatSpec(defaultSpec.autoColumnFormatSpec.getEffectiveSpec(this)); + bob.withAutoColumnFormatSpec( + NestedCommonFormatColumnFormatSpec.getEffectiveFormatSpec(defaultSpec.autoColumnFormatSpec, this) + ); } return bob.build(); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java index e199365f643b..38595696c11a 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnFormatSpec.java @@ -40,8 +40,7 @@ */ public class NestedCommonFormatColumnFormatSpec { - private static final NestedCommonFormatColumnFormatSpec OPERATOR_DEFAULT = builder().build(); - private static final NestedCommonFormatColumnFormatSpec SYSTEM_DEFAULT = + private static final NestedCommonFormatColumnFormatSpec DEFAULT = builder().setObjectFieldsDictionaryEncoding(StringEncodingStrategy.UTF8_STRATEGY) .setObjectStorageEncoding(ObjectStorageEncoding.SMILE) .build(); @@ -56,112 +55,37 @@ public static Builder builder(NestedCommonFormatColumnFormatSpec spec) return new Builder(spec); } + /** + * Create a {@link NestedCommonFormatColumnFormatSpec} with all fields fully populated. Values from the supplied + * column format spec take priority, any null values are then populated by checking + * {@link IndexSpec#getAutoColumnFormatSpec()}, then falling back to fields on {@link IndexSpec} itself if applicable, + * and finally resorting to hard coded defaults. + */ public static NestedCommonFormatColumnFormatSpec getEffectiveFormatSpec( @Nullable NestedCommonFormatColumnFormatSpec columnFormatSpec, IndexSpec indexSpec ) { - return Objects.requireNonNullElse(columnFormatSpec, OPERATOR_DEFAULT).getEffectiveSpec(indexSpec); - } - - @Nullable - private final StringEncodingStrategy objectFieldsDictionaryEncoding; - @Nullable - private final ObjectStorageEncoding objectStorageEncoding; - @Nullable - private final CompressionStrategy objectStorageCompression; - @Nullable - private final StringEncodingStrategy stringDictionaryEncoding; - @Nullable - private final CompressionStrategy dictionaryEncodedColumnCompression; - @Nullable - private final CompressionFactory.LongEncodingStrategy longColumnEncoding; - @Nullable - private final CompressionStrategy longColumnCompression; - @Nullable - private final CompressionStrategy doubleColumnCompression; - @Nullable - private final BitmapSerdeFactory bitmapEncoding; - - @JsonCreator - public NestedCommonFormatColumnFormatSpec( - @JsonProperty("objectFieldsDictionaryEncoding") @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, - @JsonProperty("objectStorageEncoding") @Nullable ObjectStorageEncoding objectStorageEncoding, - @JsonProperty("objectStorageCompression") @Nullable CompressionStrategy objectStorageCompression, - @JsonProperty("stringDictionaryEncoding") @Nullable StringEncodingStrategy stringDictionaryEncoding, - @JsonProperty("dictionaryEncodedColumnCompression") @Nullable CompressionStrategy dictionaryEncodedColumnCompression, - @JsonProperty("longColumnEncoding") @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, - @JsonProperty("longColumnCompression") @Nullable CompressionStrategy longColumnCompression, - @JsonProperty("doubleColumnCompression") @Nullable CompressionStrategy doubleColumnCompression - ) - { - this( - objectFieldsDictionaryEncoding, - objectStorageEncoding, - objectStorageCompression, - stringDictionaryEncoding, - dictionaryEncodedColumnCompression, - longColumnEncoding, - longColumnCompression, - doubleColumnCompression, - null - ); - } - - /** - * Internal constructor used by {@link Builder} to set {@link #bitmapEncoding} during the process of resolving values - * for {@link #getEffectiveSpec(IndexSpec)}. {@link #bitmapEncoding} cannot vary per column, and is always set from - * {@link IndexSpec#getBitmapSerdeFactory()}. - */ - protected NestedCommonFormatColumnFormatSpec( - @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, - @Nullable ObjectStorageEncoding objectStorageEncoding, - @Nullable CompressionStrategy objectStorageCompression, - @Nullable StringEncodingStrategy stringDictionaryEncoding, - @Nullable CompressionStrategy dictionaryEncodedColumnCompression, - @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, - @Nullable CompressionStrategy longColumnCompression, - @Nullable CompressionStrategy doubleColumnCompression, - @Nullable BitmapSerdeFactory bitmapEncoding - ) - { - this.objectFieldsDictionaryEncoding = objectFieldsDictionaryEncoding; - this.objectStorageEncoding = objectStorageEncoding; - this.objectStorageCompression = objectStorageCompression; - this.stringDictionaryEncoding = stringDictionaryEncoding; - this.dictionaryEncodedColumnCompression = dictionaryEncodedColumnCompression; - this.longColumnEncoding = longColumnEncoding; - this.longColumnCompression = longColumnCompression; - this.doubleColumnCompression = doubleColumnCompression; - this.bitmapEncoding = bitmapEncoding; - } + final Builder builder = columnFormatSpec == null ? builder() : builder(columnFormatSpec); - /** - * Fully populate all fields of {@link NestedCommonFormatColumnFormatSpec}. Null values are populated first checking - * {@link IndexSpec#getAutoColumnFormatSpec()}, then falling back to fields on {@link IndexSpec} itself if applicable, - * and finally resorting to hard coded defaults. - */ - public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) - { - // this is a defensive check, the json spec can't set this, only the builder can - if (bitmapEncoding != null && !bitmapEncoding.equals(indexSpec.getBitmapSerdeFactory())) { + // this is a defensive check, the json spec of the column can't set this, only the builder can + if (builder.bitmapEncoding != null && !builder.bitmapEncoding.equals(indexSpec.getBitmapSerdeFactory())) { throw new ISE( "bitmapEncoding[%s] does not match indexSpec.bitmap[%s]", - bitmapEncoding, + builder.bitmapEncoding, indexSpec.getBitmapSerdeFactory() ); } - Builder builder = new Builder(this); builder.setBitmapEncoding(indexSpec.getBitmapSerdeFactory()); final NestedCommonFormatColumnFormatSpec defaultSpec; if (indexSpec.getAutoColumnFormatSpec() != null) { defaultSpec = indexSpec.getAutoColumnFormatSpec(); } else { - defaultSpec = SYSTEM_DEFAULT; + defaultSpec = DEFAULT; } - if (objectFieldsDictionaryEncoding == null) { + if (builder.objectFieldsDictionaryEncoding == null) { if (defaultSpec.getObjectFieldsDictionaryEncoding() != null) { builder.setObjectFieldsDictionaryEncoding(defaultSpec.getObjectFieldsDictionaryEncoding()); } else { @@ -169,7 +93,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (objectStorageEncoding == null) { + if (builder.objectStorageEncoding == null) { if (defaultSpec.getObjectStorageEncoding() != null) { builder.setObjectStorageEncoding(defaultSpec.getObjectStorageEncoding()); } else { @@ -177,7 +101,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (objectStorageCompression == null) { + if (builder.objectStorageCompression == null) { if (defaultSpec.getObjectStorageCompression() != null) { builder.setObjectStorageCompression(defaultSpec.getObjectStorageCompression()); } else if (indexSpec.getJsonCompression() != null) { @@ -187,7 +111,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (stringDictionaryEncoding == null) { + if (builder.stringDictionaryEncoding == null) { if (defaultSpec.getStringDictionaryEncoding() != null) { builder.setStringDictionaryEncoding(defaultSpec.getStringDictionaryEncoding()); } else { @@ -195,7 +119,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (dictionaryEncodedColumnCompression == null) { + if (builder.dictionaryEncodedColumnCompression == null) { if (defaultSpec.getDictionaryEncodedColumnCompression() != null) { builder.setDictionaryEncodedColumnCompression(defaultSpec.getDictionaryEncodedColumnCompression()); } else { @@ -203,7 +127,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (longColumnEncoding == null) { + if (builder.longColumnEncoding == null) { if (defaultSpec.getLongColumnEncoding() != null) { builder.setLongColumnEncoding(defaultSpec.getLongColumnEncoding()); } else { @@ -211,7 +135,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (longColumnCompression == null) { + if (builder.longColumnCompression == null) { if (defaultSpec.getLongColumnCompression() != null) { builder.setLongColumnCompression(defaultSpec.getLongColumnCompression()); } else { @@ -219,7 +143,7 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) } } - if (doubleColumnCompression == null) { + if (builder.doubleColumnCompression == null) { if (defaultSpec.getDoubleColumnCompression() != null) { builder.setDoubleColumnCompression(defaultSpec.getDoubleColumnCompression()); } else { @@ -230,6 +154,78 @@ public NestedCommonFormatColumnFormatSpec getEffectiveSpec(IndexSpec indexSpec) return builder.build(); } + @Nullable + private final StringEncodingStrategy objectFieldsDictionaryEncoding; + @Nullable + private final ObjectStorageEncoding objectStorageEncoding; + @Nullable + private final CompressionStrategy objectStorageCompression; + @Nullable + private final StringEncodingStrategy stringDictionaryEncoding; + @Nullable + private final CompressionStrategy dictionaryEncodedColumnCompression; + @Nullable + private final CompressionFactory.LongEncodingStrategy longColumnEncoding; + @Nullable + private final CompressionStrategy longColumnCompression; + @Nullable + private final CompressionStrategy doubleColumnCompression; + @Nullable + private final BitmapSerdeFactory bitmapEncoding; + + @JsonCreator + public NestedCommonFormatColumnFormatSpec( + @JsonProperty("objectFieldsDictionaryEncoding") @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, + @JsonProperty("objectStorageEncoding") @Nullable ObjectStorageEncoding objectStorageEncoding, + @JsonProperty("objectStorageCompression") @Nullable CompressionStrategy objectStorageCompression, + @JsonProperty("stringDictionaryEncoding") @Nullable StringEncodingStrategy stringDictionaryEncoding, + @JsonProperty("dictionaryEncodedColumnCompression") @Nullable CompressionStrategy dictionaryEncodedColumnCompression, + @JsonProperty("longColumnEncoding") @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, + @JsonProperty("longColumnCompression") @Nullable CompressionStrategy longColumnCompression, + @JsonProperty("doubleColumnCompression") @Nullable CompressionStrategy doubleColumnCompression + ) + { + this( + objectFieldsDictionaryEncoding, + objectStorageEncoding, + objectStorageCompression, + stringDictionaryEncoding, + dictionaryEncodedColumnCompression, + longColumnEncoding, + longColumnCompression, + doubleColumnCompression, + null + ); + } + + /** + * Internal constructor used by {@link Builder} to set {@link #bitmapEncoding} during the process of resolving values + * for {@link #getEffectiveFormatSpec(NestedCommonFormatColumnFormatSpec, IndexSpec)}. {@link #bitmapEncoding} cannot + * vary per column, and is always set from {@link IndexSpec#getBitmapSerdeFactory()}. + */ + protected NestedCommonFormatColumnFormatSpec( + @Nullable StringEncodingStrategy objectFieldsDictionaryEncoding, + @Nullable ObjectStorageEncoding objectStorageEncoding, + @Nullable CompressionStrategy objectStorageCompression, + @Nullable StringEncodingStrategy stringDictionaryEncoding, + @Nullable CompressionStrategy dictionaryEncodedColumnCompression, + @Nullable CompressionFactory.LongEncodingStrategy longColumnEncoding, + @Nullable CompressionStrategy longColumnCompression, + @Nullable CompressionStrategy doubleColumnCompression, + @Nullable BitmapSerdeFactory bitmapEncoding + ) + { + this.objectFieldsDictionaryEncoding = objectFieldsDictionaryEncoding; + this.objectStorageEncoding = objectStorageEncoding; + this.objectStorageCompression = objectStorageCompression; + this.stringDictionaryEncoding = stringDictionaryEncoding; + this.dictionaryEncodedColumnCompression = dictionaryEncodedColumnCompression; + this.longColumnEncoding = longColumnEncoding; + this.longColumnCompression = longColumnCompression; + this.doubleColumnCompression = doubleColumnCompression; + this.bitmapEncoding = bitmapEncoding; + } + @Nullable @JsonProperty public StringEncodingStrategy getObjectFieldsDictionaryEncoding()