From b8bc8c353e2dd792d93bed80d05c11bc2b035315 Mon Sep 17 00:00:00 2001 From: Jay Kanakiya Date: Mon, 23 Mar 2026 09:29:41 -0700 Subject: [PATCH 1/3] change maxStringLength default --- docs/configuration/index.md | 2 +- docs/ingestion/ingestion-spec.md | 2 +- .../data/input/impl/StringDimensionSchema.java | 17 ++++++++++------- .../apache/druid/guice/BuiltInTypesModule.java | 8 +++++--- .../druid/segment/StringDimensionHandler.java | 6 ++++-- .../druid/segment/StringDimensionIndexer.java | 10 +++++++--- .../input/impl/StringDimensionSchemaTest.java | 2 +- .../druid/guice/BuiltInTypesModuleTest.java | 6 +++--- 8 files changed, 32 insertions(+), 21 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 853ec3878a26..ee2d7a333c03 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -1424,7 +1424,7 @@ Additional Peon configs include: |`druid.indexer.task.storeEmptyColumns`|Boolean value for whether or not to store empty columns during ingestion. When set to true, Druid stores every column specified in the [`dimensionsSpec`](../ingestion/ingestion-spec.md#dimensionsspec). If you use the string-based schemaless ingestion and don't specify any dimensions to ingest, you must also set [`includeAllDimensions`](../ingestion/ingestion-spec.md#dimensionsspec) for Druid to store empty columns.

If you set `storeEmptyColumns` to false, Druid SQL queries referencing empty columns will fail. If you intend to leave `storeEmptyColumns` disabled, you should either ingest placeholder data for empty columns or else not query on empty columns.

You can overwrite this configuration by setting `storeEmptyColumns` in the [task context](../ingestion/tasks.md#context-parameters).|true| |`druid.indexer.task.tmpStorageBytesPerTask`|Maximum number of bytes per task to be used to store temporary files on disk. This config is generally intended for internal usage. Attempts to set it are very likely to be overwritten by the TaskRunner that executes the task, so be sure of what you expect to happen before directly adjusting this configuration parameter. The config is documented here primarily to provide an understanding of what it means if/when someone sees that it has been set. A value of -1 disables this limit. |-1| |`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0| -|`druid.indexing.formats.maxStringLength`|Maximum number of characters to store per string dimension value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Set to 0 to disable. Can be overridden per-dimension using `maxStringLength` in the [dimension object](../ingestion/ingestion-spec.md#dimension-objects).|0 (no truncation)| +|`druid.indexing.formats.maxStringLength`|Maximum number of characters to store per string dimension value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Can be overridden per-dimension using `maxStringLength` in the [dimension object](../ingestion/ingestion-spec.md#dimension-objects).|`null` (no truncation)| If the Peon is running in remote mode, there must be an Overlord up and running. 
Peons in remote mode can set the following configurations: diff --git a/docs/ingestion/ingestion-spec.md b/docs/ingestion/ingestion-spec.md index 72ec6d793d38..c6f0b1269275 100644 --- a/docs/ingestion/ingestion-spec.md +++ b/docs/ingestion/ingestion-spec.md @@ -243,7 +243,7 @@ Dimension objects can have the following components: | name | The name of the dimension. This will be used as the field name to read from input records, as well as the column name stored in generated segments.

Note that you can use a [`transformSpec`](#transformspec) if you want to rename columns during ingestion time. | none (required) | | createBitmapIndex | For `string` typed dimensions, whether or not bitmap indexes should be created for the column in generated segments. Creating a bitmap index requires more storage, but speeds up certain kinds of filtering (especially equality and prefix filtering). Only supported for `string` typed dimensions. | `true` | | multiValueHandling | For `string` typed dimensions, specifies the type of handling for [multi-value fields](../querying/multi-value-dimensions.md). Possible values are `array` (ingest string arrays as-is), `sorted_array` (sort string arrays during ingestion), and `sorted_set` (sort and de-duplicate string arrays during ingestion). This parameter is ignored for types other than `string`. | `sorted_array` | -| maxStringLength | For `string` typed dimensions, the maximum number of characters to store per value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Set to 0 to disable. Overrides the global [`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration) property. | `0` (no truncation) | +| maxStringLength | For `string` typed dimensions, the maximum number of characters to store per value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Overrides the global [`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration) property. 
| `null` (no truncation) | #### Inclusions and exclusions diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java index ab00952e867a..8562d0ee0d61 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java @@ -34,12 +34,14 @@ public class StringDimensionSchema extends DimensionSchema { private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true; - public static int getDefaultMaxStringLength() + @Nullable + public static Integer getDefaultMaxStringLength() { return BuiltInTypesModule.getMaxStringLength(); } - private final int maxStringLength; + @Nullable + private final Integer maxStringLength; @JsonCreator public static StringDimensionSchema create(String name) @@ -56,7 +58,7 @@ public StringDimensionSchema( ) { super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex); - this.maxStringLength = maxStringLength != null && maxStringLength > 0 ? maxStringLength : getDefaultMaxStringLength(); + this.maxStringLength = maxStringLength != null ? 
maxStringLength : getDefaultMaxStringLength(); } public StringDimensionSchema( @@ -65,17 +67,18 @@ public StringDimensionSchema( Boolean createBitmapIndex ) { - this(name, multiValueHandling, createBitmapIndex, getDefaultMaxStringLength()); + this(name, multiValueHandling, createBitmapIndex, null); } public StringDimensionSchema(String name) { - this(name, null, DEFAULT_CREATE_BITMAP_INDEX, getDefaultMaxStringLength()); + this(name, null, DEFAULT_CREATE_BITMAP_INDEX, null); } @JsonProperty - @JsonInclude(JsonInclude.Include.NON_DEFAULT) - public int getMaxStringLength() + @JsonInclude(JsonInclude.Include.NON_NULL) + @Nullable + public Integer getMaxStringLength() { return maxStringLength; } diff --git a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java index e260a4bd8b66..5205c6ba311b 100644 --- a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java +++ b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java @@ -53,7 +53,8 @@ public class BuiltInTypesModule implements DruidModule */ private static DimensionSchema.MultiValueHandling STRING_MV_MODE = DimensionSchema.MultiValueHandling.SORTED_ARRAY; private static IndexSpec DEFAULT_INDEX_SPEC = IndexSpec.builder().build(); - private static int MAX_STRING_LENGTH = 0; + @Nullable + private static Integer MAX_STRING_LENGTH = null; /** * @return the configured string multi value handling mode from the system config if set; otherwise, returns @@ -138,12 +139,13 @@ private static void setMaxStringLengthIfConfigured(@Nullable Integer maxStringLe } @VisibleForTesting - public static void setMaxStringLength(int maxStringLength) + public static void setMaxStringLength(@Nullable Integer maxStringLength) { MAX_STRING_LENGTH = maxStringLength; } - public static int getMaxStringLength() + @Nullable + public static Integer getMaxStringLength() { return MAX_STRING_LENGTH; } diff --git 
a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index d2b41ab7a4ba..0d23fe24aa77 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -32,6 +32,7 @@ import org.apache.druid.segment.selector.settable.SettableDimensionValueSelector; import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import javax.annotation.Nullable; import java.io.File; import java.util.Collections; import java.util.Comparator; @@ -104,7 +105,8 @@ private static IndexedInts getRow(ColumnValueSelector s) private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; private final boolean hasSpatialIndexes; - private final int maxStringLength; + @Nullable + private final Integer maxStringLength; public StringDimensionHandler( String dimensionName, @@ -121,7 +123,7 @@ public StringDimensionHandler( MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes, - int maxStringLength + @Nullable Integer maxStringLength ) { this.dimensionName = dimensionName; diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index d41fe6fea980..d3b02e7e0199 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -58,7 +58,8 @@ public class StringDimensionIndexer extends DictionaryEncodedColumnIndexer 0 && value != null && value.length() > maxStringLength) { + if (maxStringLength != null && value != null && value.length() > maxStringLength) { return value.substring(0, maxStringLength); } return value; diff --git 
a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java index 3354ac8b82a1..60b1bd7ecc5b 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java @@ -59,6 +59,6 @@ public void testDeserializeFromJson() throws JsonProcessingException + "}"; final StringDimensionSchema schema = (StringDimensionSchema) jsonMapper.readValue(json, DimensionSchema.class); Assert.assertEquals(new StringDimensionSchema("dim", MultiValueHandling.SORTED_SET, false), schema); - Assert.assertEquals(200, schema.getMaxStringLength()); + Assert.assertEquals(Integer.valueOf(200), schema.getMaxStringLength()); } } diff --git a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java index 189a8a2bdf38..4fdcad50e87c 100644 --- a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java +++ b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java @@ -60,7 +60,7 @@ public static void setup() public void teardownEach() { BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build()); - BuiltInTypesModule.setMaxStringLength(0); + BuiltInTypesModule.setMaxStringLength(null); } @AfterClass @@ -75,7 +75,7 @@ public static void teardown() ); } BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build()); - BuiltInTypesModule.setMaxStringLength(0); + BuiltInTypesModule.setMaxStringLength(null); } @Test @@ -98,7 +98,7 @@ public void testDefaults() BuiltInTypesModule.getStringMultiValueHandlingMode() ); - Assertions.assertEquals(0, BuiltInTypesModule.getMaxStringLength()); + Assertions.assertNull(BuiltInTypesModule.getMaxStringLength()); } @Test From 5e143152e87ac53650696322922295a22dcbaef8 Mon Sep 17 
00:00:00 2001 From: Jay Kanakiya Date: Mon, 23 Mar 2026 11:50:46 -0700 Subject: [PATCH 2/3] address comments --- docs/configuration/index.md | 2 +- docs/ingestion/ingestion-spec.md | 2 +- .../druid/data/input/impl/StringDimensionSchema.java | 11 ++++++++++- .../druid/segment/DefaultColumnFormatConfig.java | 2 +- .../apache/druid/segment/StringDimensionIndexer.java | 3 ++- .../data/input/impl/StringDimensionSchemaTest.java | 10 ++++++++++ 6 files changed, 25 insertions(+), 5 deletions(-) diff --git a/docs/configuration/index.md b/docs/configuration/index.md index ee2d7a333c03..c31cf0747cba 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -1424,7 +1424,7 @@ Additional Peon configs include: |`druid.indexer.task.storeEmptyColumns`|Boolean value for whether or not to store empty columns during ingestion. When set to true, Druid stores every column specified in the [`dimensionsSpec`](../ingestion/ingestion-spec.md#dimensionsspec). If you use the string-based schemaless ingestion and don't specify any dimensions to ingest, you must also set [`includeAllDimensions`](../ingestion/ingestion-spec.md#dimensionsspec) for Druid to store empty columns.

If you set `storeEmptyColumns` to false, Druid SQL queries referencing empty columns will fail. If you intend to leave `storeEmptyColumns` disabled, you should either ingest placeholder data for empty columns or else not query on empty columns.

You can overwrite this configuration by setting `storeEmptyColumns` in the [task context](../ingestion/tasks.md#context-parameters).|true| |`druid.indexer.task.tmpStorageBytesPerTask`|Maximum number of bytes per task to be used to store temporary files on disk. This config is generally intended for internal usage. Attempts to set it are very likely to be overwritten by the TaskRunner that executes the task, so be sure of what you expect to happen before directly adjusting this configuration parameter. The config is documented here primarily to provide an understanding of what it means if/when someone sees that it has been set. A value of -1 disables this limit. |-1| |`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0| -|`druid.indexing.formats.maxStringLength`|Maximum number of characters to store per string dimension value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Can be overridden per-dimension using `maxStringLength` in the [dimension object](../ingestion/ingestion-spec.md#dimension-objects).|`null` (no truncation)| +|`druid.indexing.formats.maxStringLength`|Maximum number of characters to store per string dimension value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Can be overridden per-dimension using `maxStringLength` in the [dimension object](../ingestion/ingestion-spec.md#dimension-objects). Value must be >= 0. A value of 0 does not disable truncation: it reduces every affected non-empty value to an empty string. Leave the property unset to disable truncation.|`null` (no truncation)| If the Peon is running in remote mode, there must be an Overlord up and running. 
Peons in remote mode can set the following configurations: diff --git a/docs/ingestion/ingestion-spec.md b/docs/ingestion/ingestion-spec.md index c6f0b1269275..b2a0e41f48d7 100644 --- a/docs/ingestion/ingestion-spec.md +++ b/docs/ingestion/ingestion-spec.md @@ -243,7 +243,7 @@ Dimension objects can have the following components: | name | The name of the dimension. This will be used as the field name to read from input records, as well as the column name stored in generated segments.

Note that you can use a [`transformSpec`](#transformspec) if you want to rename columns during ingestion time. | none (required) | | createBitmapIndex | For `string` typed dimensions, whether or not bitmap indexes should be created for the column in generated segments. Creating a bitmap index requires more storage, but speeds up certain kinds of filtering (especially equality and prefix filtering). Only supported for `string` typed dimensions. | `true` | | multiValueHandling | For `string` typed dimensions, specifies the type of handling for [multi-value fields](../querying/multi-value-dimensions.md). Possible values are `array` (ingest string arrays as-is), `sorted_array` (sort string arrays during ingestion), and `sorted_set` (sort and de-duplicate string arrays during ingestion). This parameter is ignored for types other than `string`. | `sorted_array` | -| maxStringLength | For `string` typed dimensions, the maximum number of characters to store per value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Overrides the global [`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration) property. | `null` (no truncation) | +| maxStringLength | For `string` typed dimensions, the maximum number of characters to store per value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Overrides the global [`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration) property. Value must be >= 0. A value of 0 does not disable truncation: it reduces every affected non-empty value to an empty string. Omit the property to inherit the global setting. 
| `null` (no truncation) | #### Inclusions and exclusions diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java index 8562d0ee0d61..9be7ffddd2e7 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java @@ -40,6 +40,15 @@ public static Integer getDefaultMaxStringLength() return BuiltInTypesModule.getMaxStringLength(); } + @Nullable + private static Integer validateMaxStringLength(@Nullable Integer maxStringLength) + { + if (maxStringLength != null && maxStringLength < 0) { + throw new IllegalArgumentException("maxStringLength must be >= 0, got " + maxStringLength); + } + return maxStringLength != null ? maxStringLength : getDefaultMaxStringLength(); + } + @Nullable private final Integer maxStringLength; @@ -58,7 +67,7 @@ public StringDimensionSchema( ) { super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex); - this.maxStringLength = maxStringLength != null ? 
maxStringLength : getDefaultMaxStringLength(); + this.maxStringLength = validateMaxStringLength(maxStringLength); } public StringDimensionSchema( diff --git a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java index 19b875b5f6cb..fcc1e223afb9 100644 --- a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java @@ -71,7 +71,7 @@ private static String validateMultiValueHandlingMode( @Nullable private static Integer validateMaxStringLength(@Nullable Integer maxStringLength) { - if (maxStringLength != null && maxStringLength <= 0) { + if (maxStringLength != null && maxStringLength < 0) { throw DruidException.forPersona(DruidException.Persona.OPERATOR) .ofCategory(DruidException.Category.INVALID_INPUT) .build( diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index d3b02e7e0199..6cfd2347d378 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -86,8 +86,9 @@ public StringDimensionIndexer( } /** - * Truncates the value to {@link #maxStringLength} characters if configured, otherwise returns it as-is. + * Truncates the value to the first {@link #maxStringLength} characters if configured, otherwise returns it as-is. 
*/ + @Nullable private String truncateIfNeeded(String value) { if (maxStringLength != null && value != null && value.length() > maxStringLength) { diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java index 60b1bd7ecc5b..6a74901ac5dc 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java @@ -61,4 +61,14 @@ public void testDeserializeFromJson() throws JsonProcessingException Assert.assertEquals(new StringDimensionSchema("dim", MultiValueHandling.SORTED_SET, false), schema); Assert.assertEquals(Integer.valueOf(200), schema.getMaxStringLength()); } + + @Test + public void testInvalidMaxStringLength() + { + final Exception exception = Assert.assertThrows( + IllegalArgumentException.class, + () -> new StringDimensionSchema("dim", null, true, -1) + ); + Assert.assertTrue(exception.getMessage().contains("maxStringLength must be >= 0")); + } } From 72a9fb14185d95eea07c6336abe27c1eb1357a62 Mon Sep 17 00:00:00 2001 From: Jay Kanakiya Date: Mon, 23 Mar 2026 12:22:31 -0700 Subject: [PATCH 3/3] use Druid exception and update error texts --- .../druid/data/input/impl/StringDimensionSchema.java | 9 ++++++--- .../apache/druid/segment/DefaultColumnFormatConfig.java | 2 +- .../org/apache/druid/segment/StringDimensionIndexer.java | 2 +- .../druid/data/input/impl/StringDimensionSchemaTest.java | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java index 9be7ffddd2e7..018d9ca5c35e 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java +++ 
b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.error.DruidException; import org.apache.druid.guice.BuiltInTypesModule; import org.apache.druid.segment.DimensionHandler; import org.apache.druid.segment.StringDimensionHandler; @@ -41,10 +42,12 @@ public static Integer getDefaultMaxStringLength() } @Nullable - private static Integer validateMaxStringLength(@Nullable Integer maxStringLength) + private static Integer validateMaxStringLength(String name, @Nullable Integer maxStringLength) { if (maxStringLength != null && maxStringLength < 0) { - throw new IllegalArgumentException("maxStringLength must be >= 0, got " + maxStringLength); + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("maxStringLength for column [%s] must be >= 0, got [%s]", name, maxStringLength); } return maxStringLength != null ? maxStringLength : getDefaultMaxStringLength(); } @@ -67,7 +70,7 @@ public StringDimensionSchema( ) { super(name, multiValueHandling, createBitmapIndex == null ? 
DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex); - this.maxStringLength = validateMaxStringLength(maxStringLength); + this.maxStringLength = validateMaxStringLength(name, maxStringLength); } public StringDimensionSchema( diff --git a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java index fcc1e223afb9..56118b02686d 100644 --- a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java @@ -76,7 +76,7 @@ private static Integer validateMaxStringLength(@Nullable Integer maxStringLength .ofCategory(DruidException.Category.INVALID_INPUT) .build( "Invalid value[%s] specified for 'druid.indexing.formats.maxStringLength'." - + " Value must be a positive integer.", + + " Value must be a non-negative integer.", maxStringLength ); } diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index 6cfd2347d378..88f60ee8042d 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -89,7 +89,7 @@ public StringDimensionIndexer( * Truncates the value to the first {@link #maxStringLength} characters if configured, otherwise returns it as-is. 
*/ @Nullable - private String truncateIfNeeded(String value) + private String truncateIfNeeded(@Nullable String value) { if (maxStringLength != null && value != null && value.length() > maxStringLength) { return value.substring(0, maxStringLength); diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java index 6a74901ac5dc..dbee07bddb81 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.AnnotationIntrospector; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; +import org.apache.druid.error.DruidException; import org.apache.druid.guice.DruidSecondaryModule; import org.apache.druid.guice.GuiceAnnotationIntrospector; import org.junit.Assert; @@ -66,9 +67,9 @@ public void testDeserializeFromJson() throws JsonProcessingException public void testInvalidMaxStringLength() { final Exception exception = Assert.assertThrows( - IllegalArgumentException.class, + DruidException.class, () -> new StringDimensionSchema("dim", null, true, -1) ); - Assert.assertTrue(exception.getMessage().contains("maxStringLength must be >= 0")); + Assert.assertTrue(exception.getMessage().contains("maxStringLength for column [dim] must be >= 0")); } }