Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,7 @@ Additional Peon configs include:
|`druid.indexer.task.storeEmptyColumns`|Boolean value for whether or not to store empty columns during ingestion. When set to true, Druid stores every column specified in the [`dimensionsSpec`](../ingestion/ingestion-spec.md#dimensionsspec). If you use the string-based schemaless ingestion and don't specify any dimensions to ingest, you must also set [`includeAllDimensions`](../ingestion/ingestion-spec.md#dimensionsspec) for Druid to store empty columns.<br/><br/>If you set `storeEmptyColumns` to false, Druid SQL queries referencing empty columns will fail. If you intend to leave `storeEmptyColumns` disabled, you should either ingest placeholder data for empty columns or else not query on empty columns.<br/><br/>You can overwrite this configuration by setting `storeEmptyColumns` in the [task context](../ingestion/tasks.md#context-parameters).|true|
|`druid.indexer.task.tmpStorageBytesPerTask`|Maximum number of bytes per task to be used to store temporary files on disk. This config is generally intended for internal usage. Attempts to set it are very likely to be overwritten by the TaskRunner that executes the task, so be sure of what you expect to happen before directly adjusting this configuration parameter. The config is documented here primarily to provide an understanding of what it means if/when someone sees that it has been set. A value of -1 disables this limit. |-1|
|`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0|
|`druid.indexing.formats.maxStringLength`|Maximum number of characters to store per string dimension value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Set to 0 to disable. Can be overridden per-dimension using `maxStringLength` in the [dimension object](../ingestion/ingestion-spec.md#dimension-objects).|0 (no truncation)|
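|`druid.indexing.formats.maxStringLength`|Maximum number of characters to store per string dimension value. Longer values are truncated to this length during ingestion. Does not apply to multi-value string dimensions. When set, the value must be a non-negative integer; note that a value of 0 truncates every value to an empty string rather than disabling truncation. Leave the property unset to disable truncation. Can be overridden per-dimension using `maxStringLength` in the [dimension object](../ingestion/ingestion-spec.md#dimension-objects).|`null` (no truncation)|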

If the Peon is running in remote mode, there must be an Overlord up and running. Peons in remote mode can set the following configurations:

Expand Down
2 changes: 1 addition & 1 deletion docs/ingestion/ingestion-spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ Dimension objects can have the following components:
| name | The name of the dimension. This will be used as the field name to read from input records, as well as the column name stored in generated segments.<br /><br />Note that you can use a [`transformSpec`](#transformspec) if you want to rename columns during ingestion time. | none (required) |
| createBitmapIndex | For `string` typed dimensions, whether or not bitmap indexes should be created for the column in generated segments. Creating a bitmap index requires more storage, but speeds up certain kinds of filtering (especially equality and prefix filtering). Only supported for `string` typed dimensions. | `true` |
| multiValueHandling | For `string` typed dimensions, specifies the type of handling for [multi-value fields](../querying/multi-value-dimensions.md). Possible values are `array` (ingest string arrays as-is), `sorted_array` (sort string arrays during ingestion), and `sorted_set` (sort and de-duplicate string arrays during ingestion). This parameter is ignored for types other than `string`. | `sorted_array` |
| maxStringLength | For `string` typed dimensions, the maximum number of characters to store per value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Set to 0 to disable. Overrides the global [`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration) property. | `0` (no truncation) |
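| maxStringLength | For `string` typed dimensions, the maximum number of characters to store per value. Longer values are truncated to this length during ingestion. Does not apply to multi-value string dimensions. When set, the value must be a non-negative integer and overrides the global [`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration) property; note that a value of 0 truncates every value to an empty string rather than disabling truncation. When unset, the global property applies. | `null` (no truncation) |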

#### Inclusions and exclusions

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.druid.error.DruidException;
import org.apache.druid.guice.BuiltInTypesModule;
import org.apache.druid.segment.DimensionHandler;
import org.apache.druid.segment.StringDimensionHandler;
Expand All @@ -34,12 +35,25 @@ public class StringDimensionSchema extends DimensionSchema
{
private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true;

public static int getDefaultMaxStringLength()
@Nullable
public static Integer getDefaultMaxStringLength()
{
return BuiltInTypesModule.getMaxStringLength();
}

private final int maxStringLength;
/**
 * Resolves the effective maximum string length for a string dimension.
 *
 * @param name            name of the column, used only to build the error message
 * @param maxStringLength per-dimension limit supplied by the caller, or {@code null} if none was given
 * @return the supplied limit when it is non-null and non-negative; otherwise the value of
 *         {@link #getDefaultMaxStringLength()}, which may itself be {@code null}
 * @throws DruidException (user persona, invalid input) if the supplied limit is negative
 */
@Nullable
private static Integer validateMaxStringLength(String name, @Nullable Integer maxStringLength)
{
  // Nothing specified for this dimension: defer to the globally configured default.
  if (maxStringLength == null) {
    return getDefaultMaxStringLength();
  }

  // Zero is accepted; only negative limits are rejected.
  if (maxStringLength < 0) {
    throw DruidException.forPersona(DruidException.Persona.USER)
                        .ofCategory(DruidException.Category.INVALID_INPUT)
                        .build("maxStringLength for column [%s] must be >= 0, got [%s]", name, maxStringLength);
  }

  return maxStringLength;
}

@Nullable
private final Integer maxStringLength;

@JsonCreator
public static StringDimensionSchema create(String name)
Expand All @@ -56,7 +70,7 @@ public StringDimensionSchema(
)
{
super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex);
this.maxStringLength = maxStringLength != null && maxStringLength > 0 ? maxStringLength : getDefaultMaxStringLength();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this validation be updated to allow 0 values now, i.e. `if (maxStringLength != null && maxStringLength < 0)`?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated it to allow 0.

this.maxStringLength = validateMaxStringLength(name, maxStringLength);
}

public StringDimensionSchema(
Expand All @@ -65,17 +79,18 @@ public StringDimensionSchema(
Boolean createBitmapIndex
)
{
this(name, multiValueHandling, createBitmapIndex, getDefaultMaxStringLength());
this(name, multiValueHandling, createBitmapIndex, null);
}

public StringDimensionSchema(String name)
{
this(name, null, DEFAULT_CREATE_BITMAP_INDEX, getDefaultMaxStringLength());
this(name, null, DEFAULT_CREATE_BITMAP_INDEX, null);
}

@JsonProperty
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public int getMaxStringLength()
@JsonInclude(JsonInclude.Include.NON_NULL)
@Nullable
public Integer getMaxStringLength()
{
return maxStringLength;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ public class BuiltInTypesModule implements DruidModule
*/
private static DimensionSchema.MultiValueHandling STRING_MV_MODE = DimensionSchema.MultiValueHandling.SORTED_ARRAY;
private static IndexSpec DEFAULT_INDEX_SPEC = IndexSpec.builder().build();
private static int MAX_STRING_LENGTH = 0;
@Nullable
private static Integer MAX_STRING_LENGTH = null;

/**
* @return the configured string multi value handling mode from the system config if set; otherwise, returns
Expand Down Expand Up @@ -138,12 +139,13 @@ private static void setMaxStringLengthIfConfigured(@Nullable Integer maxStringLe
}

@VisibleForTesting
public static void setMaxStringLength(int maxStringLength)
public static void setMaxStringLength(@Nullable Integer maxStringLength)
{
MAX_STRING_LENGTH = maxStringLength;
}

public static int getMaxStringLength()
@Nullable
public static Integer getMaxStringLength()
{
return MAX_STRING_LENGTH;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ private static String validateMultiValueHandlingMode(
@Nullable
private static Integer validateMaxStringLength(@Nullable Integer maxStringLength)
{
if (maxStringLength != null && maxStringLength <= 0) {
if (maxStringLength != null && maxStringLength < 0) {
throw DruidException.forPersona(DruidException.Persona.OPERATOR)
.ofCategory(DruidException.Category.INVALID_INPUT)
.build(
"Invalid value[%s] specified for 'druid.indexing.formats.maxStringLength'."
+ " Value must be a positive integer.",
+ " Value must be a non-negative integer.",
maxStringLength
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.druid.segment.selector.settable.SettableDimensionValueSelector;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;

import javax.annotation.Nullable;
import java.io.File;
import java.util.Collections;
import java.util.Comparator;
Expand Down Expand Up @@ -104,7 +105,8 @@ private static IndexedInts getRow(ColumnValueSelector s)
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
private final boolean hasSpatialIndexes;
private final int maxStringLength;
@Nullable
private final Integer maxStringLength;

public StringDimensionHandler(
String dimensionName,
Expand All @@ -121,7 +123,7 @@ public StringDimensionHandler(
MultiValueHandling multiValueHandling,
boolean hasBitmapIndexes,
boolean hasSpatialIndexes,
int maxStringLength
@Nullable Integer maxStringLength
)
{
this.dimensionName = dimensionName;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ public class StringDimensionIndexer extends DictionaryEncodedColumnIndexer<int[]
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
private final boolean hasSpatialIndexes;
private final int maxStringLength;
@Nullable
private final Integer maxStringLength;
private volatile boolean hasMultipleValues = false;

public StringDimensionIndexer(
Expand All @@ -74,7 +75,7 @@ public StringDimensionIndexer(
@Nullable MultiValueHandling multiValueHandling,
boolean hasBitmapIndexes,
boolean hasSpatialIndexes,
int maxStringLength
@Nullable Integer maxStringLength
)
{
super(new StringDimensionDictionary());
Expand All @@ -84,9 +85,13 @@ public StringDimensionIndexer(
this.maxStringLength = maxStringLength;
}

private String truncateIfNeeded(String value)
/**
* Truncates the value to the first {@link #maxStringLength} characters if configured, otherwise returns it as-is.
*/
@Nullable
private String truncateIfNeeded(@Nullable String value)
{
if (maxStringLength > 0 && value != null && value.length() > maxStringLength) {
if (maxStringLength != null && value != null && value.length() > maxStringLength) {
return value.substring(0, maxStringLength);
}
return value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.fasterxml.jackson.databind.AnnotationIntrospector;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import org.apache.druid.error.DruidException;
import org.apache.druid.guice.DruidSecondaryModule;
import org.apache.druid.guice.GuiceAnnotationIntrospector;
import org.junit.Assert;
Expand Down Expand Up @@ -59,6 +60,16 @@ public void testDeserializeFromJson() throws JsonProcessingException
+ "}";
final StringDimensionSchema schema = (StringDimensionSchema) jsonMapper.readValue(json, DimensionSchema.class);
Assert.assertEquals(new StringDimensionSchema("dim", MultiValueHandling.SORTED_SET, false), schema);
Assert.assertEquals(200, schema.getMaxStringLength());
Assert.assertEquals(Integer.valueOf(200), schema.getMaxStringLength());
}

/**
 * A negative per-dimension {@code maxStringLength} must be rejected when the schema is constructed,
 * with an error message that names the offending column.
 */
@Test
public void testInvalidMaxStringLength()
{
  final DruidException thrown = Assert.assertThrows(
      DruidException.class,
      () -> new StringDimensionSchema("dim", null, true, -1)
  );

  final String message = thrown.getMessage();
  Assert.assertTrue(message.contains("maxStringLength for column [dim] must be >= 0"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public static void setup()
public void teardownEach()
{
BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build());
BuiltInTypesModule.setMaxStringLength(0);
BuiltInTypesModule.setMaxStringLength(null);
}

@AfterClass
Expand All @@ -75,7 +75,7 @@ public static void teardown()
);
}
BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build());
BuiltInTypesModule.setMaxStringLength(0);
BuiltInTypesModule.setMaxStringLength(null);
}

@Test
Expand All @@ -98,7 +98,7 @@ public void testDefaults()
BuiltInTypesModule.getStringMultiValueHandlingMode()
);

Assertions.assertEquals(0, BuiltInTypesModule.getMaxStringLength());
Assertions.assertNull(BuiltInTypesModule.getMaxStringLength());
}

@Test
Expand Down
Loading