From 41a0efe0c8f210674d54122f00c0fab9c284d9ac Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Sat, 13 Apr 2024 17:31:58 +0530 Subject: [PATCH 1/4] Update default value of useMaxMemoryEstimates for Hadoop jobs --- .../MaterializedViewSupervisorSpec.java | 1 + .../druid/indexer/HadoopTuningConfig.java | 74 +++++++++++-------- .../druid/indexer/IndexGeneratorJob.java | 1 + .../indexer/BatchDeltaIngestionTest.java | 2 +- .../DetermineHashedPartitionsJobTest.java | 2 +- .../indexer/DeterminePartitionsJobTest.java | 2 +- .../DetermineRangePartitionsJobTest.java | 2 +- .../indexer/HadoopDruidIndexerConfigTest.java | 2 +- .../druid/indexer/HadoopTuningConfigTest.java | 2 +- .../druid/indexer/IndexGeneratorJobTest.java | 2 +- .../apache/druid/indexer/JobHelperTest.java | 2 +- .../indexer/path/GranularityPathSpecTest.java | 2 +- .../incremental/AppendableIndexBuilder.java | 2 +- .../incremental/AppendableIndexSpec.java | 10 ++- .../IncrementalIndexRowSizeTest.java | 1 - 15 files changed, 62 insertions(+), 45 deletions(-) diff --git a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java index eec08ff133df..7d59cb63ea5b 100644 --- a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java +++ b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorSpec.java @@ -184,6 +184,7 @@ public HadoopIndexTask createTask(Interval interval, String version, List= 0, "Not support persistBackgroundCount < 0"); this.useExplicitVersion = useExplicitVersion; - this.allowedHadoopPrefix = allowedHadoopPrefix == null ? ImmutableList.of() : allowedHadoopPrefix; - - this.ignoreInvalidRows = ignoreInvalidRows == null ? false : ignoreInvalidRows; - if (maxParseExceptions != null) { - this.maxParseExceptions = maxParseExceptions; - } else { - if (!this.ignoreInvalidRows) { - this.maxParseExceptions = 0; - } else { - this.maxParseExceptions = TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS; - } - } - this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; + this.allowedHadoopPrefix = Configs.valueOrDefault(allowedHadoopPrefix, Collections.emptyList()); - this.useYarnRMJobStatusFallback = useYarnRMJobStatusFallback == null ? true : useYarnRMJobStatusFallback; + this.ignoreInvalidRows = Configs.valueOrDefault(ignoreInvalidRows, false); + this.maxParseExceptions = Configs.valueOrDefault( + maxParseExceptions, + this.ignoreInvalidRows ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : 0 + ); + this.logParseExceptions = Configs.valueOrDefault(logParseExceptions, TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS); + this.useYarnRMJobStatusFallback = Configs.valueOrDefault(useYarnRMJobStatusFallback, true); if (awaitSegmentAvailabilityTimeoutMillis == null || awaitSegmentAvailabilityTimeoutMillis < 0) { this.awaitSegmentAvailabilityTimeoutMillis = DEFAULT_AWAIT_SEGMENT_AVAILABILITY_TIMEOUT_MILLIS; @@ -263,6 +266,12 @@ public long getMaxBytesInMemory() return maxBytesInMemory; } + @JsonProperty + public boolean isUseMaxMemoryEstimates() + { + return useMaxMemoryEstimates; + } + @JsonProperty public boolean isLeaveIntermediate() { @@ -372,6 +381,7 @@ public HadoopTuningConfig withWorkingPath(String path) appendableIndexSpec, maxRowsInMemory, maxBytesInMemory, + false, leaveIntermediate, cleanupOnFailure, overwriteFiles, @@ -404,6 +414,7 @@ public HadoopTuningConfig withVersion(String ver) appendableIndexSpec, maxRowsInMemory, maxBytesInMemory, + false, leaveIntermediate, cleanupOnFailure, overwriteFiles, @@ -436,6 +447,7 @@ public HadoopTuningConfig withShardSpecs(Map> specs appendableIndexSpec, maxRowsInMemory, maxBytesInMemory, + false, leaveIntermediate, cleanupOnFailure, overwriteFiles, diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java index 967b55665c85..c60874e7111d 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/IndexGeneratorJob.java @@ -310,6 +310,7 @@ private static IncrementalIndex makeIncrementalIndex( .setIndexSchema(indexSchema) .setMaxRowCount(tuningConfig.getMaxRowsInMemory()) .setMaxBytesInMemory(tuningConfig.getMaxBytesInMemoryOrDefault()) + .setUseMaxMemoryEstimates(tuningConfig.isUseMaxMemoryEstimates()) .build(); if (oldDimOrder != null && !indexSchema.getDimensionsSpec().hasCustomDimensions()) { diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java index ce725230b963..831e26fa4c4a 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java @@ -474,7 +474,7 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig( null, null, null, - false, + false, false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java index fb1ff1520a28..1e8e381cf31e 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -216,7 +216,7 @@ public DetermineHashedPartitionsJobTest( null, null, null, - false, + false, false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java index 5f338936a409..ee2b5140b23f 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java @@ -327,7 +327,7 @@ public DeterminePartitionsJobTest( null, null, null, - false, + false, false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java index f10a898d1260..e79d066ab55c 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineRangePartitionsJobTest.java @@ -382,7 +382,7 @@ public DetermineRangePartitionsJobTest( null, null, null, - false, + false, false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java index f69ad04915c5..162b28ac8365 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java @@ -265,7 +265,7 @@ HadoopIngestionSpec build() null, null, null, - false, + false, false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java index 97c764dd10b1..37564a730825 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java @@ -48,7 +48,7 @@ public void testSerde() throws Exception null, 100, null, - true, + false, true, true, true, true, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java index 2b8fc6749e31..2581e3e9d2a6 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java @@ -532,7 +532,7 @@ public void setUp() throws Exception null, maxRowsInMemory, maxBytesInMemory, - true, + false, true, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java index 3c1b80166c51..a0cf23e0d321 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java @@ -164,7 +164,7 @@ public void setup() throws Exception null, null, null, - false, + false, false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java index d1aee3d66992..c5bb41a086d5 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java @@ -64,7 +64,7 @@ public class GranularityPathSpecTest null, null, null, - false, + false, false, false, false, false, diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexBuilder.java b/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexBuilder.java index 777c4cc1fd7a..fcfca99cdfe8 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexBuilder.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexBuilder.java @@ -37,7 +37,7 @@ public abstract class AppendableIndexBuilder // DruidInputSource since that is the only case where we can have existing metrics. // This is currently only use by auto compaction and should not be use for anything else. protected boolean preserveExistingMetrics = false; - protected boolean useMaxMemoryEstimates = true; + protected boolean useMaxMemoryEstimates = false; protected final Logger log = new Logger(this.getClass()); diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexSpec.java b/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexSpec.java index 67cdabdf5673..44b56aaa13ad 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexSpec.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/AppendableIndexSpec.java @@ -22,14 +22,18 @@ import org.apache.druid.guice.annotations.UnstableApi; /** - * AppendableIndexSpec describes the in-memory indexing method for data ingestion. + * Describes the in-memory indexing method for data ingestion. */ @UnstableApi public interface AppendableIndexSpec { - // Returns a builder of the appendable index. + /** + * Creates a new builder of the appendable index. + */ AppendableIndexBuilder builder(); - // Returns the default max bytes in memory for this index. + /** + * Returns the default max bytes in memory for this index. + */ long getDefaultMaxBytesInMemory(); } diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java index 27ccf182e096..ea448fdf8edf 100644 --- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java @@ -54,7 +54,6 @@ public IncrementalIndexRowSizeTest(String indexType) throws JsonProcessingExcept .setSimpleTestingIndexSchema(new CountAggregatorFactory("cnt")) .setMaxRowCount(10_000) .setMaxBytesInMemory(1_000) - .setUseMaxMemoryEstimates(true) .build()) ); } From 15be1e20dbad9bb96ce49fe694ab29c4ba491846 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Sat, 13 Apr 2024 17:39:59 +0530 Subject: [PATCH 2/4] Fix formatting --- .../java/org/apache/druid/indexer/BatchDeltaIngestionTest.java | 3 ++- .../apache/druid/indexer/DetermineHashedPartitionsJobTest.java | 3 ++- .../org/apache/druid/indexer/DeterminePartitionsJobTest.java | 3 ++- .../org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java | 3 ++- .../java/org/apache/druid/indexer/HadoopTuningConfigTest.java | 3 ++- .../java/org/apache/druid/indexer/IndexGeneratorJobTest.java | 3 ++- .../src/test/java/org/apache/druid/indexer/JobHelperTest.java | 3 ++- .../org/apache/druid/indexer/path/GranularityPathSpecTest.java | 3 ++- 8 files changed, 16 insertions(+), 8 deletions(-) diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java index 831e26fa4c4a..ed8b8c0bb093 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/BatchDeltaIngestionTest.java @@ -474,7 +474,8 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig( null, null, null, - false, false, + false, + false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java index 1e8e381cf31e..24a8ee0ef7eb 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -216,7 +216,8 @@ public DetermineHashedPartitionsJobTest( null, null, null, - false, false, + false, + false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java index ee2b5140b23f..a3c98f29565b 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/DeterminePartitionsJobTest.java @@ -327,7 +327,8 @@ public DeterminePartitionsJobTest( null, null, null, - false, false, + false, + false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java index 162b28ac8365..8aead05d625b 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopDruidIndexerConfigTest.java @@ -265,7 +265,8 @@ HadoopIngestionSpec build() null, null, null, - false, false, + false, + false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java index 37564a730825..5857f92172f2 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/HadoopTuningConfigTest.java @@ -48,7 +48,8 @@ public void testSerde() throws Exception null, 100, null, - false, true, + false, + true, true, true, true, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java index 2581e3e9d2a6..e14ade454f4c 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/IndexGeneratorJobTest.java @@ -532,7 +532,8 @@ public void setUp() throws Exception null, maxRowsInMemory, maxBytesInMemory, - false, true, + false, + true, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java index a0cf23e0d321..7069e9a78de3 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/JobHelperTest.java @@ -164,7 +164,8 @@ public void setup() throws Exception null, null, null, - false, false, + false, + false, false, false, false, diff --git a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java index c5bb41a086d5..8af77ca0e4fd 100644 --- a/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java +++ b/indexing-hadoop/src/test/java/org/apache/druid/indexer/path/GranularityPathSpecTest.java @@ -64,7 +64,8 @@ public class GranularityPathSpecTest null, null, null, - false, false, + false, + false, false, false, false, From d77eb846893a1c3f319dc8a3fab46c4cbc1cc6f7 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Sun, 14 Apr 2024 23:00:37 +0530 Subject: [PATCH 3/4] Revert test change --- .../druid/segment/incremental/IncrementalIndexRowSizeTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java index ea448fdf8edf..27ccf182e096 100644 --- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java +++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexRowSizeTest.java @@ -54,6 +54,7 @@ public IncrementalIndexRowSizeTest(String indexType) throws JsonProcessingExcept .setSimpleTestingIndexSchema(new CountAggregatorFactory("cnt")) .setMaxRowCount(10_000) .setMaxBytesInMemory(1_000) + .setUseMaxMemoryEstimates(true) .build()) ); } From 6e7f12c5ec4b132b44be01af4f9e55fb033ac023 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Thu, 25 Apr 2024 13:23:51 +0530 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: Zoltan Haindrich --- .../java/org/apache/druid/indexer/HadoopTuningConfig.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopTuningConfig.java b/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopTuningConfig.java index 3960ed60ca3d..9da6ead38cf1 100644 --- a/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopTuningConfig.java +++ b/indexing-hadoop/src/main/java/org/apache/druid/indexer/HadoopTuningConfig.java @@ -381,7 +381,7 @@ public HadoopTuningConfig withWorkingPath(String path) appendableIndexSpec, maxRowsInMemory, maxBytesInMemory, - false, + useMaxMemoryEstimates, leaveIntermediate, cleanupOnFailure, overwriteFiles, @@ -414,7 +414,7 @@ public HadoopTuningConfig withVersion(String ver) appendableIndexSpec, maxRowsInMemory, maxBytesInMemory, - false, + useMaxMemoryEstimates, leaveIntermediate, cleanupOnFailure, overwriteFiles, @@ -447,7 +447,7 @@ public HadoopTuningConfig withShardSpecs(Map> specs appendableIndexSpec, maxRowsInMemory, maxBytesInMemory, - false, + useMaxMemoryEstimates, leaveIntermediate, cleanupOnFailure, overwriteFiles,