From 6ed67be6a749ce0ce03c4a90038d6fe5cb38d771 Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Mon, 26 Apr 2021 21:26:43 -0700 Subject: [PATCH 1/3] Add feature to automatically remove rules based on retention period --- docs/configuration/index.md | 3 + docs/operations/metrics.md | 1 + .../druid/metadata/MetadataRuleManager.java | 8 + .../metadata/SQLMetadataRuleManager.java | 28 ++++ .../coordinator/DruidCoordinatorConfig.java | 8 + .../server/coordinator/duty/KillRules.java | 76 +++++++++ .../metadata/SQLMetadataRuleManagerTest.java | 152 ++++++++++++++++- .../CuratorDruidCoordinatorTest.java | 2 + .../coordinator/DruidCoordinatorTest.java | 2 + .../coordinator/HttpLoadQueuePeonTest.java | 2 + .../server/coordinator/LoadQueuePeonTest.java | 6 + .../coordinator/LoadQueuePeonTester.java | 2 + .../TestDruidCoordinatorConfig.java | 18 ++ .../coordinator/duty/KillAuditLogTest.java | 8 + .../coordinator/duty/KillRulesTest.java | 154 ++++++++++++++++++ .../duty/KillUnusedSegmentsTest.java | 2 + .../org/apache/druid/cli/CliCoordinator.java | 6 + 17 files changed, 477 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java create mode 100644 server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java diff --git a/docs/configuration/index.md b/docs/configuration/index.md index d49bf75675fa..f11d70e0b597 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -750,6 +750,9 @@ These Coordinator static configurations can be defined in the `coordinator/runti |`druid.coordinator.kill.audit.on`| Boolean value for whether to enable automatic deletion of audit logs. If set to true, Coordinator will periodically remove audit logs from the audit table entries in metadata storage.| No | False| |`druid.coordinator.kill.audit.period`| How often to do automatic deletion of audit logs in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. 
Value must be greater than `druid.coordinator.period.metadataStoreManagementPeriod`. Only applies if `druid.coordinator.kill.audit.on` is set to True.| No| `P1D`| |`druid.coordinator.kill.audit.durationToRetain`| Duration of audit logs to be retained from created time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Only applies if `druid.coordinator.kill.audit.on` is set to True.| Yes if `druid.coordinator.kill.audit.on` is set to True| None| +|`druid.coordinator.kill.rule.on`| Boolean value for whether to enable automatic deletion of rules. If set to true, Coordinator will periodically remove rules of inactive datasource (datasource with no used and unused segments) from the rule table in metadata storage.| No | False| +|`druid.coordinator.kill.rule.period`| How often to do automatic deletion of rules in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Value must be greater than `druid.coordinator.period.metadataStoreManagementPeriod`. Only applies if `druid.coordinator.kill.rule.on` is set to True.| No| `P1D`| +|`druid.coordinator.kill.rule.durationToRetain`| Duration of rules to be retained from created time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) duration format. Only applies if `druid.coordinator.kill.rule.on` is set to True.| Yes if `druid.coordinator.kill.rule.on` is set to True| None| ##### Segment Management |Property|Possible Values|Description|Default| diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index 5a2d5fb76fd7..9770ec3c52aa 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -257,6 +257,7 @@ These metrics are for the Druid Coordinator and are reset each time the Coordina |`coordinator/time`|Approximate Coordinator duty runtime in milliseconds. The duty dimension is the string alias of the Duty that is being run.|duty.|Varies.| |`coordinator/global/time`|Approximate runtime of a full coordination cycle in milliseconds. 
The `dutyGroup` dimension indicates what type of coordination this run was. i.e. Historical Management vs Indexing|`dutyGroup`|Varies.| |`metadata/kill/audit/count`|Total number of audit logs automatically deleted from metadata store audit table per each Coordinator kill audit duty run. This metric can help adjust `druid.coordinator.kill.audit.durationToRetain` configuration based on if more or less audit logs need to be deleted per cycle. Note that this metric is only emitted when `druid.coordinator.kill.audit.on` is set to true.| |Varies.| +|`metadata/kill/rule/count`|Total number of rules automatically deleted from metadata store rule table per each Coordinator kill rule duty run. This metric can help adjust `druid.coordinator.kill.rule.durationToRetain` configuration based on if more or less rules need to be deleted per cycle. Note that this metric is only emitted when `druid.coordinator.kill.rule.on` is set to true.| |Varies.| If `emitBalancingStats` is set to `true` in the Coordinator [dynamic configuration](../configuration/index.md#dynamic-configuration), then [log entries](../configuration/logging.md) for class diff --git a/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java b/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java index 0c8019754356..858a0947719c 100644 --- a/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java +++ b/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java @@ -42,4 +42,12 @@ public interface MetadataRuleManager List getRulesWithDefault(String dataSource); boolean overrideRule(String dataSource, List rulesConfig, AuditInfo auditInfo); + + /** + * Remove rules for non-existent datasources (datasources with no segments) that were created before the given timestamp.
+ * + * @param timestamp timestamp in milliseconds + * @return number of rules removed + */ + int removeRulesOlderThan(long timestamp); } diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java index 48aaff45b06d..44195bb79b92 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java @@ -48,6 +48,7 @@ import org.skife.jdbi.v2.StatementContext; import org.skife.jdbi.v2.TransactionCallback; import org.skife.jdbi.v2.TransactionStatus; +import org.skife.jdbi.v2.Update; import org.skife.jdbi.v2.tweak.HandleCallback; import org.skife.jdbi.v2.tweak.ResultSetMapper; @@ -421,8 +422,35 @@ public Void inTransaction(Handle handle, TransactionStatus transactionStatus) th return true; } + @Override + public int removeRulesOlderThan(long timestamp) + { + DateTime dateTime = DateTimes.utc(timestamp); + synchronized (lock) { + return dbi.withHandle( + handle -> { + Update sql = handle.createStatement( + StringUtils.format( + "DELETE FROM %1$s WHERE datasource NOT IN (SELECT DISTINCT datasource from %2$s) and datasource!=:default_rule and version < :date_time", + getRulesTable(), + getSegmentsTable() + ) + ); + return sql.bind("default_rule", config.getDefaultRule()) + .bind("date_time", dateTime.toString()) + .execute(); + } + ); + } + } + private String getRulesTable() { return dbTables.getRulesTable(); } + + private String getSegmentsTable() + { + return dbTables.getSegmentsTable(); + } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorConfig.java index 933b974d3ad0..72341cc7b79f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorConfig.java +++ 
b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorConfig.java @@ -63,6 +63,14 @@ public abstract class DruidCoordinatorConfig @Default("PT-1s") public abstract Duration getCoordinatorAuditKillDurationToRetain(); + @Config("druid.coordinator.kill.rule.period") + @Default("P1D") + public abstract Duration getCoordinatorRuleKillPeriod(); + + @Config("druid.coordinator.kill.rule.durationToRetain") + @Default("PT-1s") + public abstract Duration getCoordinatorRuleKillDurationToRetain(); + @Config("druid.coordinator.load.timeout") public Duration getLoadTimeoutDelay() { diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java new file mode 100644 index 000000000000..e0c0e0a311ec --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.server.coordinator.duty; + +import com.google.common.base.Preconditions; +import com.google.inject.Inject; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; +import org.apache.druid.server.coordinator.DruidCoordinatorConfig; +import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; + +public class KillRules implements CoordinatorDuty +{ + private static final Logger log = new Logger(KillRules.class); + + private final long period; + private final long retainDuration; + private long lastKillTime = 0; + + @Inject + public KillRules( + DruidCoordinatorConfig config + ) + { + this.period = config.getCoordinatorRuleKillPeriod().getMillis(); + Preconditions.checkArgument( + this.period >= config.getCoordinatorMetadataStoreManagementPeriod().getMillis(), + "coordinator rule kill period must be >= druid.coordinator.period.metadataStoreManagementPeriod" + ); + this.retainDuration = config.getCoordinatorRuleKillDurationToRetain().getMillis(); + Preconditions.checkArgument(this.retainDuration >= 0, "coordinator rule kill retainDuration must be >= 0"); + log.debug( + "Rule Kill Task scheduling enabled with period [%s], retainDuration [%s]", + this.period, + this.retainDuration + ); + } + + @Override + public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) + { + if ((lastKillTime + period) < System.currentTimeMillis()) { + lastKillTime = System.currentTimeMillis(); + + long timestamp = System.currentTimeMillis() - retainDuration; + int ruleRemoved = params.getDatabaseRuleManager().removeRulesOlderThan(timestamp); + ServiceEmitter emitter = params.getEmitter(); + emitter.emit( + new ServiceMetricEvent.Builder().build( + "metadata/kill/rule/count", + ruleRemoved + ) + ); + log.info("Finished running KillRules duty. 
Removed %,d rule", ruleRemoved); + } + return params; + } +} diff --git a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java index 5641a23c9d03..0671fffb7b64 100644 --- a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java @@ -23,19 +23,25 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Suppliers; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.apache.druid.audit.AuditEntry; import org.apache.druid.audit.AuditInfo; import org.apache.druid.audit.AuditManager; import org.apache.druid.client.DruidServer; import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.server.SegmentManager; import org.apache.druid.server.audit.SQLAuditManager; import org.apache.druid.server.audit.SQLAuditManagerConfig; import org.apache.druid.server.coordinator.rules.IntervalLoadRule; import org.apache.druid.server.coordinator.rules.Rule; import org.apache.druid.server.metrics.NoopServiceEmitter; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.partition.NoneShardSpec; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -55,8 +61,9 @@ public class SQLMetadataRuleManagerTest private MetadataStorageTablesConfig tablesConfig; private SQLMetadataRuleManager ruleManager; private AuditManager auditManager; + private SQLMetadataSegmentPublisher publisher; private final ObjectMapper mapper = new DefaultObjectMapper(); - + private final ObjectMapper jsonMapper 
= TestHelper.makeJsonMapper(); @Before public void setUp() @@ -80,6 +87,12 @@ public void setUp() connector, auditManager ); + connector.createSegmentTable(); + publisher = new SQLMetadataSegmentPublisher( + jsonMapper, + derbyConnectorRule.metadataTablesConfigSupplier().get(), + connector + ); } @Test @@ -201,6 +214,143 @@ public void testFetchAuditEntriesForAllDataSources() throws Exception } } + @Test + public void testRemoveRulesOlderThanWithNonExistenceDatasourceAndOlderThanTimestampShouldDelete() + { + List rules = ImmutableList.of( + new IntervalLoadRule( + Intervals.of("2015-01-01/2015-02-01"), ImmutableMap.of( + DruidServer.DEFAULT_TIER, + DruidServer.DEFAULT_NUM_REPLICANTS + ) + ) + ); + AuditInfo auditInfo = new AuditInfo("test_author", "test_comment", "127.0.0.1"); + ruleManager.overrideRule( + "test_dataSource", + rules, + auditInfo + ); + // Verify that rule was added + ruleManager.poll(); + Map> allRules = ruleManager.getAllRules(); + Assert.assertEquals(1, allRules.size()); + Assert.assertEquals(1, allRules.get("test_dataSource").size()); + + // Now delete rules + ruleManager.removeRulesOlderThan(System.currentTimeMillis()); + + // Verify that rule was deleted + ruleManager.poll(); + allRules = ruleManager.getAllRules(); + Assert.assertEquals(0, allRules.size()); + } + + @Test + public void testRemoveRulesOlderThanWithNonExistenceDatasourceAndNewerThanTimestampShouldNotDelete() + { + List rules = ImmutableList.of( + new IntervalLoadRule( + Intervals.of("2015-01-01/2015-02-01"), ImmutableMap.of( + DruidServer.DEFAULT_TIER, + DruidServer.DEFAULT_NUM_REPLICANTS + ) + ) + ); + AuditInfo auditInfo = new AuditInfo("test_author", "test_comment", "127.0.0.1"); + ruleManager.overrideRule( + "test_dataSource", + rules, + auditInfo + ); + // Verify that rule was added + ruleManager.poll(); + Map> allRules = ruleManager.getAllRules(); + Assert.assertEquals(1, allRules.size()); + Assert.assertEquals(1, allRules.get("test_dataSource").size()); + + // This will 
not delete the rule as the rule was created just now so it will have the created timestamp later than + // the timestamp 2012-01-01T00:00:00Z + ruleManager.removeRulesOlderThan(DateTimes.of("2012-01-01T00:00:00Z").getMillis()); + + // Verify that rule was not deleted + ruleManager.poll(); + allRules = ruleManager.getAllRules(); + Assert.assertEquals(1, allRules.size()); + Assert.assertEquals(1, allRules.get("test_dataSource").size()); + } + + @Test + public void testRemoveRulesOlderThanWithActiveDatasourceShouldNotDelete() throws Exception + { + List rules = ImmutableList.of( + new IntervalLoadRule( + Intervals.of("2015-01-01/2015-02-01"), ImmutableMap.of( + DruidServer.DEFAULT_TIER, + DruidServer.DEFAULT_NUM_REPLICANTS + ) + ) + ); + AuditInfo auditInfo = new AuditInfo("test_author", "test_comment", "127.0.0.1"); + ruleManager.overrideRule( + "test_dataSource", + rules, + auditInfo + ); + + // Verify that rule was added + ruleManager.poll(); + Map> allRules = ruleManager.getAllRules(); + Assert.assertEquals(1, allRules.size()); + Assert.assertEquals(1, allRules.get("test_dataSource").size()); + + // Add segment metadata to segment table so that the datasource is considered active + DataSegment dataSegment = new DataSegment( + "test_dataSource", + Intervals.of("2015-01-01/2015-02-01"), + "1", + ImmutableMap.of( + "type", "s3_zip", + "bucket", "test", + "key", "test_dataSource/xxx" + ), + ImmutableList.of("dim1", "dim2", "dim3"), + ImmutableList.of("count", "value"), + NoneShardSpec.instance(), + 1, + 1234L + ); + publisher.publishSegment(dataSegment); + + // This will not delete the rule as the datasource has segment in the segment metadata table + ruleManager.removeRulesOlderThan(System.currentTimeMillis()); + + // Verify that rule was not deleted + ruleManager.poll(); + allRules = ruleManager.getAllRules(); + Assert.assertEquals(1, allRules.size()); + Assert.assertEquals(1, allRules.get("test_dataSource").size()); + } + + @Test + public void 
testRemoveRulesOlderThanShouldNotDeleteDefault() + { + // Create the default rule + ruleManager.start(); + // Verify the default rule + ruleManager.poll(); + Map> allRules = ruleManager.getAllRules(); + Assert.assertEquals(1, allRules.size()); + Assert.assertEquals(1, allRules.get("_default").size()); + // Delete everything + ruleManager.removeRulesOlderThan(System.currentTimeMillis()); + // Verify the default rule was not deleted + ruleManager.poll(); + allRules = ruleManager.getAllRules(); + Assert.assertEquals(1, allRules.size()); + Assert.assertEquals(1, allRules.get("_default").size()); + } + @After public void cleanup() { diff --git a/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java index a8deb667dd05..402c92be203e 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java @@ -174,6 +174,8 @@ public void setUp() throws Exception null, null, null, + null, + null, 10, new Duration("PT0s") ); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java index 3aaef647c418..a1c3d033118d 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java @@ -146,6 +146,8 @@ public void setUp() throws Exception null, null, null, + null, + null, 10, new Duration("PT0s") ); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/HttpLoadQueuePeonTest.java b/server/src/test/java/org/apache/druid/server/coordinator/HttpLoadQueuePeonTest.java index 7b07d7ffa61c..607fb8dc3fcd 100644 --- 
a/server/src/test/java/org/apache/druid/server/coordinator/HttpLoadQueuePeonTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/HttpLoadQueuePeonTest.java @@ -83,6 +83,8 @@ public class HttpLoadQueuePeonTest null, null, null, + null, + null, 10, Duration.ZERO ) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTest.java b/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTest.java index 24e68063b6a7..a607423bb9a2 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTest.java @@ -98,6 +98,8 @@ public void testMultipleLoadDropSegments() throws Exception null, null, null, + null, + null, 10, Duration.millis(0) ) @@ -296,6 +298,8 @@ public void testFailAssignForNonTimeoutFailures() throws Exception null, null, null, + null, + null, 10, new Duration("PT1s") ) @@ -351,6 +355,8 @@ public void testFailAssignForLoadDropTimeout() throws Exception null, null, null, + null, + null, 10, new Duration("PT1s") ) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTester.java b/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTester.java index 5185452779c3..5540a117d4be 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTester.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/LoadQueuePeonTester.java @@ -47,6 +47,8 @@ public LoadQueuePeonTester() null, null, null, + null, + null, 10, new Duration("PT1s") ) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/TestDruidCoordinatorConfig.java b/server/src/test/java/org/apache/druid/server/coordinator/TestDruidCoordinatorConfig.java index 135f8d0557e5..a256fbdb3138 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/TestDruidCoordinatorConfig.java +++ 
b/server/src/test/java/org/apache/druid/server/coordinator/TestDruidCoordinatorConfig.java @@ -33,6 +33,8 @@ public class TestDruidCoordinatorConfig extends DruidCoordinatorConfig private final Duration coordinatorKillDurationToRetain; private final Duration coordinatorAuditKillPeriod; private final Duration coordinatorAuditKillDurationToRetain; + private final Duration coordinatorRuleKillPeriod; + private final Duration coordinatorRuleKillDurationToRetain; private final Duration getLoadQueuePeonRepeatDelay; private final int coordinatorKillMaxSegments; @@ -46,6 +48,8 @@ public TestDruidCoordinatorConfig( Duration coordinatorKillDurationToRetain, Duration coordinatorAuditKillPeriod, Duration coordinatorAuditKillDurationToRetain, + Duration coordinatorRuleKillPeriod, + Duration coordinatorRuleKillDurationToRetain, int coordinatorKillMaxSegments, Duration getLoadQueuePeonRepeatDelay ) @@ -59,6 +63,8 @@ public TestDruidCoordinatorConfig( this.coordinatorKillDurationToRetain = coordinatorKillDurationToRetain; this.coordinatorAuditKillPeriod = coordinatorAuditKillPeriod; this.coordinatorAuditKillDurationToRetain = coordinatorAuditKillDurationToRetain; + this.coordinatorRuleKillPeriod = coordinatorRuleKillPeriod; + this.coordinatorRuleKillDurationToRetain = coordinatorRuleKillDurationToRetain; this.coordinatorKillMaxSegments = coordinatorKillMaxSegments; this.getLoadQueuePeonRepeatDelay = getLoadQueuePeonRepeatDelay; } @@ -111,6 +117,18 @@ public Duration getCoordinatorAuditKillDurationToRetain() return coordinatorAuditKillDurationToRetain; } + @Override + public Duration getCoordinatorRuleKillPeriod() + { + return coordinatorRuleKillPeriod; + } + + @Override + public Duration getCoordinatorRuleKillDurationToRetain() + { + return coordinatorRuleKillDurationToRetain; + } + @Override public int getCoordinatorKillMaxSegments() { diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillAuditLogTest.java 
b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillAuditLogTest.java index b0f273cba47d..51ddc3c4a973 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillAuditLogTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillAuditLogTest.java @@ -64,6 +64,8 @@ public void testRunSkipIfLastRunLessThanPeriod() null, new Duration(Long.MAX_VALUE), new Duration("PT1S"), + null, + null, 10, null ); @@ -86,6 +88,8 @@ public void testRunNotSkipIfLastRunMoreThanPeriod() null, new Duration("PT6S"), new Duration("PT1S"), + null, + null, 10, null ); @@ -108,6 +112,8 @@ public void testConstructorFailIfInvalidPeriod() null, new Duration("PT3S"), new Duration("PT1S"), + null, + null, 10, null ); @@ -129,6 +135,8 @@ public void testConstructorFailIfInvalidRetainDuration() null, new Duration("PT6S"), new Duration("PT-1S"), + null, + null, 10, null ); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java new file mode 100644 index 000000000000..a3cad43056a1 --- /dev/null +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.duty; + +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.java.util.emitter.service.ServiceEventBuilder; +import org.apache.druid.metadata.MetadataRuleManager; +import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; +import org.apache.druid.server.coordinator.TestDruidCoordinatorConfig; +import org.joda.time.Duration; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.mockito.ArgumentMatchers; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; + +@RunWith(MockitoJUnitRunner.class) +public class KillRulesTest +{ + @Mock + private MetadataRuleManager mockRuleManager; + + @Mock + private DruidCoordinatorRuntimeParams mockDruidCoordinatorRuntimeParams; + + @Mock + private ServiceEmitter mockServiceEmitter; + + @Rule + public ExpectedException exception = ExpectedException.none(); + + private KillRules killRules; + + @Before + public void setup() + { + Mockito.when(mockDruidCoordinatorRuntimeParams.getDatabaseRuleManager()).thenReturn(mockRuleManager); + } + + @Test + public void testRunSkipIfLastRunLessThanPeriod() + { + TestDruidCoordinatorConfig druidCoordinatorConfig = new TestDruidCoordinatorConfig( + null, + null, + null, + new Duration("PT5S"), + null, + null, + null, + null, + null, + new Duration(Long.MAX_VALUE), + new Duration("PT1S"), + 10, + null + ); + killRules = new KillRules(druidCoordinatorConfig); + killRules.run(mockDruidCoordinatorRuntimeParams); + Mockito.verifyZeroInteractions(mockRuleManager); + } + + @Test + public void testRunNotSkipIfLastRunMoreThanPeriod() + { + Mockito.when(mockDruidCoordinatorRuntimeParams.getEmitter()).thenReturn(mockServiceEmitter); + 
TestDruidCoordinatorConfig druidCoordinatorConfig = new TestDruidCoordinatorConfig( + null, + null, + null, + new Duration("PT5S"), + null, + null, + null, + null, + null, + new Duration("PT6S"), + new Duration("PT1S"), + 10, + null + ); + killRules = new KillRules(druidCoordinatorConfig); + killRules.run(mockDruidCoordinatorRuntimeParams); + Mockito.verify(mockRuleManager).removeRulesOlderThan(ArgumentMatchers.anyLong()); + Mockito.verify(mockServiceEmitter).emit(ArgumentMatchers.any(ServiceEventBuilder.class)); + } + + @Test + public void testConstructorFailIfInvalidPeriod() + { + TestDruidCoordinatorConfig druidCoordinatorConfig = new TestDruidCoordinatorConfig( + null, + null, + null, + new Duration("PT5S"), + null, + null, + null, + null, + null, + new Duration("PT3S"), + new Duration("PT1S"), + 10, + null + ); + exception.expect(IllegalArgumentException.class); + exception.expectMessage("coordinator rule kill period must be >= druid.coordinator.period.metadataStoreManagementPeriod"); + killRules = new KillRules(druidCoordinatorConfig); + } + + @Test + public void testConstructorFailIfInvalidRetainDuration() + { + TestDruidCoordinatorConfig druidCoordinatorConfig = new TestDruidCoordinatorConfig( + null, + null, + null, + new Duration("PT5S"), + null, + null, + null, + null, + null, + new Duration("PT6S"), + new Duration("PT-1S"), + 10, + null + ); + exception.expect(IllegalArgumentException.class); + exception.expectMessage("coordinator rule kill retainDuration must be >= 0"); + killRules = new KillRules(druidCoordinatorConfig); + } +} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index cd766660dfbe..4a1aa0960761 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ 
-111,6 +111,8 @@ private void testFindIntervalForKill(List segmentIntervals, Interval e Duration.parse("PT86400S"), null, null, + null, + null, 1000, Duration.ZERO ) diff --git a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java index 9e3c65b3cf1f..b9ccea3024b9 100644 --- a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java +++ b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java @@ -73,6 +73,7 @@ import org.apache.druid.server.coordinator.LoadQueueTaskMaster; import org.apache.druid.server.coordinator.duty.CoordinatorDuty; import org.apache.druid.server.coordinator.duty.KillAuditLog; +import org.apache.druid.server.coordinator.duty.KillRules; import org.apache.druid.server.coordinator.duty.KillUnusedSegments; import org.apache.druid.server.http.ClusterResource; import org.apache.druid.server.http.CompactionResource; @@ -256,6 +257,11 @@ public void configure(Binder binder) Predicates.equalTo("true"), KillAuditLog.class ); + conditionalMetadataStoreManagementDutyMultibind.addConditionBinding( + "druid.coordinator.kill.rule.on", + Predicates.equalTo("true"), + KillRules.class + ); bindNodeRoleAndAnnouncer( binder, From 9e491fc23fd00b0531768cc169888ffea3b3954d Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Mon, 26 Apr 2021 21:27:41 -0700 Subject: [PATCH 2/3] Add feature to automatically remove rules based on retention period --- .../org/apache/druid/metadata/SQLMetadataRuleManagerTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java index 0671fffb7b64..b4246f99b12a 100644 --- a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java @@ -34,7 +34,6 @@ import 
org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.segment.TestHelper; -import org.apache.druid.server.SegmentManager; import org.apache.druid.server.audit.SQLAuditManager; import org.apache.druid.server.audit.SQLAuditManagerConfig; import org.apache.druid.server.coordinator.rules.IntervalLoadRule; From 834f63b3893f696a39fb811b1ee5f824be9f11af Mon Sep 17 00:00:00 2001 From: Maytas Monsereenusorn Date: Wed, 28 Apr 2021 20:11:05 -0700 Subject: [PATCH 3/3] address comments --- .../org/apache/druid/metadata/MetadataRuleManager.java | 2 +- .../apache/druid/metadata/SQLMetadataRuleManager.java | 9 ++++++++- .../apache/druid/server/coordinator/duty/KillRules.java | 2 +- .../druid/metadata/SQLMetadataRuleManagerTest.java | 8 ++++---- .../druid/server/coordinator/duty/KillRulesTest.java | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java b/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java index 858a0947719c..ea2b6e7461f8 100644 --- a/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java +++ b/server/src/main/java/org/apache/druid/metadata/MetadataRuleManager.java @@ -49,5 +49,5 @@ public interface MetadataRuleManager * @param timestamp timestamp in milliseconds * @return number of rules removed */ - int removeRulesOlderThan(long timestamp); + int removeRulesForEmptyDatasourcesOlderThan(long timestamp); } diff --git a/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java b/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java index 44195bb79b92..4d92ea99c9f5 100644 --- a/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SQLMetadataRuleManager.java @@ -390,6 +390,8 @@ public Void inTransaction(Handle handle, TransactionStatus transactionStatus) th .build(), 
handle ); + // Note that the method removeRulesForEmptyDatasourcesOlderThan depends on the version field + // being a timestamp String version = auditTime.toString(); handle.createStatement( StringUtils.format( @@ -423,13 +425,18 @@ public Void inTransaction(Handle handle, TransactionStatus transactionStatus) th } @Override - public int removeRulesOlderThan(long timestamp) + public int removeRulesForEmptyDatasourcesOlderThan(long timestamp) { + // Note that this DELETE SQL depends on the version field being a timestamp. Hence, this + // method depends on the overrideRule method to set version to a timestamp when the rule entry is created DateTime dateTime = DateTimes.utc(timestamp); synchronized (lock) { return dbi.withHandle( handle -> { Update sql = handle.createStatement( + // Note that this query could be expensive when the segments table is large. + // However, since currently this query is run very infrequently (by default once a day by the KillRules Coordinator duty) + // and the inner query on segment table is a READ (no locking), it is kept this way.
StringUtils.format( "DELETE FROM %1$s WHERE datasource NOT IN (SELECT DISTINCT datasource from %2$s) and datasource!=:default_rule and version < :date_time", getRulesTable(), diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java index e0c0e0a311ec..eb4f0186cdd4 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillRules.java @@ -61,7 +61,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) lastKillTime = System.currentTimeMillis(); long timestamp = System.currentTimeMillis() - retainDuration; - int ruleRemoved = params.getDatabaseRuleManager().removeRulesOlderThan(timestamp); + int ruleRemoved = params.getDatabaseRuleManager().removeRulesForEmptyDatasourcesOlderThan(timestamp); ServiceEmitter emitter = params.getEmitter(); emitter.emit( new ServiceMetricEvent.Builder().build( diff --git a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java index b4246f99b12a..289950bc60d7 100644 --- a/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SQLMetadataRuleManagerTest.java @@ -237,7 +237,7 @@ public void testRemoveRulesOlderThanWithNonExistenceDatasourceAndOlderThanTimest Assert.assertEquals(1, allRules.get("test_dataSource").size()); // Now delete rules - ruleManager.removeRulesOlderThan(System.currentTimeMillis()); + ruleManager.removeRulesForEmptyDatasourcesOlderThan(System.currentTimeMillis()); // Verify that rule was deleted ruleManager.poll(); @@ -270,7 +270,7 @@ public void testRemoveRulesOlderThanWithNonExistenceDatasourceAndNewerThanTimest // This will not delete the rule as the rule was created just now so it will have the 
created timestamp later than // the timestamp 2012-01-01T00:00:00Z - ruleManager.removeRulesOlderThan(DateTimes.of("2012-01-01T00:00:00Z").getMillis()); + ruleManager.removeRulesForEmptyDatasourcesOlderThan(DateTimes.of("2012-01-01T00:00:00Z").getMillis()); // Verify that rule was not deleted ruleManager.poll(); @@ -322,7 +322,7 @@ public void testRemoveRulesOlderThanWithActiveDatasourceShouldNotDelete() throws publisher.publishSegment(dataSegment); // This will not delete the rule as the datasource has segment in the segment metadata table - ruleManager.removeRulesOlderThan(System.currentTimeMillis()); + ruleManager.removeRulesForEmptyDatasourcesOlderThan(System.currentTimeMillis()); // Verify that rule was not deleted ruleManager.poll(); @@ -342,7 +342,7 @@ public void testRemoveRulesOlderThanShouldNotDeleteDefault() Assert.assertEquals(1, allRules.size()); Assert.assertEquals(1, allRules.get("_default").size()); // Delete everything - ruleManager.removeRulesOlderThan(System.currentTimeMillis()); + ruleManager.removeRulesForEmptyDatasourcesOlderThan(System.currentTimeMillis()); // Verify the default rule was not deleted ruleManager.poll(); allRules = ruleManager.getAllRules(); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java index a3cad43056a1..98c0ad9d65e6 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillRulesTest.java @@ -102,7 +102,7 @@ public void testRunNotSkipIfLastRunMoreThanPeriod() ); killRules = new KillRules(druidCoordinatorConfig); killRules.run(mockDruidCoordinatorRuntimeParams); - Mockito.verify(mockRuleManager).removeRulesOlderThan(ArgumentMatchers.anyLong()); + Mockito.verify(mockRuleManager).removeRulesForEmptyDatasourcesOlderThan(ArgumentMatchers.anyLong()); 
Mockito.verify(mockServiceEmitter).emit(ArgumentMatchers.any(ServiceEventBuilder.class)); }