From c04a0a9b1079500bf9e91efe9c56eb94db54b06b Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Sun, 25 Aug 2024 16:45:25 +0530 Subject: [PATCH 1/6] Cleanup Coordinator logs, add duty status API --- .../druid/client/DataSourcesSnapshot.java | 18 - .../server/coordinator/DruidCoordinator.java | 308 +++++++----------- .../DruidCoordinatorRuntimeParams.java | 120 +++---- .../balancer/BalancerStrategyFactory.java | 1 - .../balancer/TierSegmentBalancer.java | 4 +- .../coordinator/duty/BalanceSegments.java | 8 +- .../duty/CollectSegmentAndServerStats.java | 133 -------- .../duty/CoordinatorDutyGroup.java | 191 +++++++++++ .../coordinator/duty/DutyGroupStatus.java | 101 ++++++ .../duty/KillUnreferencedSegmentSchema.java | 2 +- .../duty/MarkEternityTombstonesAsUnused.java | 4 +- .../MarkOvershadowedSegmentsAsUnused.java | 20 +- ...DeleteHandler.java => MetadataAction.java} | 20 +- .../duty/PrepareBalancerAndLoadQueues.java | 15 +- .../server/coordinator/duty/RunRules.java | 29 +- .../duty/UnloadUnusedSegments.java | 32 +- .../loading/CuratorLoadQueuePeon.java | 9 +- .../coordinator/loading/SegmentHolder.java | 22 +- .../loading/SegmentLoadingConfig.java | 2 +- .../loading/StrategicSegmentAssigner.java | 5 - .../stats/CoordinatorRunStats.java | 19 -- .../druid/server/coordinator/stats/Stats.java | 6 + .../server/http/CoordinatorResource.java | 9 + .../coordinator/BalanceSegmentsProfiler.java | 7 +- .../coordinator/CoordinatorRunStatsTest.java | 7 +- .../coordinator/DruidCoordinatorTest.java | 93 ++++-- .../coordinator/duty/BalanceSegmentsTest.java | 2 +- .../CollectSegmentAndServerStatsTest.java | 68 ---- .../coordinator/duty/CompactSegmentsTest.java | 2 +- .../duty/KillStalePendingSegmentsTest.java | 2 +- .../duty/KillUnusedSegmentsTest.java | 3 +- .../MarkEternityTombstonesAsUnusedTest.java | 2 +- .../MarkOvershadowedSegmentsAsUnusedTest.java | 2 +- .../server/coordinator/duty/RunRulesTest.java | 9 +- .../duty/UnloadUnusedSegmentsTest.java | 5 +- .../rules/BroadcastDistributionRuleTest.java | 5 +- .../coordinator/rules/LoadRuleTest.java | 4 +- 37 files changed, 669 insertions(+), 620 deletions(-) delete mode 100644 server/src/main/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStats.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/duty/DutyGroupStatus.java rename server/src/main/java/org/apache/druid/server/coordinator/duty/{SegmentDeleteHandler.java => MetadataAction.java} (64%) delete mode 100644 server/src/test/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStatsTest.java diff --git a/server/src/main/java/org/apache/druid/client/DataSourcesSnapshot.java b/server/src/main/java/org/apache/druid/client/DataSourcesSnapshot.java index aa1641d92bed..5f58117a4ec9 100644 --- a/server/src/main/java/org/apache/druid/client/DataSourcesSnapshot.java +++ b/server/src/main/java/org/apache/druid/client/DataSourcesSnapshot.java @@ -21,7 +21,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Maps; import org.apache.druid.metadata.SqlSegmentsMetadataManager; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; @@ -55,23 +54,6 @@ public static DataSourcesSnapshot fromUsedSegments( return new DataSourcesSnapshot(CollectionUtils.mapValues(dataSources, DruidDataSource::toImmutableDruidDataSource)); } - public 
static DataSourcesSnapshot fromUsedSegmentsTimelines( - Map usedSegmentsTimelinesPerDataSource, - ImmutableMap dataSourceProperties - ) - { - Map dataSourcesWithAllUsedSegments = - Maps.newHashMapWithExpectedSize(usedSegmentsTimelinesPerDataSource.size()); - usedSegmentsTimelinesPerDataSource.forEach( - (dataSourceName, usedSegmentsTimeline) -> { - DruidDataSource dataSource = new DruidDataSource(dataSourceName, dataSourceProperties); - usedSegmentsTimeline.iterateAllObjects().forEach(dataSource::addSegment); - dataSourcesWithAllUsedSegments.put(dataSourceName, dataSource.toImmutableDruidDataSource()); - } - ); - return new DataSourcesSnapshot(dataSourcesWithAllUsedSegments, usedSegmentsTimelinesPerDataSource); - } - private final Map dataSourcesWithAllUsedSegments; private final Map usedSegmentsTimelinesPerDataSource; private final ImmutableSet overshadowedSegments; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java index 5e468182fa71..4d1d6335fcae 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java @@ -19,9 +19,7 @@ package org.apache.druid.server.coordinator; -import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.errorprone.annotations.concurrent.GuardedBy; import com.google.inject.Inject; @@ -29,7 +27,6 @@ import it.unimi.dsi.fastutil.objects.Object2IntMaps; import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import it.unimi.dsi.fastutil.objects.Object2LongMap; -import org.apache.druid.client.DataSourcesSnapshot; import org.apache.druid.client.DruidDataSource; import org.apache.druid.client.DruidServer; import org.apache.druid.client.ImmutableDruidDataSource; @@ -39,11 +36,8 @@ import org.apache.druid.discovery.DruidLeaderSelector; import org.apache.druid.guice.ManageLifecycle; import org.apache.druid.guice.annotations.Self; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.Stopwatch; import org.apache.druid.java.util.common.concurrent.ScheduledExecutorFactory; import org.apache.druid.java.util.common.concurrent.ScheduledExecutors; -import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.java.util.common.lifecycle.LifecycleStart; import org.apache.druid.java.util.common.lifecycle.LifecycleStop; import org.apache.druid.java.util.emitter.EmittingLogger; @@ -60,11 +54,12 @@ import org.apache.druid.server.coordinator.config.DruidCoordinatorConfig; import org.apache.druid.server.coordinator.config.KillUnusedSegmentsConfig; import org.apache.druid.server.coordinator.duty.BalanceSegments; -import org.apache.druid.server.coordinator.duty.CollectSegmentAndServerStats; import org.apache.druid.server.coordinator.duty.CompactSegments; import org.apache.druid.server.coordinator.duty.CoordinatorCustomDutyGroup; import org.apache.druid.server.coordinator.duty.CoordinatorCustomDutyGroups; import org.apache.druid.server.coordinator.duty.CoordinatorDuty; +import org.apache.druid.server.coordinator.duty.CoordinatorDutyGroup; +import org.apache.druid.server.coordinator.duty.DutyGroupStatus; import org.apache.druid.server.coordinator.duty.KillAuditLog; import org.apache.druid.server.coordinator.duty.KillCompactionConfig; import 
org.apache.druid.server.coordinator.duty.KillDatasourceMetadata; @@ -75,6 +70,7 @@ import org.apache.druid.server.coordinator.duty.KillUnusedSegments; import org.apache.druid.server.coordinator.duty.MarkEternityTombstonesAsUnused; import org.apache.druid.server.coordinator.duty.MarkOvershadowedSegmentsAsUnused; +import org.apache.druid.server.coordinator.duty.MetadataAction; import org.apache.druid.server.coordinator.duty.PrepareBalancerAndLoadQueues; import org.apache.druid.server.coordinator.duty.RunRules; import org.apache.druid.server.coordinator.duty.UnloadUnusedSegments; @@ -91,12 +87,10 @@ import org.apache.druid.server.lookup.cache.LookupCoordinatorManager; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentId; -import org.joda.time.DateTime; import org.joda.time.Duration; import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -104,7 +98,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; /** @@ -113,27 +106,6 @@ @ManageLifecycle public class DruidCoordinator { - /** - * Orders newest segments (i.e. segments with most recent intervals) first. - * Used by: - *
- * <ul>
- * <li>{@link RunRules} duty to prioritize assignment of more recent segments.
- * The order of segments matters because the {@link CoordinatorDynamicConfig#replicationThrottleLimit}
- * might cause only a few segments to be picked for replication in a coordinator run.
- * </li>
- * <li>{@link LoadQueuePeon}s to prioritize load of more recent segments.</li>
- * </ul>
- *
- * It is presumed that more recent segments are queried more often and contain - * more important data for users. This ordering thus ensures that if the cluster - * has availability or loading problems, the most recent segments are made - * available as soon as possible. - */ - public static final Ordering SEGMENT_COMPARATOR_RECENT_FIRST = Ordering - .from(Comparators.intervalsByEndThenStart()) - .onResultOf(DataSegment::getInterval) - .compound(Ordering.natural()) - .reverse(); - private static final EmittingLogger log = new EmittingLogger(DruidCoordinator.class); private final Object lock = new Object(); @@ -144,7 +116,7 @@ public class DruidCoordinator private final ServiceEmitter emitter; private final OverlordClient overlordClient; private final ScheduledExecutorFactory executorFactory; - private final Map dutyGroupExecutors = new HashMap<>(); + private final List dutiesRunnables = new ArrayList<>(); private final LoadQueueTaskMaster taskMaster; private final SegmentLoadQueueManager loadQueueManager; private final ServiceAnnouncer serviceAnnouncer; @@ -158,7 +130,6 @@ public class DruidCoordinator private final CoordinatorSegmentMetadataCache coordinatorSegmentMetadataCache; private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; - private volatile boolean started = false; /** @@ -180,7 +151,7 @@ public class DruidCoordinator */ private volatile Set broadcastSegments = null; - public static final String HISTORICAL_MANAGEMENT_DUTIES_DUTY_GROUP = "HistoricalManagementDuties"; + private static final String HISTORICAL_MANAGEMENT_DUTIES_DUTY_GROUP = "HistoricalManagementDuties"; private static final String METADATA_STORE_MANAGEMENT_DUTIES_DUTY_GROUP = "MetadataStoreManagementDuties"; private static final String INDEXING_SERVICE_DUTIES_DUTY_GROUP = "IndexingServiceDuties"; private static final String COMPACT_SEGMENTS_DUTIES_DUTY_GROUP = "CompactSegmentsDuties"; @@ -363,6 +334,11 @@ public String getCurrentLeader() return coordLeaderSelector.getCurrentLeader(); } + public List getStatusOfDuties() + { + return dutiesRunnables.stream().map(r -> r.dutyGroup.getStatus()).collect(Collectors.toList()); + } + @LifecycleStart public void start() { @@ -400,11 +376,8 @@ public void stop() } coordLeaderSelector.unregisterListener(); - started = false; - - stopAllDutyGroupExecutors(); - balancerStrategyFactory.stopExecutor(); + stopAllDutyGroups(); } } @@ -453,7 +426,6 @@ private void becomeLeader() } final int startingLeaderCounter = coordLeaderSelector.localTerm(); - final List dutiesRunnables = new ArrayList<>(); dutiesRunnables.add( new DutiesRunnable( makeHistoricalManagementDuties(), @@ -484,27 +456,28 @@ private void becomeLeader() for (CoordinatorCustomDutyGroup customDutyGroup : customDutyGroups.getCoordinatorCustomDutyGroups()) { dutiesRunnables.add( new DutiesRunnable( - customDutyGroup.getCustomDutyList(), + new ArrayList<>(customDutyGroup.getCustomDutyList()), startingLeaderCounter, customDutyGroup.getName(), customDutyGroup.getPeriod() ) ); - log.info( - "Done making custom coordinator duties [%s] for group [%s]", - customDutyGroup.getCustomDutyList().stream() - .map(duty -> duty.getClass().getName()).collect(Collectors.toList()), - customDutyGroup.getName() - ); } + log.info( + "Created [%d] duty groups. DUTY RUNS WILL NOT BE LOGGED." 
+ + " Use API '/druid/coordinator/v1/duties' to get duty status or set dynamic config" + + " 'debugDimensions' on API '/druid/coordinator/v1/config' to log more details.", + dutiesRunnables.size() + ); + for (final DutiesRunnable dutiesRunnable : dutiesRunnables) { // Several coordinator duties can take a non trival amount of time to complete. // Hence, we schedule each duty group on a dedicated executor ScheduledExecutors.scheduleAtFixedRate( - getOrCreateDutyGroupExecutor(dutiesRunnable.dutyGroupName), + dutiesRunnable.executor, config.getCoordinatorStartDelay(), - dutiesRunnable.getPeriod(), + dutiesRunnable.dutyGroup.getPeriod(), () -> { if (coordLeaderSelector.isLeader() && startingLeaderCounter == coordLeaderSelector.localTerm()) { @@ -537,28 +510,25 @@ private void stopBeingLeader() serviceAnnouncer.unannounce(self); lookupCoordinatorManager.stop(); metadataManager.onLeaderStop(); - balancerStrategyFactory.stopExecutor(); + stopAllDutyGroups(); } } @GuardedBy("lock") - private ScheduledExecutorService getOrCreateDutyGroupExecutor(String dutyGroup) + private void stopAllDutyGroups() { - return dutyGroupExecutors.computeIfAbsent( - dutyGroup, - group -> executorFactory.create(1, "Coordinator-Exec-" + dutyGroup + "-%d") - ); - } - - @GuardedBy("lock") - private void stopAllDutyGroupExecutors() - { - dutyGroupExecutors.values().forEach(ScheduledExecutorService::shutdownNow); - dutyGroupExecutors.clear(); + balancerStrategyFactory.stopExecutor(); + dutiesRunnables.forEach(group -> group.executor.shutdownNow()); + dutiesRunnables.clear(); } private List makeHistoricalManagementDuties() { + final MetadataAction.DeleteSegments deleteSegments + = segments -> metadataManager.segments().markSegmentsAsUnused(segments); + final MetadataAction.GetDatasourceRules getRules + = dataSource -> metadataManager.rules().getRulesWithDefault(dataSource); + return ImmutableList.of( new PrepareBalancerAndLoadQueues( taskMaster, @@ -566,18 +536,18 @@ private List makeHistoricalManagementDuties() balancerStrategyFactory, serverInventoryView ), - new RunRules(segments -> metadataManager.segments().markSegmentsAsUnused(segments)), + new RunRules(deleteSegments, getRules), new UpdateReplicationStatus(), - new UnloadUnusedSegments(loadQueueManager), - new MarkOvershadowedSegmentsAsUnused(segments -> metadataManager.segments().markSegmentsAsUnused(segments)), - new MarkEternityTombstonesAsUnused(segments -> metadataManager.segments().markSegmentsAsUnused(segments)), + new CollectSegmentStats(), + new UnloadUnusedSegments(loadQueueManager, getRules), + new MarkOvershadowedSegmentsAsUnused(deleteSegments), + new MarkEternityTombstonesAsUnused(deleteSegments), new BalanceSegments(config.getCoordinatorPeriod()), - new CollectSegmentAndServerStats(taskMaster) + new CollectLoadQueueStats() ); } - @VisibleForTesting - List makeIndexingServiceDuties() + private List makeIndexingServiceDuties() { final List duties = new ArrayList<>(); final KillUnusedSegmentsConfig killUnusedConfig = config.getKillConfigs().unusedSegments( @@ -590,14 +560,10 @@ List makeIndexingServiceDuties() duties.add(new KillStalePendingSegments(overlordClient)); } - // CompactSegmentsDuty should be the last duty as it can take a long time to complete - // We do not have to add compactSegments if it is already enabled in the custom duty group + // Do not add compactSegments if it is already added in any of the custom duty groups if (getCompactSegmentsDutyFromCustomGroups().isEmpty()) { duties.add(compactSegments); } - if (log.isDebugEnabled()) { - 
log.debug("Initialized indexing service duties [%s].", duties.stream().map(duty -> duty.getClass().getName()).collect(Collectors.toList())); - } return ImmutableList.copyOf(duties); } @@ -624,8 +590,7 @@ private List makeMetadataStoreManagementDuties() return duties; } - @VisibleForTesting - CompactSegments initializeCompactSegmentsDuty(CompactionSegmentSearchPolicy compactionSegmentSearchPolicy) + private CompactSegments initializeCompactSegmentsDuty(CompactionSegmentSearchPolicy compactionSegmentSearchPolicy) { List compactSegmentsDutyFromCustomGroups = getCompactSegmentsDutyFromCustomGroups(); if (compactSegmentsDutyFromCustomGroups.isEmpty()) { @@ -641,8 +606,7 @@ CompactSegments initializeCompactSegmentsDuty(CompactionSegmentSearchPolicy comp } } - @VisibleForTesting - List getCompactSegmentsDutyFromCustomGroups() + private List getCompactSegmentsDutyFromCustomGroups() { return customDutyGroups.getCoordinatorCustomDutyGroups() .stream() @@ -653,156 +617,86 @@ List getCompactSegmentsDutyFromCustomGroups() .collect(Collectors.toList()); } - private class DutiesRunnable implements Runnable + /** + * Used by {@link CoordinatorDutyGroup} to check leadership and emit stats. + */ + public interface DutyGroupHelper + { + boolean isLeader(); + void emitStat(CoordinatorStat stat, RowKey rowKey, long value); + } + + /** + * Container for a single {@link CoordinatorDutyGroup} that runs on a dedicated executor. + */ + private class DutiesRunnable implements Runnable, DutyGroupHelper { - private final DateTime coordinatorStartTime = DateTimes.nowUtc(); - private final List duties; private final int startingLeaderCounter; - private final String dutyGroupName; - private final Duration period; + private final ScheduledExecutorService executor; + private final CoordinatorDutyGroup dutyGroup; DutiesRunnable( - List duties, + List duties, final int startingLeaderCounter, String alias, Duration period ) { - this.duties = duties; this.startingLeaderCounter = startingLeaderCounter; - this.dutyGroupName = alias; - this.period = period; + this.dutyGroup = new CoordinatorDutyGroup(alias, duties, period, this); + this.executor = executorFactory.create(1, "Coordinator-Exec-" + alias + "-%d"); } @Override public void run() { try { - log.info("Starting coordinator run for group [%s]", dutyGroupName); - final Stopwatch groupRunTime = Stopwatch.createStarted(); - synchronized (lock) { if (!coordLeaderSelector.isLeader()) { - log.info("LEGGO MY EGGO. [%s] is leader.", coordLeaderSelector.getCurrentLeader()); stopBeingLeader(); return; } } - List allStarted = Arrays.asList( - metadataManager.isStarted(), - serverInventoryView.isStarted() - ); - for (Boolean aBoolean : allStarted) { - if (!aBoolean) { - log.error("InventoryManagers not started[%s]", allStarted); - stopBeingLeader(); - return; - } - } - - // Do coordinator stuff. 
- DataSourcesSnapshot dataSourcesSnapshot - = metadataManager.segments().getSnapshotOfDataSourcesWithAllUsedSegments(); - - final CoordinatorDynamicConfig dynamicConfig = metadataManager.configs().getCurrentDynamicConfig(); - final DruidCompactionConfig compactionConfig = metadataManager.configs().getCurrentCompactionConfig(); - DruidCoordinatorRuntimeParams params = - DruidCoordinatorRuntimeParams - .newBuilder(coordinatorStartTime) - .withDatabaseRuleManager(metadataManager.rules()) - .withDataSourcesSnapshot(dataSourcesSnapshot) - .withDynamicConfigs(dynamicConfig) - .withCompactionConfig(compactionConfig) - .build(); - log.info( - "Initialized run params for group [%s] with [%,d] used segments in [%d] datasources.", - dutyGroupName, params.getUsedSegments().size(), dataSourcesSnapshot.getDataSourcesMap().size() - ); - - boolean coordinationPaused = dynamicConfig.getPauseCoordination(); - if (coordinationPaused - && coordLeaderSelector.isLeader() - && startingLeaderCounter == coordLeaderSelector.localTerm()) { - - log.info("Coordination has been paused. Duties will not run until coordination is resumed."); - } - - final Stopwatch dutyRunTime = Stopwatch.createUnstarted(); - for (CoordinatorDuty duty : duties) { - // Don't read state and run state in the same duty otherwise racy conditions may exist - if (!coordinationPaused - && coordLeaderSelector.isLeader() - && startingLeaderCounter == coordLeaderSelector.localTerm()) { - - dutyRunTime.restart(); - params = duty.run(params); - dutyRunTime.stop(); - - final String dutyName = duty.getClass().getName(); - if (params == null) { - log.info("Stopping run for group [%s] on request of duty [%s].", dutyGroupName, dutyName); - return; - } else { - final RowKey rowKey = RowKey.of(Dimension.DUTY, dutyName); - final long dutyRunMillis = dutyRunTime.millisElapsed(); - params.getCoordinatorStats().add(Stats.CoordinatorRun.DUTY_RUN_TIME, rowKey, dutyRunMillis); - } - } - } - - // Emit stats collected from all duties - final CoordinatorRunStats allStats = params.getCoordinatorStats(); - if (allStats.rowCount() > 0) { - final AtomicInteger emittedCount = new AtomicInteger(); - allStats.forEachStat( - (stat, dimensions, value) -> { - if (stat.shouldEmit()) { - emitStat(stat, dimensions.getValues(), value); - emittedCount.incrementAndGet(); - } - } - ); - - log.info( - "Emitted [%d] stats for group [%s]. All collected stats:%s", - emittedCount.get(), dutyGroupName, allStats.buildStatsTable() - ); + if (metadataManager.isStarted() && serverInventoryView.isStarted()) { + final DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams + .builder() + .withDataSourcesSnapshot(metadataManager.segments().getSnapshotOfDataSourcesWithAllUsedSegments()) + .withDynamicConfigs(metadataManager.configs().getCurrentDynamicConfig()) + .withCompactionConfig(metadataManager.configs().getCurrentCompactionConfig()) + .build(); + dutyGroup.run(params); + } else { + log.error("Inventory view not initialized yet. 
Skipping run of duty group[%s].", dutyGroup.getName()); + stopBeingLeader(); } - - // Emit the runtime of the full DutiesRunnable - groupRunTime.stop(); - final long runMillis = groupRunTime.millisElapsed(); - emitStat(Stats.CoordinatorRun.GROUP_RUN_TIME, Collections.emptyMap(), runMillis); - log.info("Finished coordinator run for group [%s] in [%d] ms.%n", dutyGroupName, runMillis); } catch (Exception e) { log.makeAlert(e, "Caught exception, ignoring so that schedule keeps going.").emit(); } } - private void emitStat(CoordinatorStat stat, Map dimensionValues, long value) + @Override + public void emitStat(CoordinatorStat stat, RowKey rowKey, long value) { ServiceMetricEvent.Builder eventBuilder = new ServiceMetricEvent.Builder() - .setDimension(Dimension.DUTY_GROUP.reportedName(), dutyGroupName); - dimensionValues.forEach( + .setDimension(Dimension.DUTY_GROUP.reportedName(), dutyGroup.getName()); + rowKey.getValues().forEach( (dim, dimValue) -> eventBuilder.setDimension(dim.reportedName(), dimValue) ); emitter.emit(eventBuilder.setMetric(stat.getMetricName(), value)); } - Duration getPeriod() - { - return period; - } - @Override - public String toString() + public boolean isLeader() { - return "DutiesRunnable{group='" + dutyGroupName + '\'' + '}'; + return coordLeaderSelector.isLeader() + && startingLeaderCounter == coordLeaderSelector.localTerm(); } } + // Duties that read/update the state in the DruidCoordinator class + /** * Updates replication status of all used segments. This duty must run after * {@link RunRules} so that the number of required replicas for all segments @@ -810,7 +704,6 @@ public String toString() */ private class UpdateReplicationStatus implements CoordinatorDuty { - @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { @@ -819,7 +712,18 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) if (coordinatorSegmentMetadataCache != null) { coordinatorSegmentMetadataCache.updateSegmentReplicationStatus(segmentReplicationStatus); } + return params; + } + } + /** + * Collects stats for unavailable and under-replicated segments. + */ + private class CollectSegmentStats implements CoordinatorDuty + { + @Override + public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) + { // Collect stats for unavailable and under-replicated segments final CoordinatorRunStats stats = params.getCoordinatorStats(); getDatasourceToUnavailableSegmentCount().forEach( @@ -846,4 +750,36 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) return params; } } + + /** + * Collects load queue stats. 
+ */ + private class CollectLoadQueueStats implements CoordinatorDuty + { + @Override + public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) + { + final CoordinatorRunStats stats = params.getCoordinatorStats(); + taskMaster.getAllPeons().forEach((serverName, queuePeon) -> { + final RowKey rowKey = RowKey.of(Dimension.SERVER, serverName); + stats.add(Stats.SegmentQueue.BYTES_TO_LOAD, rowKey, queuePeon.getSizeOfSegmentsToLoad()); + stats.add(Stats.SegmentQueue.NUM_TO_LOAD, rowKey, queuePeon.getSegmentsToLoad().size()); + stats.add(Stats.SegmentQueue.NUM_TO_DROP, rowKey, queuePeon.getSegmentsToDrop().size()); + stats.updateMax(Stats.SegmentQueue.LOAD_RATE_KBPS, rowKey, queuePeon.getLoadRateKbps()); + + queuePeon.getAndResetStats().forEachStat( + (stat, key, statValue) -> + stats.add(stat, createRowKeyForServer(serverName, key.getValues()), statValue) + ); + }); + return params; + } + + private RowKey createRowKeyForServer(String serverName, Map dimensionValues) + { + final RowKey.Builder builder = RowKey.with(Dimension.SERVER, serverName); + dimensionValues.forEach(builder::with); + return builder.build(); + } + } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java index 45f3993caa5e..ebef87546320 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinatorRuntimeParams.java @@ -22,8 +22,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import org.apache.druid.client.DataSourcesSnapshot; -import org.apache.druid.metadata.MetadataRuleManager; import org.apache.druid.server.coordinator.balancer.BalancerStrategy; +import org.apache.druid.server.coordinator.loading.SegmentHolder; import org.apache.druid.server.coordinator.loading.SegmentLoadQueueManager; import org.apache.druid.server.coordinator.loading.SegmentLoadingConfig; import org.apache.druid.server.coordinator.loading.SegmentReplicationStatus; @@ -32,7 +32,6 @@ import org.apache.druid.server.coordinator.stats.Dimension; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.SegmentTimeline; -import org.joda.time.DateTime; import javax.annotation.Nullable; import java.util.Arrays; @@ -46,26 +45,10 @@ */ public class DruidCoordinatorRuntimeParams { - /** - * Creates a Set to be assigned into {@link Builder#usedSegments} from the given {@link Iterable} of segments. - * - * Creates a TreeSet sorted in {@link DruidCoordinator#SEGMENT_COMPARATOR_RECENT_FIRST} order and populates it with - * the segments from the given iterable. The given iterable is iterated exactly once. No special action is taken if - * duplicate segments are encountered in the iterable. 
- */ - private static TreeSet createUsedSegmentsSet(Iterable usedSegments) - { - TreeSet segmentsSet = new TreeSet<>(DruidCoordinator.SEGMENT_COMPARATOR_RECENT_FIRST); - usedSegments.forEach(segmentsSet::add); - return segmentsSet; - } - - private final DateTime coordinatorStartTime; private final DruidCluster druidCluster; - private final MetadataRuleManager databaseRuleManager; private final StrategicSegmentAssigner segmentAssigner; - private final @Nullable TreeSet usedSegments; - private final @Nullable DataSourcesSnapshot dataSourcesSnapshot; + private final TreeSet usedSegmentsNewestFirst; + private final DataSourcesSnapshot dataSourcesSnapshot; private final CoordinatorDynamicConfig coordinatorDynamicConfig; private final DruidCompactionConfig compactionConfig; private final SegmentLoadingConfig segmentLoadingConfig; @@ -74,12 +57,10 @@ private static TreeSet createUsedSegmentsSet(Iterable private final Set broadcastDatasources; private DruidCoordinatorRuntimeParams( - DateTime coordinatorStartTime, DruidCluster druidCluster, - MetadataRuleManager databaseRuleManager, StrategicSegmentAssigner segmentAssigner, - @Nullable TreeSet usedSegments, - @Nullable DataSourcesSnapshot dataSourcesSnapshot, + TreeSet usedSegmentsNewestFirst, + DataSourcesSnapshot dataSourcesSnapshot, CoordinatorDynamicConfig coordinatorDynamicConfig, DruidCompactionConfig compactionConfig, SegmentLoadingConfig segmentLoadingConfig, @@ -88,11 +69,9 @@ private DruidCoordinatorRuntimeParams( Set broadcastDatasources ) { - this.coordinatorStartTime = coordinatorStartTime; this.druidCluster = druidCluster; - this.databaseRuleManager = databaseRuleManager; this.segmentAssigner = segmentAssigner; - this.usedSegments = usedSegments; + this.usedSegmentsNewestFirst = usedSegmentsNewestFirst; this.dataSourcesSnapshot = dataSourcesSnapshot; this.coordinatorDynamicConfig = coordinatorDynamicConfig; this.compactionConfig = compactionConfig; @@ -102,21 +81,11 @@ private DruidCoordinatorRuntimeParams( this.broadcastDatasources = broadcastDatasources; } - public DateTime getCoordinatorStartTime() - { - return coordinatorStartTime; - } - public DruidCluster getDruidCluster() { return druidCluster; } - public MetadataRuleManager getDatabaseRuleManager() - { - return databaseRuleManager; - } - @Nullable public SegmentReplicationStatus getSegmentReplicationStatus() { @@ -140,10 +109,29 @@ public Map getUsedSegmentsTimelinesPerDataSource() return dataSourcesSnapshot.getUsedSegmentsTimelinesPerDataSource(); } - public TreeSet getUsedSegments() + /** + * Used segments ordered by {@link SegmentHolder#NEWEST_SEGMENT_FIRST}. + */ + public TreeSet getUsedSegmentsNewestFirst() { - Preconditions.checkState(usedSegments != null, "usedSegments or dataSourcesSnapshot must be set"); - return usedSegments; + return usedSegmentsNewestFirst; + } + + /** + * @return true if the given segment is marked as a "used" segment in the + * metadata store. + */ + public boolean isUsedSegment(DataSegment segment) + { + return usedSegmentsNewestFirst.contains(segment); + } + + /** + * Number of used segments in metadata store. 
+ */ + public int getUsedSegmentCount() + { + return usedSegmentsNewestFirst.size(); } public CoordinatorDynamicConfig getCoordinatorDynamicConfig() @@ -182,19 +170,17 @@ public DataSourcesSnapshot getDataSourcesSnapshot() return dataSourcesSnapshot; } - public static Builder newBuilder(DateTime coordinatorStartTime) + public static Builder builder() { - return new Builder(coordinatorStartTime); + return new Builder(); } public Builder buildFromExisting() { return new Builder( - coordinatorStartTime, druidCluster, - databaseRuleManager, segmentAssigner, - usedSegments, + usedSegmentsNewestFirst, dataSourcesSnapshot, coordinatorDynamicConfig, compactionConfig, @@ -207,13 +193,11 @@ public Builder buildFromExisting() public static class Builder { - private final DateTime coordinatorStartTime; private DruidCluster druidCluster; - private MetadataRuleManager databaseRuleManager; private SegmentLoadQueueManager loadQueueManager; private StrategicSegmentAssigner segmentAssigner; - private @Nullable TreeSet usedSegments; - private @Nullable DataSourcesSnapshot dataSourcesSnapshot; + private TreeSet usedSegmentsNewestFirst; + private DataSourcesSnapshot dataSourcesSnapshot; private CoordinatorDynamicConfig coordinatorDynamicConfig; private DruidCompactionConfig compactionConfig; private SegmentLoadingConfig segmentLoadingConfig; @@ -221,21 +205,18 @@ public static class Builder private BalancerStrategy balancerStrategy; private Set broadcastDatasources; - private Builder(DateTime coordinatorStartTime) + private Builder() { - this.coordinatorStartTime = coordinatorStartTime; this.coordinatorDynamicConfig = CoordinatorDynamicConfig.builder().build(); this.compactionConfig = DruidCompactionConfig.empty(); this.broadcastDatasources = Collections.emptySet(); } private Builder( - DateTime coordinatorStartTime, DruidCluster cluster, - MetadataRuleManager databaseRuleManager, StrategicSegmentAssigner segmentAssigner, - @Nullable TreeSet usedSegments, - @Nullable DataSourcesSnapshot dataSourcesSnapshot, + TreeSet usedSegmentsNewestFirst, + DataSourcesSnapshot dataSourcesSnapshot, CoordinatorDynamicConfig coordinatorDynamicConfig, DruidCompactionConfig compactionConfig, SegmentLoadingConfig segmentLoadingConfig, @@ -244,11 +225,9 @@ private Builder( Set broadcastDatasources ) { - this.coordinatorStartTime = coordinatorStartTime; this.druidCluster = cluster; - this.databaseRuleManager = databaseRuleManager; this.segmentAssigner = segmentAssigner; - this.usedSegments = usedSegments; + this.usedSegmentsNewestFirst = usedSegmentsNewestFirst; this.dataSourcesSnapshot = dataSourcesSnapshot; this.coordinatorDynamicConfig = coordinatorDynamicConfig; this.compactionConfig = compactionConfig; @@ -260,15 +239,16 @@ private Builder( public DruidCoordinatorRuntimeParams build() { + Preconditions.checkNotNull(dataSourcesSnapshot); + Preconditions.checkNotNull(usedSegmentsNewestFirst); + initStatsIfRequired(); initSegmentAssignerIfRequired(); return new DruidCoordinatorRuntimeParams( - coordinatorStartTime, druidCluster, - databaseRuleManager, segmentAssigner, - usedSegments, + usedSegmentsNewestFirst, dataSourcesSnapshot, coordinatorDynamicConfig, compactionConfig, @@ -298,11 +278,10 @@ private void initSegmentAssignerIfRequired() Preconditions.checkNotNull(druidCluster); Preconditions.checkNotNull(balancerStrategy); - Preconditions.checkNotNull(usedSegments); Preconditions.checkNotNull(stats); if (segmentLoadingConfig == null) { - segmentLoadingConfig = SegmentLoadingConfig.create(coordinatorDynamicConfig, 
usedSegments.size()); + segmentLoadingConfig = SegmentLoadingConfig.create(coordinatorDynamicConfig, usedSegmentsNewestFirst.size()); } segmentAssigner = new StrategicSegmentAssigner( @@ -314,15 +293,16 @@ private void initSegmentAssignerIfRequired() ); } - public Builder withDruidCluster(DruidCluster cluster) + private static TreeSet createUsedSegmentsSet(Iterable usedSegments) { - this.druidCluster = cluster; - return this; + TreeSet segmentsSet = new TreeSet<>(SegmentHolder.NEWEST_SEGMENT_FIRST); + usedSegments.forEach(segmentsSet::add); + return segmentsSet; } - public Builder withDatabaseRuleManager(MetadataRuleManager databaseRuleManager) + public Builder withDruidCluster(DruidCluster cluster) { - this.databaseRuleManager = databaseRuleManager; + this.druidCluster = cluster; return this; } @@ -338,7 +318,7 @@ public Builder withSegmentAssignerUsing(SegmentLoadQueueManager loadQueueManager public Builder withDataSourcesSnapshot(DataSourcesSnapshot snapshot) { - this.usedSegments = createUsedSegmentsSet(snapshot.iterateAllUsedSegmentsInSnapshot()); + this.usedSegmentsNewestFirst = createUsedSegmentsSet(snapshot.iterateAllUsedSegmentsInSnapshot()); this.dataSourcesSnapshot = snapshot; return this; } @@ -350,7 +330,7 @@ public Builder withUsedSegments(DataSegment... usedSegments) public Builder withUsedSegments(Collection usedSegments) { - this.usedSegments = createUsedSegmentsSet(usedSegments); + this.usedSegmentsNewestFirst = createUsedSegmentsSet(usedSegments); this.dataSourcesSnapshot = DataSourcesSnapshot.fromUsedSegments(usedSegments, ImmutableMap.of()); return this; } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/BalancerStrategyFactory.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/BalancerStrategyFactory.java index 0dd05cb1b5b0..a120555a2e86 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/BalancerStrategyFactory.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/BalancerStrategyFactory.java @@ -68,7 +68,6 @@ protected ListeningExecutorService getOrCreateBalancerExecutor(int balancerCompu private ListeningExecutorService createNewBalancerExecutor(int numThreads) { - log.info("Creating new balancer executor with [%d] threads.", numThreads); cachedBalancerThreadNumber = numThreads; return MoreExecutors.listeningDecorator( Execs.multiThreaded(numThreads, "coordinator-cost-balancer-%s") diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java index 95dc87d09a24..9cb52596f688 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java @@ -72,7 +72,7 @@ public TierSegmentBalancer( this.tier = tier; this.params = params; this.segmentAssigner = params.getSegmentAssigner(); - this.runStats = segmentAssigner.getStats(); + this.runStats = params.getCoordinatorStats(); Map> partitions = servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning)); @@ -167,7 +167,7 @@ private int moveSegmentsTo( @Nullable private DataSegment getLoadableSegment(DataSegment segmentToMove) { - if (!params.getUsedSegments().contains(segmentToMove)) { + if (!params.isUsedSegment(segmentToMove)) { markUnmoved("Segment is unused", segmentToMove); return null; } diff --git 
a/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java index 37e58ff28f8c..c13bcb255f28 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java @@ -50,8 +50,8 @@ public BalanceSegments(Duration coordinatorPeriod) @Override public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { - if (params.getUsedSegments().isEmpty()) { - log.info("Skipping balance as there are no used segments."); + if (params.getUsedSegmentCount() <= 0) { + log.debug("Skipping balance as there are no used segments."); return params; } @@ -60,10 +60,10 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) final int maxSegmentsToMove = getMaxSegmentsToMove(params); if (maxSegmentsToMove <= 0) { - log.info("Skipping balance as maxSegmentsToMove is [%d].", maxSegmentsToMove); + log.debug("Skipping balance as maxSegmentsToMove is [%d].", maxSegmentsToMove); return params; } else { - log.info( + log.debug( "Balancing segments in tiers [%s] with maxSegmentsToMove[%,d] and maxLifetime[%d].", cluster.getTierNames(), maxSegmentsToMove, loadingConfig.getMaxLifetimeInLoadQueue() ); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStats.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStats.java deleted file mode 100644 index 3056d1c2bd0e..000000000000 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStats.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.server.coordinator.duty; - -import com.google.common.util.concurrent.AtomicDouble; -import org.apache.druid.client.ImmutableDruidDataSource; -import org.apache.druid.client.ImmutableDruidServer; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.server.coordinator.DruidCluster; -import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; -import org.apache.druid.server.coordinator.ServerHolder; -import org.apache.druid.server.coordinator.loading.LoadQueuePeon; -import org.apache.druid.server.coordinator.loading.LoadQueueTaskMaster; -import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; -import org.apache.druid.server.coordinator.stats.Dimension; -import org.apache.druid.server.coordinator.stats.RowKey; -import org.apache.druid.server.coordinator.stats.Stats; - -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; - -/** - * Collects stats pertaining to segment availability on different servers. - */ -public class CollectSegmentAndServerStats implements CoordinatorDuty -{ - private static final Logger log = new Logger(CollectSegmentAndServerStats.class); - - private final LoadQueueTaskMaster taskMaster; - - public CollectSegmentAndServerStats(LoadQueueTaskMaster taskMaster) - { - this.taskMaster = taskMaster; - } - - @Override - public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) - { - params.getDruidCluster().getHistoricals() - .forEach(this::logHistoricalTierStats); - logServerDebuggingInfo(params.getDruidCluster()); - collectLoadQueueStats(params.getCoordinatorStats()); - - return params; - } - - private void collectLoadQueueStats(CoordinatorRunStats stats) - { - taskMaster.getAllPeons().forEach((serverName, queuePeon) -> { - final RowKey rowKey = RowKey.of(Dimension.SERVER, serverName); - stats.add(Stats.SegmentQueue.BYTES_TO_LOAD, rowKey, queuePeon.getSizeOfSegmentsToLoad()); - stats.add(Stats.SegmentQueue.NUM_TO_LOAD, rowKey, queuePeon.getSegmentsToLoad().size()); - stats.add(Stats.SegmentQueue.NUM_TO_DROP, rowKey, queuePeon.getSegmentsToDrop().size()); - stats.updateMax(Stats.SegmentQueue.LOAD_RATE_KBPS, rowKey, queuePeon.getLoadRateKbps()); - - queuePeon.getAndResetStats().forEachStat( - (stat, key, statValue) -> - stats.add(stat, createRowKeyForServer(serverName, key.getValues()), statValue) - ); - }); - } - - private RowKey createRowKeyForServer(String serverName, Map dimensionValues) - { - final RowKey.Builder builder = RowKey.with(Dimension.SERVER, serverName); - dimensionValues.forEach(builder::with); - return builder.build(); - } - - private void logHistoricalTierStats(String tier, Set historicals) - { - final AtomicInteger servedCount = new AtomicInteger(); - final AtomicInteger loadingCount = new AtomicInteger(); - final AtomicInteger droppingCount = new AtomicInteger(); - - final AtomicDouble usageSum = new AtomicDouble(); - final AtomicLong currentBytesSum = new AtomicLong(); - - historicals.forEach(serverHolder -> { - final ImmutableDruidServer server = serverHolder.getServer(); - servedCount.addAndGet(server.getNumSegments()); - currentBytesSum.addAndGet(server.getCurrSize()); - usageSum.addAndGet(100.0f * server.getCurrSize() / server.getMaxSize()); - - final LoadQueuePeon queuePeon = serverHolder.getPeon(); - loadingCount.addAndGet(queuePeon.getSegmentsToLoad().size()); - droppingCount.addAndGet(queuePeon.getSegmentsToDrop().size()); - }); - - final int 
numHistoricals = historicals.size(); - log.info( - "Tier[%s] is serving [%,d], loading [%,d] and dropping [%,d] segments" - + " across [%d] historicals with average usage[%d GBs], [%.1f%%].", - tier, servedCount.get(), loadingCount.get(), droppingCount.get(), numHistoricals, - (currentBytesSum.get() >> 30) / numHistoricals, usageSum.get() / numHistoricals - ); - } - - private void logServerDebuggingInfo(DruidCluster cluster) - { - if (log.isDebugEnabled()) { - log.debug("Servers"); - for (ServerHolder serverHolder : cluster.getAllServers()) { - ImmutableDruidServer druidServer = serverHolder.getServer(); - log.debug(" %s", druidServer); - log.debug(" -- DataSources"); - for (ImmutableDruidDataSource druidDataSource : druidServer.getDataSources()) { - log.debug(" %s", druidDataSource); - } - } - } - } - -} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java new file mode 100644 index 000000000000..6e9bd1220f4b --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator.duty; + +import com.google.common.collect.EvictingQueue; +import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Stopwatch; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.server.coordinator.DruidCoordinator; +import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; +import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; +import org.apache.druid.server.coordinator.stats.CoordinatorStat; +import org.apache.druid.server.coordinator.stats.Dimension; +import org.apache.druid.server.coordinator.stats.RowKey; +import org.apache.druid.server.coordinator.stats.Stats; +import org.joda.time.DateTime; +import org.joda.time.Duration; + +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +/** + * A group of {@link CoordinatorDuty}. 
+ */ +public class CoordinatorDutyGroup +{ + private static final Logger log = new Logger(CoordinatorDutyGroup.class); + + private final String name; + private final Duration period; + private final List duties; + private final List dutyNames; + private final DruidCoordinator.DutyGroupHelper coordinator; + + private final AtomicReference lastRunStartTime = new AtomicReference<>(); + private final AtomicReference lastRunEndTime = new AtomicReference<>(); + + private final EvictingQueue runTimes = EvictingQueue.create(20); + private final EvictingQueue gapTimes = EvictingQueue.create(20); + + public CoordinatorDutyGroup( + String name, + List duties, + Duration period, + DruidCoordinator.DutyGroupHelper coordinator + ) + { + this.name = name; + this.duties = duties; + this.period = period; + this.dutyNames = duties.stream().map(duty -> duty.getClass().getName()).collect(Collectors.toList()); + this.coordinator = coordinator; + + log.info("Scheduled run of duty group[%s] at period[%s] with duties[%s].", name, period, dutyNames); + } + + public String getName() + { + return name; + } + + public Duration getPeriod() + { + return period; + } + + public synchronized DutyGroupStatus getStatus() + { + return new DutyGroupStatus( + name, + period, + dutyNames, + lastRunStartTime.get(), + lastRunEndTime.get(), + computeWindowAverage(runTimes), + computeWindowAverage(gapTimes) + ); + } + + /** + * Runs this duty group if the coordinator is leader and emits stats collected + * during the run. + */ + public void run(DruidCoordinatorRuntimeParams params) + { + markRunStarted(); + + final boolean coordinationPaused = params.getCoordinatorDynamicConfig().getPauseCoordination(); + if (coordinationPaused && coordinator.isLeader()) { + log.info("Coordination has been paused. 
Duties will not run until coordination is resumed."); + return; + } + + final CoordinatorRunStats stats = params.getCoordinatorStats(); + for (CoordinatorDuty duty : duties) { + if (coordinator.isLeader()) { + final Stopwatch dutyRunTime = Stopwatch.createStarted(); + params = duty.run(params); + dutyRunTime.stop(); + + final String dutyName = duty.getClass().getName(); + if (params == null) { + log.warn("Stopping run for group[%s] on request of duty[%s].", name, dutyName); + return; + } else { + stats.add( + Stats.CoordinatorRun.DUTY_RUN_TIME, + RowKey.of(Dimension.DUTY, dutyName), + dutyRunTime.millisElapsed() + ); + } + } + } + + // Emit stats collected from all duties + if (stats.rowCount() > 0) { + stats.forEachStat(this::emitStat); + + final String statsTable = stats.buildStatsTable(); + if (!statsTable.isEmpty()) { + log.info("Collected stats for duty group[%s]: %s", name, statsTable); + } + } + + // Emit the runtime of the entire duty group + final long runMillis = markRunCompleted(); + emitStat(Stats.CoordinatorRun.GROUP_RUN_TIME, RowKey.empty(), runMillis); + } + + private void emitStat(CoordinatorStat stat, RowKey rowKey, long value) + { + if (stat.shouldEmit()) { + coordinator.emitStat(stat, rowKey, value); + } + } + + private synchronized long computeWindowAverage(EvictingQueue window) + { + final int numEntries = window.size(); + if (numEntries > 0) { + long totalTimeMillis = window.stream().mapToLong(Long::longValue).sum(); + return totalTimeMillis / numEntries; + } else { + return 0; + } + } + + private synchronized void addToWindow(EvictingQueue window, long value) + { + window.add(value); + } + + private void markRunStarted() + { + final DateTime now = DateTimes.nowUtc(); + + final DateTime lastStart = lastRunStartTime.getAndSet(now); + if (lastStart != null) { + addToWindow(gapTimes, now.getMillis() - lastStart.getMillis()); + } + } + + private long markRunCompleted() + { + final DateTime now = DateTimes.nowUtc(); + lastRunEndTime.set(now); + + final long runtimeMillis = now.getMillis() - lastRunStartTime.get().getMillis(); + addToWindow(runTimes, runtimeMillis); + + return runtimeMillis; + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/DutyGroupStatus.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/DutyGroupStatus.java new file mode 100644 index 000000000000..72b1852c043d --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/DutyGroupStatus.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.server.coordinator.duty; + +import com.fasterxml.jackson.annotation.JsonProperty; +import org.joda.time.DateTime; +import org.joda.time.Duration; + +import java.util.List; + +public class DutyGroupStatus +{ + private final String name; + private final Duration period; + private final List dutyNames; + + private final DateTime lastRunStart; + private final DateTime lastRunEnd; + + private final long avgRuntimeMillis; + private final long avgRunGapMillis; + + public DutyGroupStatus( + String name, + Duration period, + List dutyNames, + DateTime lastRunStart, + DateTime lastRunEnd, + long avgRuntimeMillis, + long avgRunGapMillis + ) + { + this.name = name; + this.period = period; + this.dutyNames = dutyNames; + this.lastRunStart = lastRunStart; + this.lastRunEnd = lastRunEnd; + this.avgRuntimeMillis = avgRuntimeMillis; + this.avgRunGapMillis = avgRunGapMillis; + } + + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public Duration getPeriod() + { + return period; + } + + @JsonProperty + public List getDutyNames() + { + return dutyNames; + } + + @JsonProperty + public DateTime getLastRunStart() + { + return lastRunStart; + } + + @JsonProperty + public DateTime getLastRunEnd() + { + return lastRunEnd; + } + + @JsonProperty + public long getAvgRuntimeMillis() + { + return avgRuntimeMillis; + } + + @JsonProperty + public long getAvgRunGapMillis() + { + return avgRunGapMillis; + } + +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnreferencedSegmentSchema.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnreferencedSegmentSchema.java index bfdfb84aaf4c..83a68323030f 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnreferencedSegmentSchema.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnreferencedSegmentSchema.java @@ -74,7 +74,7 @@ protected int cleanupEntriesCreatedBefore(DateTime minCreatedTime) // This case would arise when segment is associated with a schema which was marked unused in the previous step // or in the previous run. 
List schemaFingerprintsToUpdate = segmentSchemaManager.findReferencedSchemaMarkedAsUnused(); - if (schemaFingerprintsToUpdate.size() > 0) { + if (!schemaFingerprintsToUpdate.isEmpty()) { segmentSchemaManager.markSchemaAsUsed(schemaFingerprintsToUpdate); log.info("Marked [%s] unused schemas referenced by used segments as used.", schemaFingerprintsToUpdate.size()); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnused.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnused.java index 8a2e6c9dfd9a..78704166ce9e 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnused.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnused.java @@ -72,9 +72,9 @@ public class MarkEternityTombstonesAsUnused implements CoordinatorDuty { private static final Logger log = new Logger(MarkEternityTombstonesAsUnused.class); - private final SegmentDeleteHandler deleteHandler; + private final MetadataAction.DeleteSegments deleteHandler; - public MarkEternityTombstonesAsUnused(final SegmentDeleteHandler deleteHandler) + public MarkEternityTombstonesAsUnused(final MetadataAction.DeleteSegments deleteHandler) { this.deleteHandler = deleteHandler; } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java index 99fcde7499c6..656af062732b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java @@ -21,7 +21,7 @@ import org.apache.druid.client.ImmutableDruidDataSource; import org.apache.druid.client.ImmutableDruidServer; -import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Stopwatch; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.coordinator.DruidCluster; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; @@ -34,8 +34,8 @@ import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.SegmentTimeline; import org.joda.time.DateTime; +import org.joda.time.Duration; -import java.time.Duration; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -56,9 +56,10 @@ public class MarkOvershadowedSegmentsAsUnused implements CoordinatorDuty { private static final Logger log = new Logger(MarkOvershadowedSegmentsAsUnused.class); - private final SegmentDeleteHandler deleteHandler; + private final MetadataAction.DeleteSegments deleteHandler; + private final Stopwatch sinceCoordinatorStarted = Stopwatch.createStarted(); - public MarkOvershadowedSegmentsAsUnused(SegmentDeleteHandler deleteHandler) + public MarkOvershadowedSegmentsAsUnused(MetadataAction.DeleteSegments deleteHandler) { this.deleteHandler = deleteHandler; } @@ -68,12 +69,13 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) { // Mark overshadowed segments as unused only if the coordinator has been running // long enough to have refreshed its metadata view - final DateTime coordinatorStartTime = params.getCoordinatorStartTime(); - final long delayMillis = params.getCoordinatorDynamicConfig().getMarkSegmentAsUnusedDelayMillis(); - if (DateTimes.nowUtc().isBefore(coordinatorStartTime.plus(delayMillis))) { 
+ final Duration requiredDelay = Duration.millis( + params.getCoordinatorDynamicConfig().getMarkSegmentAsUnusedDelayMillis() + ); + if (sinceCoordinatorStarted.hasNotElapsed(requiredDelay)) { log.info( - "Skipping MarkAsUnused until [%s] have elapsed after coordinator start time[%s].", - Duration.ofMillis(delayMillis), coordinatorStartTime + "Skipping MarkAsUnused as only [%s ms] have elapsed since Coordinator start. Required delay[%s].", + sinceCoordinatorStarted.millisElapsed(), requiredDelay ); return params; } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/SegmentDeleteHandler.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/MetadataAction.java similarity index 64% rename from server/src/main/java/org/apache/druid/server/coordinator/duty/SegmentDeleteHandler.java rename to server/src/main/java/org/apache/druid/server/coordinator/duty/MetadataAction.java index b45abcd51f96..f890298cb880 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/SegmentDeleteHandler.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/MetadataAction.java @@ -19,13 +19,27 @@ package org.apache.druid.server.coordinator.duty; +import org.apache.druid.server.coordinator.rules.Rule; import org.apache.druid.timeline.SegmentId; +import java.util.List; import java.util.Set; -public interface SegmentDeleteHandler +/** + * Contains functional interfaces that are used by a {@link CoordinatorDuty} to + * perform a single read or write operation on the metadata store. + */ +public final class MetadataAction { + @FunctionalInterface + public interface DeleteSegments + { + int markSegmentsAsUnused(Set segmentIds); + } - int markSegmentsAsUnused(Set segmentIds); - + @FunctionalInterface + public interface GetDatasourceRules + { + List getRulesWithDefault(String dataSource); + } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index 6af49e15ee91..97b2235acb60 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -85,7 +85,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) final CoordinatorDynamicConfig dynamicConfig = params.getCoordinatorDynamicConfig(); final SegmentLoadingConfig segmentLoadingConfig - = SegmentLoadingConfig.create(dynamicConfig, params.getUsedSegments().size()); + = SegmentLoadingConfig.create(dynamicConfig, params.getUsedSegmentCount()); final DruidCluster cluster = prepareCluster(dynamicConfig, segmentLoadingConfig, currentServers); cancelLoadsOnDecommissioningServers(cluster); @@ -93,11 +93,12 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) final CoordinatorRunStats stats = params.getCoordinatorStats(); collectHistoricalStats(cluster, stats); collectUsedSegmentStats(params, stats); + collectDebugStats(segmentLoadingConfig, stats); final int numBalancerThreads = segmentLoadingConfig.getBalancerComputeThreads(); final BalancerStrategy balancerStrategy = balancerStrategyFactory.createBalancerStrategy(numBalancerThreads); - log.info( - "Using balancer strategy [%s] with [%d] threads.", + log.debug( + "Using balancer strategy[%s] with [%d] threads.", balancerStrategy.getClass().getSimpleName(), numBalancerThreads ); @@ -134,7 +135,7 @@ 
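A minimal sketch (not part of this patch) of how the new MetadataAction handles can be wired when the coordinator constructs its duties; `segmentsMetadataManager` and `metadataRuleManager` below are assumed stand-ins for the coordinator's SegmentsMetadataManager and MetadataRuleManager:

```java
// Each handle is a @FunctionalInterface, so a duty takes a narrow method
// reference or lambda instead of the whole metadata manager.
MetadataAction.DeleteSegments deleteHandler =
    segmentIds -> segmentsMetadataManager.markSegmentsAsUnused(segmentIds);
MetadataAction.GetDatasourceRules ruleHandler =
    metadataRuleManager::getRulesWithDefault;

CoordinatorDuty runRules = new RunRules(deleteHandler, ruleHandler);
CoordinatorDuty markOvershadowed = new MarkOvershadowedSegmentsAsUnused(deleteHandler);
```

The tests further down in this patch use the same trick, e.g. `new RunRules(Set::size, manager::getRulesWithDefault)`, substituting a counting lambda for real metadata writes.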
private void cancelLoadsOnDecommissioningServers(DruidCluster cluster) } if (cancelledCount.get() > 0) { - log.info( + log.debug( "Cancelled [%d] load/move operations on [%d] decommissioning servers.", cancelledCount.get(), decommissioningServers.size() ); @@ -195,4 +196,10 @@ private void collectUsedSegmentStats(DruidCoordinatorRuntimeParams params, Coord stats.add(Stats.Segments.USED, datasourceKey, timeline.getNumObjects()); }); } + + private void collectDebugStats(SegmentLoadingConfig config, CoordinatorRunStats stats) + { + stats.add(Stats.Segments.BALANCER_THREADS, config.getBalancerComputeThreads()); + stats.add(Stats.Segments.REPLICATION_THROTTLE_LIMIT, config.getReplicationThrottleLimit()); + } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java index 60f46aca6e29..fb4437107607 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java @@ -24,7 +24,6 @@ import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.Stopwatch; import org.apache.druid.java.util.emitter.EmittingLogger; -import org.apache.druid.metadata.MetadataRuleManager; import org.apache.druid.server.coordinator.DruidCluster; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.loading.StrategicSegmentAssigner; @@ -56,11 +55,16 @@ public class RunRules implements CoordinatorDuty { private static final EmittingLogger log = new EmittingLogger(RunRules.class); - private final SegmentDeleteHandler deleteHandler; + private final MetadataAction.DeleteSegments deleteHandler; + private final MetadataAction.GetDatasourceRules ruleHandler; - public RunRules(SegmentDeleteHandler deleteHandler) + public RunRules( + MetadataAction.DeleteSegments deleteHandler, + MetadataAction.GetDatasourceRules ruleHandler + ) { this.deleteHandler = deleteHandler; + this.ruleHandler = ruleHandler; } @Override @@ -72,15 +76,17 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) return params; } + // Get all used segments sorted by interval. 
Segments must be sorted to ensure that: + // a) round-robin assignment distributes newer segments uniformly across servers + // b) replication throttling has a smaller impact on newer segments + final Set usedSegments = params.getUsedSegmentsNewestFirst(); final Set overshadowed = params.getDataSourcesSnapshot().getOvershadowedSegments(); - final Set usedSegments = params.getUsedSegments(); - log.info( + log.debug( "Applying retention rules on [%,d] used segments, skipping [%,d] overshadowed segments.", usedSegments.size(), overshadowed.size() ); final StrategicSegmentAssigner segmentAssigner = params.getSegmentAssigner(); - final MetadataRuleManager databaseRuleManager = params.getDatabaseRuleManager(); final DateTime now = DateTimes.nowUtc(); final Object2IntOpenHashMap datasourceToSegmentsWithNoRule = new Object2IntOpenHashMap<>(); @@ -92,7 +98,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) } // Find and apply matching rule - List rules = databaseRuleManager.getRulesWithDefault(segment.getDataSource()); + List rules = ruleHandler.getRulesWithDefault(segment.getDataSource()); boolean foundMatchingRule = false; for (Rule rule : rules) { if (rule.appliesTo(segment, now)) { @@ -161,7 +167,7 @@ private Set getBroadcastDatasources(DruidCoordinatorRuntimeParams params final Set broadcastDatasources = params.getDataSourcesSnapshot().getDataSourcesMap().values().stream() .map(ImmutableDruidDataSource::getName) - .filter(datasource -> isBroadcastDatasource(datasource, params)) + .filter(this::isBroadcastDatasource) .collect(Collectors.toSet()); if (!broadcastDatasources.isEmpty()) { @@ -179,9 +185,10 @@ private Set getBroadcastDatasources(DruidCoordinatorRuntimeParams params *
   * <li>Are unloaded if unused, even from realtime servers</li>
  • * */ - private boolean isBroadcastDatasource(String datasource, DruidCoordinatorRuntimeParams params) + private boolean isBroadcastDatasource(String datasource) { - return params.getDatabaseRuleManager().getRulesWithDefault(datasource).stream() - .anyMatch(rule -> rule instanceof BroadcastDistributionRule); + return ruleHandler.getRulesWithDefault(datasource) + .stream() + .anyMatch(rule -> rule instanceof BroadcastDistributionRule); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java index 5a135d81cf45..53f7abd6f6d9 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegments.java @@ -33,7 +33,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; /** @@ -44,9 +43,14 @@ public class UnloadUnusedSegments implements CoordinatorDuty private static final Logger log = new Logger(UnloadUnusedSegments.class); private final SegmentLoadQueueManager loadQueueManager; + private final MetadataAction.GetDatasourceRules ruleHandler; - public UnloadUnusedSegments(SegmentLoadQueueManager loadQueueManager) + public UnloadUnusedSegments( + SegmentLoadQueueManager loadQueueManager, + MetadataAction.GetDatasourceRules ruleHandler + ) { + this.ruleHandler = ruleHandler; this.loadQueueManager = loadQueueManager; } @@ -82,18 +86,16 @@ private int dropUnusedSegments( Map broadcastStatusByDatasource ) { - final Set usedSegments = params.getUsedSegments(); - final AtomicInteger numQueuedDrops = new AtomicInteger(0); final ImmutableDruidServer server = serverHolder.getServer(); for (ImmutableDruidDataSource dataSource : server.getDataSources()) { - if (shouldSkipUnload(serverHolder, dataSource.getName(), broadcastStatusByDatasource, params)) { + if (shouldSkipUnload(serverHolder, dataSource.getName(), broadcastStatusByDatasource)) { continue; } int totalUnneededCount = 0; for (DataSegment segment : dataSource.getSegments()) { - if (!usedSegments.contains(segment) + if (!params.isUsedSegment(segment) && loadQueueManager.dropSegment(segment, serverHolder)) { totalUnneededCount++; log.debug( @@ -118,13 +120,11 @@ private int cancelLoadOfUnusedSegments( DruidCoordinatorRuntimeParams params ) { - final Set usedSegments = params.getUsedSegments(); - final AtomicInteger cancelledOperations = new AtomicInteger(0); server.getQueuedSegments().forEach((segment, action) -> { - if (shouldSkipUnload(server, segment.getDataSource(), broadcastStatusByDatasource, params)) { + if (shouldSkipUnload(server, segment.getDataSource(), broadcastStatusByDatasource)) { // do nothing - } else if (usedSegments.contains(segment)) { + } else if (params.isUsedSegment(segment)) { // do nothing } else if (action.isLoad() && server.cancelOperation(action, segment)) { cancelledOperations.incrementAndGet(); @@ -147,21 +147,21 @@ private int cancelLoadOfUnusedSegments( private boolean shouldSkipUnload( ServerHolder server, String dataSource, - Map broadcastStatusByDatasource, - DruidCoordinatorRuntimeParams params + Map broadcastStatusByDatasource ) { boolean isBroadcastDatasource = broadcastStatusByDatasource - .computeIfAbsent(dataSource, ds -> isBroadcastDatasource(ds, params)); + .computeIfAbsent(dataSource, this::isBroadcastDatasource); return server.isRealtimeServer() && 
!isBroadcastDatasource; } /** * A datasource is considered a broadcast datasource if it has even one broadcast rule. */ - private boolean isBroadcastDatasource(String datasource, DruidCoordinatorRuntimeParams params) + private boolean isBroadcastDatasource(String datasource) { - return params.getDatabaseRuleManager().getRulesWithDefault(datasource).stream() - .anyMatch(rule -> rule instanceof BroadcastDistributionRule); + return ruleHandler.getRulesWithDefault(datasource) + .stream() + .anyMatch(rule -> rule instanceof BroadcastDistributionRule); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/CuratorLoadQueuePeon.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/CuratorLoadQueuePeon.java index 333d3b0e3056..0ab9d8ab3297 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/CuratorLoadQueuePeon.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/CuratorLoadQueuePeon.java @@ -27,7 +27,6 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.emitter.EmittingLogger; import org.apache.druid.server.coordination.SegmentChangeRequestNoop; -import org.apache.druid.server.coordinator.DruidCoordinator; import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; import org.apache.druid.server.coordinator.stats.Stats; import org.apache.druid.timeline.DataSegment; @@ -85,7 +84,7 @@ public class CuratorLoadQueuePeon implements LoadQueuePeon * {@link #stop()}. */ private final ConcurrentSkipListMap segmentsToLoad - = new ConcurrentSkipListMap<>(DruidCoordinator.SEGMENT_COMPARATOR_RECENT_FIRST); + = new ConcurrentSkipListMap<>(SegmentHolder.NEWEST_SEGMENT_FIRST); /** * Needs to be thread safe since it can be concurrently accessed via @@ -93,7 +92,7 @@ public class CuratorLoadQueuePeon implements LoadQueuePeon * {@link #getSegmentsToDrop()} and {@link #stop()} */ private final ConcurrentSkipListMap segmentsToDrop - = new ConcurrentSkipListMap<>(DruidCoordinator.SEGMENT_COMPARATOR_RECENT_FIRST); + = new ConcurrentSkipListMap<>(SegmentHolder.NEWEST_SEGMENT_FIRST); /** * Needs to be thread safe since it can be concurrently accessed via @@ -101,7 +100,7 @@ public class CuratorLoadQueuePeon implements LoadQueuePeon * and {@link #getSegmentsToDrop()} */ private final ConcurrentSkipListSet segmentsMarkedToDrop - = new ConcurrentSkipListSet<>(DruidCoordinator.SEGMENT_COMPARATOR_RECENT_FIRST); + = new ConcurrentSkipListSet<>(SegmentHolder.NEWEST_SEGMENT_FIRST); /** * Needs to be thread safe since it can be concurrently accessed via @@ -109,7 +108,7 @@ public class CuratorLoadQueuePeon implements LoadQueuePeon * {@link #getTimedOutSegments()} and {@link #stop()} */ private final ConcurrentSkipListSet timedOutSegments = - new ConcurrentSkipListSet<>(DruidCoordinator.SEGMENT_COMPARATOR_RECENT_FIRST); + new ConcurrentSkipListSet<>(SegmentHolder.NEWEST_SEGMENT_FIRST); public CuratorLoadQueuePeon( CuratorFramework curator, diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentHolder.java index 9f4a181699dd..3b474cbd0b0a 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentHolder.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentHolder.java @@ -22,10 +22,10 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Ordering; import org.apache.druid.java.util.common.Stopwatch; 
+import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.server.coordination.DataSegmentChangeRequest;
 import org.apache.druid.server.coordination.SegmentChangeRequestDrop;
 import org.apache.druid.server.coordination.SegmentChangeRequestLoad;
-import org.apache.druid.server.coordinator.DruidCoordinator;
 import org.apache.druid.server.coordinator.config.HttpLoadQueuePeonConfig;
 import org.apache.druid.timeline.DataSegment;
 import org.joda.time.Duration;
@@ -43,6 +43,24 @@
  */
 public class SegmentHolder implements Comparable<SegmentHolder>
 {
+  /**
+   * Orders newest segments first (i.e. segments with most recent intervals).
+   * <p>
+   * The order is needed to ensure that:
+   * <ul>
+   * <li>Round-robin assignment distributes segments belonging to same or adjacent
+   * intervals uniformly across all servers.</li>
+   * <li>Load queue prioritizes load of most recent segments, as
+   * they are presumed to contain more important data which is queried more often.</li>
+   * <li>Replication throttler has a smaller impact on replicas of newer segments.</li>
+   * </ul>
+   */
+  public static final Ordering<DataSegment> NEWEST_SEGMENT_FIRST = Ordering
+      .from(Comparators.intervalsByEndThenStart())
+      .onResultOf(DataSegment::getInterval)
+      .compound(Ordering.natural())
+      .reverse();
+
   /**
    * Orders segment requests:
    * <ul>
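As an illustration of the new ordering (a sketch assuming two DataSegment fixtures, `segOld` with an interval ending in 2023 and `segNew` with an interval ending in 2024):

```java
import java.util.TreeSet;

// NEWEST_SEGMENT_FIRST compares intervals by end then start and reverses,
// so the segment with the most recent interval sorts to the head.
TreeSet<DataSegment> queue = new TreeSet<>(SegmentHolder.NEWEST_SEGMENT_FIRST);
queue.add(segOld);  // e.g. interval 2023-01-01/2023-02-01
queue.add(segNew);  // e.g. interval 2024-01-01/2024-02-01
// queue.first() == segNew: load queues and round-robin assignment
// therefore always see the newest data first.
```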
      @@ -53,7 +71,7 @@ public class SegmentHolder implements Comparable private static final Comparator COMPARE_ACTION_THEN_INTERVAL = Ordering.explicit(SegmentAction.DROP, SegmentAction.LOAD, SegmentAction.REPLICATE, SegmentAction.MOVE_TO) .onResultOf(SegmentHolder::getAction) - .compound(DruidCoordinator.SEGMENT_COMPARATOR_RECENT_FIRST.onResultOf(SegmentHolder::getSegment)); + .compound(NEWEST_SEGMENT_FIRST.onResultOf(SegmentHolder::getSegment)); private final DataSegment segment; private final DataSegmentChangeRequest changeRequest; diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentLoadingConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentLoadingConfig.java index ca0305eaa099..26fcd220c259 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentLoadingConfig.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/SegmentLoadingConfig.java @@ -50,7 +50,7 @@ public static SegmentLoadingConfig create(CoordinatorDynamicConfig dynamicConfig final int throttlePercentage = 5; final int replicationThrottleLimit = Math.max(100, numUsedSegments * throttlePercentage / 100); final int numBalancerThreads = CoordinatorDynamicConfig.getDefaultBalancerComputeThreads(); - log.info( + log.debug( "Smart segment loading is enabled. Calculated replicationThrottleLimit[%,d]" + " (%d%% of used segments[%,d]) and numBalancerThreads[%d].", replicationThrottleLimit, throttlePercentage, numUsedSegments, numBalancerThreads diff --git a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java index 9b5d38f198eb..a5e38eee7d4b 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/loading/StrategicSegmentAssigner.java @@ -93,11 +93,6 @@ public StrategicSegmentAssigner( ); } - public CoordinatorRunStats getStats() - { - return stats; - } - public SegmentReplicationStatus getReplicationStatus() { return replicaCountMap.toReplicationStatus(); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/stats/CoordinatorRunStats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/CoordinatorRunStats.java index 92b2acc052bc..76d788cb4306 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/CoordinatorRunStats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/CoordinatorRunStats.java @@ -29,7 +29,6 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicInteger; /** * Contains statistics typically tracked during a single coordinator run or the @@ -112,8 +111,6 @@ public void forEachStat(StatHandler handler) public String buildStatsTable() { final StringBuilder statsTable = new StringBuilder(); - final AtomicInteger hiddenStats = new AtomicInteger(0); - final AtomicInteger totalStats = new AtomicInteger(); allStats.forEach( (rowKey, statMap) -> { @@ -129,7 +126,6 @@ public String buildStatsTable() // Add all the errors final Map errorStats = levelToStats .getOrDefault(CoordinatorStat.Level.ERROR, Collections.emptyMap()); - totalStats.addAndGet(errorStats.size()); if (!errorStats.isEmpty()) { statsTable.append( StringUtils.format("\nError: %s ==> %s", rowKey, errorStats) @@ -139,7 +135,6 @@ public String 
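A worked example of the smart-loading computation above, using illustrative values:

```java
// throttlePercentage is fixed at 5 in SegmentLoadingConfig.create().
int numUsedSegments = 10_000;
int replicationThrottleLimit = Math.max(100, numUsedSegments * 5 / 100);
// = max(100, 500) = 500 replica assignments allowed per run; the floor of
// 100 keeps small clusters from being throttled to near zero.
```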
buildStatsTable() // Add all the info level stats final Map infoStats = levelToStats .getOrDefault(CoordinatorStat.Level.INFO, Collections.emptyMap()); - totalStats.addAndGet(infoStats.size()); if (!infoStats.isEmpty()) { statsTable.append( StringUtils.format("\nInfo : %s ==> %s", rowKey, infoStats) @@ -149,28 +144,14 @@ public String buildStatsTable() // Add all the debug level stats if the row key has a debug dimension final Map debugStats = levelToStats .getOrDefault(CoordinatorStat.Level.DEBUG, Collections.emptyMap()); - totalStats.addAndGet(debugStats.size()); if (!debugStats.isEmpty() && hasDebugDimension(rowKey)) { statsTable.append( StringUtils.format("\nDebug: %s ==> %s", rowKey, debugStats) ); - } else { - hiddenStats.addAndGet(debugStats.size()); } } ); - if (hiddenStats.get() > 0) { - statsTable.append( - StringUtils.format("\nDebug: %d hidden stats. Set 'debugDimensions' to see these.", hiddenStats.get()) - ); - } - if (totalStats.get() > 0) { - statsTable.append( - StringUtils.format("\nTOTAL: %d stats for %d dimension keys", totalStats.get(), rowCount()) - ); - } - return statsTable.toString(); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java index 10873d894261..3f62c0734eea 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java @@ -61,6 +61,12 @@ public static class Segments = CoordinatorStat.toDebugAndEmit("overshadowed", "segment/overshadowed/count"); public static final CoordinatorStat UNNEEDED_ETERNITY_TOMBSTONE = CoordinatorStat.toDebugAndEmit("unneededEternityTombstone", "segment/unneededEternityTombstone/count"); + + // Values computed in a run + public static final CoordinatorStat BALANCER_THREADS + = CoordinatorStat.toDebugOnly("numBalancerThreads"); + public static final CoordinatorStat REPLICATION_THROTTLE_LIMIT + = CoordinatorStat.toDebugOnly("replicationThrottleLimit"); } public static class SegmentQueue diff --git a/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java b/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java index bffe2fe99d29..e73b61d03d6b 100644 --- a/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java +++ b/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java @@ -149,4 +149,13 @@ public Response getLoadQueue( ) ).build(); } + + @GET + @Path("/duties") + @ResourceFilters(StateResourceFilter.class) + @Produces(MediaType.APPLICATION_JSON) + public Response getStatusOfDuties() + { + return Response.ok(coordinator.getStatusOfDuties()).build(); + } } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/BalanceSegmentsProfiler.java b/server/src/test/java/org/apache/druid/server/coordinator/BalanceSegmentsProfiler.java index 3e5bc87f9b3d..5cc36f219784 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/BalanceSegmentsProfiler.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/BalanceSegmentsProfiler.java @@ -128,7 +128,7 @@ public void bigProfiler() .addTier("normal", serverHolderList.toArray(new ServerHolder[0])) .build(); DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDruidCluster(druidCluster) .withUsedSegments(segments) .withDynamicConfigs( @@ -140,11 +140,10 @@ public void bigProfiler() .build() ) 
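The new endpoint surfaces the same data as DruidCoordinator#getStatusOfDuties(); a small sketch of consuming it programmatically (the duty-group name in the comment is taken from the tests below, and `coordinator` is assumed to be a started, leader DruidCoordinator):

```java
List<DutyGroupStatus> groups = coordinator.getStatusOfDuties();
for (DutyGroupStatus group : groups) {
  // e.g. "HistoricalManagementDuties" -> [...duty.RunRules, ...duty.BalanceSegments, ...]
  System.out.println(group.getName() + " -> " + group.getDutyNames());
}
```

Over HTTP, the same information is available via `GET /druid/coordinator/v1/duties`, guarded by the state resource filter.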
.withSegmentAssignerUsing(loadQueueManager) - .withDatabaseRuleManager(manager) .build(); BalanceSegments tester = new BalanceSegments(Duration.standardMinutes(1)); - RunRules runner = new RunRules(Set::size); + RunRules runner = new RunRules(Set::size, manager::getRulesWithDefault); watch.start(); DruidCoordinatorRuntimeParams balanceParams = tester.run(params); DruidCoordinatorRuntimeParams assignParams = runner.run(params); @@ -174,7 +173,7 @@ public void profileRun() EasyMock.replay(druidServer2); DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDruidCluster( DruidCluster .builder() diff --git a/server/src/test/java/org/apache/druid/server/coordinator/CoordinatorRunStatsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/CoordinatorRunStatsTest.java index 6b9dc8443960..456213db38a2 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/CoordinatorRunStatsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/CoordinatorRunStatsTest.java @@ -144,9 +144,7 @@ public void testBuildStatsTable() final String expectedTable = "\nError: {duty=duty1} ==> {error1=10}" - + "\nInfo : {duty=duty1} ==> {info1=20}" - + "\nDebug: 1 hidden stats. Set 'debugDimensions' to see these." - + "\nTOTAL: 3 stats for 1 dimension keys"; + + "\nInfo : {duty=duty1} ==> {info1=20}"; Assert.assertEquals(expectedTable, stats.buildStatsTable()); } @@ -162,8 +160,7 @@ public void testBuildStatsTableWithDebugDimensions() final String expectedTable = "\nError: {duty=duty1} ==> {error1=10}" + "\nInfo : {duty=duty1} ==> {info1=20}" - + "\nDebug: {duty=duty1} ==> {debug1=30}" - + "\nTOTAL: 3 stats for 1 dimension keys"; + + "\nDebug: {duty=duty1} ==> {debug1=30}"; Assert.assertEquals(expectedTable, debugStats.buildStatsTable()); } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java index 801016eb819a..0d3894fd536d 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java @@ -48,6 +48,7 @@ import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.metadata.MetadataRuleManager; import org.apache.druid.metadata.SegmentsMetadataManager; +import org.apache.druid.rpc.indexing.OverlordClient; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.server.DruidNode; import org.apache.druid.server.coordination.ServerType; @@ -61,7 +62,7 @@ import org.apache.druid.server.coordinator.duty.CoordinatorCustomDuty; import org.apache.druid.server.coordinator.duty.CoordinatorCustomDutyGroup; import org.apache.druid.server.coordinator.duty.CoordinatorCustomDutyGroups; -import org.apache.druid.server.coordinator.duty.CoordinatorDuty; +import org.apache.druid.server.coordinator.duty.DutyGroupStatus; import org.apache.druid.server.coordinator.duty.KillSupervisorsCustomDuty; import org.apache.druid.server.coordinator.loading.CuratorLoadQueuePeon; import org.apache.druid.server.coordinator.loading.LoadQueuePeon; @@ -114,6 +115,7 @@ public class DruidCoordinatorTest extends CuratorTestBase private ObjectMapper objectMapper; private DruidNode druidNode; private NewestSegmentFirstPolicy newestSegmentFirstPolicy; + private OverlordClient overlordClient; private final LatchableServiceEmitter serviceEmitter = 
new LatchableServiceEmitter(); @Before @@ -125,6 +127,7 @@ public void setUp() throws Exception dataSourcesSnapshot = EasyMock.createNiceMock(DataSourcesSnapshot.class); metadataRuleManager = EasyMock.createNiceMock(MetadataRuleManager.class); loadQueueTaskMaster = EasyMock.createMock(LoadQueueTaskMaster.class); + overlordClient = EasyMock.createMock(OverlordClient.class); JacksonConfigManager configManager = EasyMock.createNiceMock(JacksonConfigManager.class); EasyMock.expect( @@ -181,7 +184,7 @@ public void setUp() throws Exception serverInventoryView, serviceEmitter, scheduledExecutorFactory, - null, + overlordClient, loadQueueTaskMaster, new SegmentLoadQueueManager(serverInventoryView, loadQueueTaskMaster), new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), @@ -605,6 +608,8 @@ public void testComputeUnderReplicationCountsPerDataSourcePerTierForSegmentsWith @Test public void testCompactSegmentsDutyWhenCustomDutyGroupEmpty() { + EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically()).andReturn(true).anyTimes(); + CoordinatorCustomDutyGroups emptyCustomDutyGroups = new CoordinatorCustomDutyGroups(ImmutableSet.of()); coordinator = new DruidCoordinator( druidCoordinatorConfig, @@ -612,7 +617,7 @@ public void testCompactSegmentsDutyWhenCustomDutyGroupEmpty() serverInventoryView, serviceEmitter, scheduledExecutorFactory, - null, + overlordClient, loadQueueTaskMaster, null, new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), @@ -624,22 +629,30 @@ public void testCompactSegmentsDutyWhenCustomDutyGroupEmpty() null, CentralizedDatasourceSchemaConfig.create() ); + coordinator.start(); + // Since CompactSegments is not enabled in Custom Duty Group, then CompactSegments must be created in IndexingServiceDuties - List indexingDuties = coordinator.makeIndexingServiceDuties(); - Assert.assertTrue(indexingDuties.stream().anyMatch(coordinatorDuty -> coordinatorDuty instanceof CompactSegments)); + final List duties = coordinator.getStatusOfDuties(); + Assert.assertEquals(3, duties.size()); - // CompactSegments should not exist in Custom Duty Group - List compactSegmentsDutyFromCustomGroups = coordinator.getCompactSegmentsDutyFromCustomGroups(); - Assert.assertTrue(compactSegmentsDutyFromCustomGroups.isEmpty()); + Assert.assertEquals("HistoricalManagementDuties", duties.get(0).getName()); + Assert.assertEquals("IndexingServiceDuties", duties.get(1).getName()); + Assert.assertEquals("MetadataStoreManagementDuties", duties.get(2).getName()); - // CompactSegments returned by this method should be created using the DruidCoordinatorConfig in the DruidCoordinator - CompactSegments duty = coordinator.initializeCompactSegmentsDuty(newestSegmentFirstPolicy); - Assert.assertNotNull(duty); + final String compactDutyName = CompactSegments.class.getName(); + Assert.assertTrue(duties.get(1).getDutyNames().contains(compactDutyName)); + + // CompactSegments should not exist in other duty groups + Assert.assertFalse(duties.get(0).getDutyNames().contains(compactDutyName)); + Assert.assertFalse(duties.get(2).getDutyNames().contains(compactDutyName)); + + coordinator.stop(); } @Test public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupDoesNotContainsCompactSegments() { + EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically()).andReturn(true).anyTimes(); CoordinatorCustomDutyGroup group = new CoordinatorCustomDutyGroup( "group1", Duration.standardSeconds(1), @@ -652,7 +665,7 @@ public void 
testInitializeCompactSegmentsDutyWhenCustomDutyGroupDoesNotContainsC serverInventoryView, serviceEmitter, scheduledExecutorFactory, - null, + overlordClient, loadQueueTaskMaster, null, new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), @@ -664,22 +677,30 @@ public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupDoesNotContainsC null, CentralizedDatasourceSchemaConfig.create() ); + coordinator.start(); // Since CompactSegments is not enabled in Custom Duty Group, then CompactSegments must be created in IndexingServiceDuties - List indexingDuties = coordinator.makeIndexingServiceDuties(); - Assert.assertTrue(indexingDuties.stream().anyMatch(coordinatorDuty -> coordinatorDuty instanceof CompactSegments)); + final List duties = coordinator.getStatusOfDuties(); + Assert.assertEquals(4, duties.size()); - // CompactSegments should not exist in Custom Duty Group - List compactSegmentsDutyFromCustomGroups = coordinator.getCompactSegmentsDutyFromCustomGroups(); - Assert.assertTrue(compactSegmentsDutyFromCustomGroups.isEmpty()); + Assert.assertEquals("HistoricalManagementDuties", duties.get(0).getName()); + Assert.assertEquals("IndexingServiceDuties", duties.get(1).getName()); + Assert.assertEquals("MetadataStoreManagementDuties", duties.get(2).getName()); + Assert.assertEquals("group1", duties.get(3).getName()); - // CompactSegments returned by this method should be created using the DruidCoordinatorConfig in the DruidCoordinator - CompactSegments duty = coordinator.initializeCompactSegmentsDuty(newestSegmentFirstPolicy); - Assert.assertNotNull(duty); + final String compactDutyName = CompactSegments.class.getName(); + Assert.assertTrue(duties.get(1).getDutyNames().contains(compactDutyName)); + + // CompactSegments should not exist in other duty groups + Assert.assertFalse(duties.get(0).getDutyNames().contains(compactDutyName)); + Assert.assertFalse(duties.get(2).getDutyNames().contains(compactDutyName)); + + coordinator.stop(); } @Test public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupContainsCompactSegments() { + EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically()).andReturn(true).anyTimes(); CoordinatorCustomDutyGroup compactSegmentCustomGroup = new CoordinatorCustomDutyGroup( "group1", Duration.standardSeconds(1), @@ -692,7 +713,7 @@ public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupContainsCompactS serverInventoryView, serviceEmitter, scheduledExecutorFactory, - null, + overlordClient, loadQueueTaskMaster, null, new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), @@ -704,19 +725,27 @@ public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupContainsCompactS null, CentralizedDatasourceSchemaConfig.create() ); + coordinator.start(); + // Since CompactSegments is enabled in Custom Duty Group, then CompactSegments must not be created in IndexingServiceDuties - List indexingDuties = coordinator.makeIndexingServiceDuties(); - Assert.assertTrue(indexingDuties.stream().noneMatch(coordinatorDuty -> coordinatorDuty instanceof CompactSegments)); + final List duties = coordinator.getStatusOfDuties(); + Assert.assertEquals(4, duties.size()); + + Assert.assertEquals("HistoricalManagementDuties", duties.get(0).getName()); + Assert.assertEquals("IndexingServiceDuties", duties.get(1).getName()); + Assert.assertEquals("MetadataStoreManagementDuties", duties.get(2).getName()); + Assert.assertEquals("group1", duties.get(3).getName()); // CompactSegments should exist in Custom Duty Group - List 
compactSegmentsDutyFromCustomGroups = coordinator.getCompactSegmentsDutyFromCustomGroups(); - Assert.assertFalse(compactSegmentsDutyFromCustomGroups.isEmpty()); - Assert.assertEquals(1, compactSegmentsDutyFromCustomGroups.size()); - Assert.assertNotNull(compactSegmentsDutyFromCustomGroups.get(0)); - - // CompactSegments returned by this method should be from the Custom Duty Group - CompactSegments duty = coordinator.initializeCompactSegmentsDuty(newestSegmentFirstPolicy); - Assert.assertNotNull(duty); + final String compactDutyName = CompactSegments.class.getName(); + Assert.assertTrue(duties.get(3).getDutyNames().contains(compactDutyName)); + + // CompactSegments should not exist in other duty groups + Assert.assertFalse(duties.get(0).getDutyNames().contains(compactDutyName)); + Assert.assertFalse(duties.get(1).getDutyNames().contains(compactDutyName)); + Assert.assertFalse(duties.get(2).getDutyNames().contains(compactDutyName)); + + coordinator.stop(); } @Test(timeout = 3000) @@ -797,7 +826,7 @@ public void testCoordinatorCustomDutyGroupsRunAsExpected() throws Exception serverInventoryView, serviceEmitter, scheduledExecutorFactory, - null, + overlordClient, loadQueueTaskMaster, new SegmentLoadQueueManager(serverInventoryView, loadQueueTaskMaster), new LatchableServiceAnnouncer(leaderAnnouncerLatch, leaderUnannouncerLatch), diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/BalanceSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/BalanceSegmentsTest.java index bd2f00b74e24..e4f3c416411b 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/BalanceSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/BalanceSegmentsTest.java @@ -326,7 +326,7 @@ private DruidCoordinatorRuntimeParams.Builder defaultRuntimeParamsBuilder( ) { return DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDruidCluster(DruidCluster.builder().addTier("normal", servers).build()) .withUsedSegments(allSegments) .withBroadcastDatasources(broadcastDatasources) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStatsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStatsTest.java deleted file mode 100644 index bb6d0406c699..000000000000 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/CollectSegmentAndServerStatsTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.druid.server.coordinator.duty; - -import com.google.common.collect.ImmutableMap; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.server.coordinator.DruidCluster; -import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; -import org.apache.druid.server.coordinator.balancer.RandomBalancerStrategy; -import org.apache.druid.server.coordinator.loading.LoadQueueTaskMaster; -import org.apache.druid.server.coordinator.loading.SegmentLoadQueueManager; -import org.apache.druid.server.coordinator.loading.TestLoadQueuePeon; -import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; -import org.apache.druid.server.coordinator.stats.Stats; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; - -@RunWith(MockitoJUnitRunner.class) -public class CollectSegmentAndServerStatsTest -{ - @Mock - private LoadQueueTaskMaster mockTaskMaster; - - @Test - public void testCollectedSegmentStats() - { - DruidCoordinatorRuntimeParams runtimeParams = - DruidCoordinatorRuntimeParams.newBuilder(DateTimes.nowUtc()) - .withDruidCluster(DruidCluster.EMPTY) - .withUsedSegments() - .withBalancerStrategy(new RandomBalancerStrategy()) - .withSegmentAssignerUsing(new SegmentLoadQueueManager(null, null)) - .build(); - - Mockito.when(mockTaskMaster.getAllPeons()) - .thenReturn(ImmutableMap.of("server1", new TestLoadQueuePeon())); - - CoordinatorDuty duty = new CollectSegmentAndServerStats(mockTaskMaster); - DruidCoordinatorRuntimeParams params = duty.run(runtimeParams); - - CoordinatorRunStats stats = params.getCoordinatorStats(); - Assert.assertTrue(stats.hasStat(Stats.SegmentQueue.NUM_TO_LOAD)); - Assert.assertTrue(stats.hasStat(Stats.SegmentQueue.NUM_TO_DROP)); - Assert.assertTrue(stats.hasStat(Stats.SegmentQueue.LOAD_RATE_KBPS)); - } - -} diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java index eb03acc48bfa..fb71c08bfd97 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/CompactSegmentsTest.java @@ -1831,7 +1831,7 @@ private CoordinatorRunStats doCompactSegments( ) { DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDataSourcesSnapshot(dataSources) .withCompactionConfig( new DruidCompactionConfig( diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillStalePendingSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillStalePendingSegmentsTest.java index 11ea5bd4b57b..03dcc94bd69e 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillStalePendingSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillStalePendingSegmentsTest.java @@ -174,7 +174,7 @@ public void testPendingSegmentOfDisallowedDatasourceIsNotDeleted() private DruidCoordinatorRuntimeParams.Builder createParamsWithDatasources(String... 
datasources) { - DruidCoordinatorRuntimeParams.Builder builder = DruidCoordinatorRuntimeParams.newBuilder(DateTimes.nowUtc()); + DruidCoordinatorRuntimeParams.Builder builder = DruidCoordinatorRuntimeParams.builder(); // Create a dummy for each of the datasources so that they get added to the timeline Set usedSegments = new HashSet<>(); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java index f4e8cdb3cd4b..b1afbfd41a53 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java @@ -130,7 +130,8 @@ public void setup() .withBufferPeriod(Duration.standardSeconds(1)); dynamicConfigBuilder = CoordinatorDynamicConfig.builder() .withKillTaskSlotRatio(1.0); - paramsBuilder = DruidCoordinatorRuntimeParams.newBuilder(DateTimes.nowUtc()); + paramsBuilder = DruidCoordinatorRuntimeParams.builder() + .withUsedSegments(Collections.emptySet()); } @Test diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnusedTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnusedTest.java index d6dd4f982068..76a4bbbedd65 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnusedTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkEternityTombstonesAsUnusedTest.java @@ -463,7 +463,7 @@ private DruidCoordinatorRuntimeParams initializeServerAndGetParams(final Immutab .build(); final DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDataSourcesSnapshot( segmentsMetadataManager.getSnapshotOfDataSourcesWithAllUsedSegments() ) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnusedTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnusedTest.java index d7cbf7773c92..43cad2391f25 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnusedTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnusedTest.java @@ -90,7 +90,7 @@ public void testRun(String serverType) .build(); DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDataSourcesSnapshot( segmentsMetadataManager.getSnapshotOfDataSourcesWithAllUsedSegments() ) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/RunRulesTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/RunRulesTest.java index a284a27c0071..774c0699b07b 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/RunRulesTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/RunRulesTest.java @@ -99,7 +99,7 @@ public void setUp() emitter = new StubServiceEmitter("coordinator", "host"); EmittingLogger.registerEmitter(emitter); databaseRuleManager = EasyMock.createMock(MetadataRuleManager.class); - ruleRunner = new RunRules(Set::size); + ruleRunner = new RunRules(Set::size, databaseRuleManager::getRulesWithDefault); loadQueueManager = new SegmentLoadQueueManager(null, null); balancerExecutor = MoreExecutors.listeningDecorator(Execs.multiThreaded(1, 
"RunRulesTest-%d")); } @@ -334,10 +334,9 @@ private DruidCoordinatorRuntimeParams.Builder createCoordinatorRuntimeParams( ) { return DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc().minusDays(1)) + .builder() .withDruidCluster(druidCluster) - .withUsedSegments(dataSegments) - .withDatabaseRuleManager(databaseRuleManager); + .withUsedSegments(dataSegments); } /** @@ -1014,7 +1013,7 @@ public void testRulesRunOnNonOvershadowedSegmentsOnly() Assert.assertFalse(stats.hasStat(Stats.Segments.DROPPED)); Assert.assertEquals(2, usedSegments.size()); - Assert.assertEquals(usedSegments, params.getUsedSegments()); + Assert.assertEquals(usedSegments, params.getUsedSegmentsNewestFirst()); EasyMock.verify(mockPeon); } diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegmentsTest.java index db0af054e60a..131750d9581b 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegmentsTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/UnloadUnusedSegmentsTest.java @@ -243,7 +243,7 @@ public void test_unloadUnusedSegmentsFromAllServers() Set usedSegments = ImmutableSet.of(segment2); DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDruidCluster( DruidCluster .builder() @@ -261,10 +261,9 @@ public void test_unloadUnusedSegmentsFromAllServers() ) .withUsedSegments(usedSegments) .withBroadcastDatasources(Collections.singleton(broadcastDatasource)) - .withDatabaseRuleManager(databaseRuleManager) .build(); - params = new UnloadUnusedSegments(loadQueueManager).run(params); + params = new UnloadUnusedSegments(loadQueueManager, databaseRuleManager::getRulesWithDefault).run(params); CoordinatorRunStats stats = params.getCoordinatorStats(); // We drop segment1 and broadcast1 from all servers, realtimeSegment is not dropped by the indexer diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java index 46e62fe14321..10f7fa1c1462 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/BroadcastDistributionRuleTest.java @@ -20,7 +20,6 @@ package org.apache.druid.server.coordinator.rules; import org.apache.druid.client.DruidServer; -import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.server.coordination.ServerType; import org.apache.druid.server.coordinator.CreateDataSegments; @@ -225,7 +224,7 @@ private CoordinatorRunStats runRuleOnSegment( { StrategicSegmentAssigner segmentAssigner = params.getSegmentAssigner(); rule.run(segment, segmentAssigner); - return segmentAssigner.getStats(); + return params.getCoordinatorStats(); } private DruidCoordinatorRuntimeParams makeParamsWithUsedSegments( @@ -234,7 +233,7 @@ private DruidCoordinatorRuntimeParams makeParamsWithUsedSegments( ) { return DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDruidCluster(druidCluster) .withUsedSegments(usedSegments) .withBalancerStrategy(new RandomBalancerStrategy()) diff --git a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java 
b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java index 1e43d89bdda4..8e220cd9f14f 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/rules/LoadRuleTest.java @@ -143,7 +143,7 @@ private DruidCoordinatorRuntimeParams makeCoordinatorRuntimeParams( ) { return DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDruidCluster(druidCluster) .withBalancerStrategy(balancerStrategy) .withUsedSegments(usedSegments) @@ -334,7 +334,7 @@ public void testMaxLoadingQueueSize() DataSegment dataSegment3 = createDataSegment("ds3"); DruidCoordinatorRuntimeParams params = DruidCoordinatorRuntimeParams - .newBuilder(DateTimes.nowUtc()) + .builder() .withDruidCluster(druidCluster) .withBalancerStrategy(balancerStrategy) .withUsedSegments(dataSegment1, dataSegment2, dataSegment3) From 0d2e61c9a3b859665fb96176e7652513ff4d4f38 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Sun, 25 Aug 2024 19:16:31 +0530 Subject: [PATCH 2/6] Remove some more logs --- .../common/actions/TaskActionTestKit.java | 10 ++--- .../common/task/IngestionTestBase.java | 5 ++- .../metadata/SqlSegmentsMetadataManager.java | 43 +++++++++---------- .../SqlSegmentsMetadataManagerProvider.java | 9 +++- .../server/coordinator/DruidCoordinator.java | 5 +-- .../balancer/TierSegmentBalancer.java | 8 ++-- .../coordinator/duty/BalanceSegments.java | 23 +++------- .../coordinator/duty/CompactSegments.java | 8 +--- .../duty/CoordinatorDutyGroup.java | 2 +- .../duty/KillCompactionConfig.java | 2 - .../MarkOvershadowedSegmentsAsUnused.java | 8 +--- .../coordinator/duty/MetadataCleanupDuty.java | 4 +- .../duty/PrepareBalancerAndLoadQueues.java | 2 +- .../druid/server/coordinator/stats/Stats.java | 5 ++- ...qlSegmentsMetadataManagerProviderTest.java | 4 +- ...SegmentsMetadataManagerSchemaPollTest.java | 9 ++-- .../SqlSegmentsMetadataManagerTest.java | 8 ++-- .../duty/KillUnusedSegmentsTest.java | 4 +- .../server/metrics/NoopServiceEmitter.java | 7 +++ 19 files changed, 84 insertions(+), 82 deletions(-) diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java index ed9b0e501fda..c1c949fdadf5 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java @@ -104,16 +104,16 @@ public int getSqlMetadataMaxRetry() } }; taskLockbox = new TaskLockbox(taskStorage, metadataStorageCoordinator); - segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter()); + segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance()); segmentsMetadataManager = new SqlSegmentsMetadataManager( objectMapper, Suppliers.ofInstance(new SegmentsMetadataManagerConfig()), Suppliers.ofInstance(metadataStorageTablesConfig), testDerbyConnector, segmentSchemaCache, - CentralizedDatasourceSchemaConfig.create() + CentralizedDatasourceSchemaConfig.create(), + NoopServiceEmitter.instance() ); - final ServiceEmitter noopEmitter = new NoopServiceEmitter(); final TaskLockConfig taskLockConfig = new TaskLockConfig() { @Override @@ -137,10 +137,10 @@ public long getBatchAllocationWaitTime() taskLockbox, taskLockConfig, metadataStorageCoordinator, - noopEmitter, + NoopServiceEmitter.instance(), ScheduledExecutors::fixed ), - 
noopEmitter, + NoopServiceEmitter.instance(), EasyMock.createMock(SupervisorManager.class), objectMapper ); diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java index 67a0c518f57c..00e3733f6e4f 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IngestionTestBase.java @@ -155,14 +155,15 @@ public void setUpIngestionTestBase() throws IOException segmentSchemaManager, CentralizedDatasourceSchemaConfig.create() ); - segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter()); + segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance()); segmentsMetadataManager = new SqlSegmentsMetadataManager( objectMapper, SegmentsMetadataManagerConfig::new, derbyConnectorRule.metadataTablesConfigSupplier(), derbyConnectorRule.getConnector(), segmentSchemaCache, - CentralizedDatasourceSchemaConfig.create() + CentralizedDatasourceSchemaConfig.create(), + NoopServiceEmitter.instance() ); lockbox = new TaskLockbox(taskStorage, storageCoordinator); segmentCacheManagerFactory = new SegmentCacheManagerFactory(TestIndex.INDEX_IO, getObjectMapper()); diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java index 2c81603e529c..e326034de81c 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java @@ -50,6 +50,8 @@ import org.apache.druid.java.util.common.lifecycle.LifecycleStop; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.emitter.service.ServiceEmitter; +import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.segment.SchemaPayload; import org.apache.druid.segment.SegmentMetadata; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; @@ -164,6 +166,7 @@ long nanosElapsedFromInitiation() private final Supplier dbTables; private final SQLMetadataConnector connector; private final SegmentSchemaCache segmentSchemaCache; + private final ServiceEmitter serviceEmitter; private final CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig; /** @@ -251,7 +254,8 @@ public SqlSegmentsMetadataManager( Supplier dbTables, SQLMetadataConnector connector, SegmentSchemaCache segmentSchemaCache, - CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig + CentralizedDatasourceSchemaConfig centralizedDatasourceSchemaConfig, + ServiceEmitter serviceEmitter ) { this.jsonMapper = jsonMapper; @@ -260,6 +264,7 @@ public SqlSegmentsMetadataManager( this.connector = connector; this.segmentSchemaCache = segmentSchemaCache; this.centralizedDatasourceSchemaConfig = centralizedDatasourceSchemaConfig; + this.serviceEmitter = serviceEmitter; } /** @@ -639,7 +644,7 @@ public boolean markSegmentAsUsed(final String segmentId) { try { int numUpdatedDatabaseEntries = connector.getDBI().withHandle( - (Handle handle) -> handle + handle -> handle .createStatement(StringUtils.format("UPDATE %s SET used=true, used_status_last_updated = :used_status_last_updated WHERE id = :id", getSegmentsTable())) .bind("id", segmentId) 
.bind("used_status_last_updated", DateTimes.nowUtc().toString()) @@ -650,7 +655,7 @@ public boolean markSegmentAsUsed(final String segmentId) // segment into the respective data source, because we don't have it fetched from the database. It's probably not // worth complicating the implementation and making two database queries just to add the segment because it will // be anyway fetched during the next poll(). Segment putting that is done in the bulk markAsUsed methods is a nice - // to have thing, but doesn't formally affects the external guarantees of SegmentsMetadataManager class. + // to have thing, but doesn't formally affect the external guarantees of SegmentsMetadataManager class. return numUpdatedDatabaseEntries > 0; } catch (RuntimeException e) { @@ -1031,7 +1036,6 @@ private void doPoll() private void doPollSegments() { final Stopwatch stopwatch = Stopwatch.createStarted(); - log.info("Starting polling of segment table."); // Some databases such as PostgreSQL require auto-commit turned off // to stream results back, enabling transactions disables auto-commit @@ -1060,14 +1064,11 @@ private void doPollSegments() "Unexpected 'null' when polling segments from the db, aborting snapshot update." ); - if (segments.isEmpty()) { - log.info("No segments found in the database!"); - } else { - log.info( - "Polled and found [%,d] segments in the database in [%,d] ms.", - segments.size(), stopwatch.millisElapsed() - ); - } + log.debug( + "Polled and found [%,d] segments in the database in [%,d] ms.", + segments.size(), stopwatch.millisElapsed() + ); + emitMetric("segment/poll/time", stopwatch.millisElapsed()); createDatasourcesSnapshot(segments); } @@ -1075,7 +1076,6 @@ private void doPollSegments() private void doPollSegmentAndSchema() { final Stopwatch stopwatch = Stopwatch.createStarted(); - log.info("Starting polling of segment and schema table."); ImmutableMap.Builder segmentMetadataBuilder = new ImmutableMap.Builder<>(); @@ -1166,18 +1166,16 @@ public List inTransaction(Handle handle, TransactionStatus status) "Unexpected 'null' when polling segments from the db, aborting snapshot update." ); - if (segments.isEmpty() && schemaMap.isEmpty()) { - log.info("No segments found in the database!"); - } else { - log.info( - "Polled and found [%,d] segments and [%,d] schemas in the database in [%,d] ms.", - segments.size(), schemaMap.size(), stopwatch.millisElapsed() - ); - } + emitMetric("segment/pollWithSchema/time", stopwatch.millisElapsed()); createDatasourcesSnapshot(segments); } + private void emitMetric(String metricName, long value) + { + serviceEmitter.emit(new ServiceMetricEvent.Builder().setMetric(metricName, value)); + } + private void createDatasourcesSnapshot(List segments) { final Stopwatch stopwatch = Stopwatch.createStarted(); @@ -1195,10 +1193,11 @@ private void createDatasourcesSnapshot(List segments) Iterables.filter(segments, Objects::nonNull), // Filter corrupted entries (see above in this method). dataSourceProperties ); - log.info( + log.debug( "Successfully created snapshot from polled segments in [%d] ms. 
         stopwatch.millisElapsed(), dataSourcesSnapshot.getOvershadowedSegments().size()
     );
+    emitMetric("segment/buildSnapshot/time", stopwatch.millisElapsed());
   }

   private static ImmutableMap<String, String> createDefaultDataSourceProperties()
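
Note: for readers skimming the patch, the metric-emission pattern added to SqlSegmentsMetadataManager boils down to the sketch below. The emitter, Stopwatch and builder calls are taken from the hunks above; pollSegmentsFromDb() is a hypothetical stand-in for the actual polling code, not a real method.

    // Minimal sketch of the new timing pattern (assumes a ServiceEmitter field
    // like the one added in this patch).
    final Stopwatch stopwatch = Stopwatch.createStarted();
    final List<DataSegment> segments = pollSegmentsFromDb();  // hypothetical helper
    // Emit the poll duration as a metric instead of logging it at INFO level.
    serviceEmitter.emit(
        new ServiceMetricEvent.Builder().setMetric("segment/poll/time", stopwatch.millisElapsed())
    );
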
- + " Use API '/druid/coordinator/v1/duties' to get duty status or set dynamic config" - + " 'debugDimensions' on API '/druid/coordinator/v1/config' to log more details.", + + " Use API '/druid/coordinator/v1/duties' to get current status.", dutiesRunnables.size() ); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java index 9cb52596f688..36be8a61de93 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/balancer/TierSegmentBalancer.java @@ -20,7 +20,7 @@ package org.apache.druid.server.coordinator.balancer; import org.apache.druid.client.ImmutableDruidDataSource; -import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.ServerHolder; @@ -49,7 +49,7 @@ */ public class TierSegmentBalancer { - private static final EmittingLogger log = new EmittingLogger(TierSegmentBalancer.class); + private static final Logger log = new Logger(TierSegmentBalancer.class); private final String tier; private final DruidCoordinatorRuntimeParams params; @@ -127,10 +127,10 @@ private void moveSegmentsFrom( ); movedCount += moveSegmentsTo(activeServers, pickedSegments, numLoadedSegmentsToMove); } else { - log.info("There are already [%,d] segments moving in tier[%s].", movingSegmentCount, tier); + log.debug("There are already [%,d] segments moving in tier[%s].", movingSegmentCount, tier); } - log.info( + log.debug( "Moved [%,d of %,d] segments from [%d] [%s] servers in tier [%s].", movedCount, numSegmentsToMove, sourceServers.size(), sourceServerType, tier ); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java index c13bcb255f28..c27f95a002de 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/BalanceSegments.java @@ -20,25 +20,25 @@ package org.apache.druid.server.coordinator.duty; import org.apache.druid.java.util.common.Pair; -import org.apache.druid.java.util.emitter.EmittingLogger; +import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.server.coordinator.CoordinatorDynamicConfig; import org.apache.druid.server.coordinator.DruidCluster; import org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams; import org.apache.druid.server.coordinator.ServerHolder; import org.apache.druid.server.coordinator.balancer.SegmentToMoveCalculator; import org.apache.druid.server.coordinator.balancer.TierSegmentBalancer; -import org.apache.druid.server.coordinator.loading.SegmentLoadingConfig; import org.apache.druid.server.coordinator.stats.CoordinatorRunStats; +import org.apache.druid.server.coordinator.stats.Stats; import org.joda.time.Duration; import java.util.Set; /** - * + * Coordinator Duty to balance segments across Historicals. 
  */
 public class BalanceSegments implements CoordinatorDuty
 {
-  private static final EmittingLogger log = new EmittingLogger(BalanceSegments.class);
+  private static final Logger log = new Logger(BalanceSegments.class);

   private final Duration coordinatorPeriod;

@@ -51,25 +51,16 @@ public BalanceSegments(Duration coordinatorPeriod)
   public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params)
   {
     if (params.getUsedSegmentCount() <= 0) {
-      log.debug("Skipping balance as there are no used segments.");
       return params;
     }

-    final DruidCluster cluster = params.getDruidCluster();
-    final SegmentLoadingConfig loadingConfig = params.getSegmentLoadingConfig();
-
     final int maxSegmentsToMove = getMaxSegmentsToMove(params);
+    params.getCoordinatorStats().add(Stats.Balancer.MAX_TO_MOVE, maxSegmentsToMove);
     if (maxSegmentsToMove <= 0) {
-      log.debug("Skipping balance as maxSegmentsToMove is [%d].", maxSegmentsToMove);
       return params;
-    } else {
-      log.debug(
-          "Balancing segments in tiers [%s] with maxSegmentsToMove[%,d] and maxLifetime[%d].",
-          cluster.getTierNames(), maxSegmentsToMove, loadingConfig.getMaxLifetimeInLoadQueue()
-      );
     }

-    cluster.getHistoricals().forEach(
+    params.getDruidCluster().getHistoricals().forEach(
         (tier, servers) -> new TierSegmentBalancer(tier, servers, maxSegmentsToMove, params).run()
     );

@@ -97,7 +88,7 @@ private int getMaxSegmentsToMove(DruidCoordinatorRuntimeParams params)
     final int numBalancerThreads = params.getSegmentLoadingConfig().getBalancerComputeThreads();
     final int maxSegmentsToMove = SegmentToMoveCalculator
         .computeMaxSegmentsToMovePerTier(totalSegmentsInCluster, numBalancerThreads, coordinatorPeriod);
-    log.info(
+    log.debug(
         "Computed maxSegmentsToMove[%,d] for total [%,d] segments on [%d] historicals.",
         maxSegmentsToMove, totalSegmentsInCluster, numHistoricalsAndSegments.lhs
     );
diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java
index 7b2392b8c66c..a2dbd1f40519 100644
--- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java
+++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java
@@ -118,19 +118,15 @@ public OverlordClient getOverlordClient()
   @Override
   public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params)
   {
-    LOG.info("Running CompactSegments duty");
-
     final DruidCompactionConfig dynamicConfig = params.getCompactionConfig();
     final int maxCompactionTaskSlots = dynamicConfig.getMaxCompactionTaskSlots();
     if (maxCompactionTaskSlots <= 0) {
-      LOG.info("Skipping compaction as maxCompactionTaskSlots is [%d].", maxCompactionTaskSlots);
       resetCompactionSnapshot();
       return params;
     }

     List<DataSourceCompactionConfig> compactionConfigList = dynamicConfig.getCompactionConfigs();
     if (compactionConfigList == null || compactionConfigList.isEmpty()) {
-      LOG.info("Skipping compaction as compaction config list is empty.");
       resetCompactionSnapshot();
       return params;
     }
@@ -365,7 +361,7 @@ private int getAvailableCompactionTaskSlots(int compactionTaskCapacity, int busy
       // compaction is enabled and estimatedIncompleteCompactionTasks is 0.
       availableCompactionTaskSlots = Math.max(1, compactionTaskCapacity);
     }
-    LOG.info(
+    LOG.debug(
         "Found [%d] available task slots for compaction out of max compaction task capacity [%d]",
         availableCompactionTaskSlots, compactionTaskCapacity
     );
@@ -522,7 +518,7 @@ private int submitCompactionTasks(
         new ClientCompactionRunnerInfo(compactionEngine)
     );

-    LOG.info(
+    LOG.debug(
         "Submitted a compaction task[%s] for [%d] segments in datasource[%s], umbrella interval[%s].",
         taskId, segmentsToCompact.size(), dataSourceName, entry.getUmbrellaInterval()
     );
diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java
index 6e9bd1220f4b..642744ae878a 100644
--- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java
+++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CoordinatorDutyGroup.java
@@ -69,7 +69,7 @@ public CoordinatorDutyGroup(
     this.dutyNames = duties.stream().map(duty -> duty.getClass().getName()).collect(Collectors.toList());
     this.coordinator = coordinator;

-    log.info("Scheduled run of duty group[%s] at period[%s] with duties[%s].", name, period, dutyNames);
+    log.info("Created dutyGroup[%s] with period[%s] and duties[%s].", name, period, dutyNames);
   }

   public String getName()
diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillCompactionConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillCompactionConfig.java
index 7395be6d8434..d2dc33365a0d 100644
--- a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillCompactionConfig.java
+++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillCompactionConfig.java
@@ -88,7 +88,6 @@ private DruidCompactionConfig deleteConfigsForInactiveDatasources(
   {
     // If current compaction config is empty then there is nothing to do
     if (DruidCompactionConfig.empty().equals(current)) {
-      log.info("Nothing to do as compaction config is already empty.");
       return current;
     }

@@ -134,7 +133,6 @@ private int tryDeleteCompactionConfigs() throws RetryableException
     if (result.isOk()) {
       return compactionConfigRemoved.get();
     } else if (result.isRetryable()) {
-      log.debug("Retrying KillCompactionConfig duty");
       throw new RetryableException(result.getException());
     } else {
       log.error(result.getException(), "Failed to kill compaction configurations");
diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java
index 656af062732b..6e3c38839766 100644
--- a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java
+++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java
@@ -33,7 +33,6 @@
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.SegmentId;
 import org.apache.druid.timeline.SegmentTimeline;
-import org.joda.time.DateTime;
 import org.joda.time.Duration;

 import java.util.HashMap;
@@ -73,16 +72,11 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params)
         params.getCoordinatorDynamicConfig().getMarkSegmentAsUnusedDelayMillis()
     );
     if (sinceCoordinatorStarted.hasNotElapsed(requiredDelay)) {
-      log.info(
-          "Skipping MarkAsUnused as only [%s ms] have elapsed since Coordinator start. Required delay[%s].",
Required delay[%s].", - sinceCoordinatorStarted.millisElapsed(), requiredDelay - ); return params; } final Set allOvershadowedSegments = params.getDataSourcesSnapshot().getOvershadowedSegments(); if (allOvershadowedSegments.isEmpty()) { - log.info("Skipping MarkAsUnused as there are no overshadowed segments."); return params; } @@ -125,7 +119,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) stats.add(Stats.Segments.OVERSHADOWED, datasourceKey, unusedSegments.size()); int updatedCount = deleteHandler.markSegmentsAsUnused(unusedSegments); - log.info("Successfully marked [%d] segments of datasource[%s] as unused.", updatedCount, datasource); + log.info("Marked [%d] segments of datasource[%s] as unused.", updatedCount, datasource); } ); diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/MetadataCleanupDuty.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/MetadataCleanupDuty.java index e24f93966895..47b7f34c276d 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/MetadataCleanupDuty.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/MetadataCleanupDuty.java @@ -82,7 +82,9 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) try { DateTime minCreatedTime = now.minus(cleanupConfig.getDurationToRetain()); int deletedEntries = cleanupEntriesCreatedBefore(minCreatedTime); - log.info("Removed [%,d] [%s] created before [%s].", deletedEntries, entryType, minCreatedTime); + if (deletedEntries > 0) { + log.info("Removed [%,d] [%s] created before [%s].", deletedEntries, entryType, minCreatedTime); + } params.getCoordinatorStats().add(cleanupCountStat, deletedEntries); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index 97b2235acb60..053e57ddafef 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -199,7 +199,7 @@ private void collectUsedSegmentStats(DruidCoordinatorRuntimeParams params, Coord private void collectDebugStats(SegmentLoadingConfig config, CoordinatorRunStats stats) { - stats.add(Stats.Segments.BALANCER_THREADS, config.getBalancerComputeThreads()); + stats.add(Stats.Balancer.COMPUTE_THREADS, config.getBalancerComputeThreads()); stats.add(Stats.Segments.REPLICATION_THROTTLE_LIMIT, config.getReplicationThrottleLimit()); } } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java index 3f62c0734eea..959ef24d3935 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/stats/Stats.java @@ -63,8 +63,6 @@ public static class Segments = CoordinatorStat.toDebugAndEmit("unneededEternityTombstone", "segment/unneededEternityTombstone/count"); // Values computed in a run - public static final CoordinatorStat BALANCER_THREADS - = CoordinatorStat.toDebugOnly("numBalancerThreads"); public static final CoordinatorStat REPLICATION_THROTTLE_LIMIT = CoordinatorStat.toDebugOnly("replicationThrottleLimit"); } @@ -176,5 +174,8 @@ public static class Balancer ); public static final CoordinatorStat COMPUTATION_TIME = 
CoordinatorStat.toDebugOnly("costComputeTime"); public static final CoordinatorStat COMPUTATION_COUNT = CoordinatorStat.toDebugOnly("costComputeCount"); + + public static final CoordinatorStat COMPUTE_THREADS = CoordinatorStat.toDebugOnly("balancerComputeThreads"); + public static final CoordinatorStat MAX_TO_MOVE = CoordinatorStat.toDebugOnly("maxToMove"); } } diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java index 1b172d17beeb..356116c4f90c 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Suppliers; import org.apache.druid.java.util.common.lifecycle.Lifecycle; +import org.apache.druid.java.util.metrics.StubServiceEmitter; import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig; import org.apache.druid.segment.metadata.SegmentSchemaCache; @@ -54,7 +55,8 @@ public void testLifecycleStartCreatesSegmentTables() throws Exception connector, lifecycle, segmentSchemaCache, - CentralizedDatasourceSchemaConfig.create() + CentralizedDatasourceSchemaConfig.create(), + new StubServiceEmitter() ); SegmentsMetadataManager manager = provider.get(); Assert.assertTrue(manager instanceof SqlSegmentsMetadataManager); diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java index 18095305ad91..b6d62b000b14 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java @@ -70,7 +70,8 @@ public void setUp() throws Exception derbyConnectorRule.metadataTablesConfigSupplier(), connector, segmentSchemaCache, - CentralizedDatasourceSchemaConfig.create() + CentralizedDatasourceSchemaConfig.create(), + NoopServiceEmitter.instance() ); sqlSegmentsMetadataManager.start(); storageConfig = derbyConnectorRule.metadataTablesConfigSupplier().get(); @@ -137,7 +138,8 @@ public void testPollSegmentAndSchema() derbyConnectorRule.metadataTablesConfigSupplier(), connector, segmentSchemaCache, - centralizedDatasourceSchemaConfig + centralizedDatasourceSchemaConfig, + NoopServiceEmitter.instance() ); sqlSegmentsMetadataManager.start(); @@ -225,7 +227,8 @@ public void testPollOnlyNewSchemaVersion() derbyConnectorRule.metadataTablesConfigSupplier(), connector, segmentSchemaCache, - centralizedDatasourceSchemaConfig + centralizedDatasourceSchemaConfig, + NoopServiceEmitter.instance() ); sqlSegmentsMetadataManager.start(); diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java index d101a7a74b47..73673ace7cce 100644 --- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java +++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java @@ -119,7 +119,7 @@ public void setUp() config.setPollDuration(Period.seconds(3)); storageConfig = derbyConnectorRule.metadataTablesConfigSupplier().get(); - segmentSchemaCache = new 
diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java
index 1b172d17beeb..356116c4f90c 100644
--- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java
+++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java
@@ -22,6 +22,7 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Suppliers;
 import org.apache.druid.java.util.common.lifecycle.Lifecycle;
+import org.apache.druid.java.util.metrics.StubServiceEmitter;
 import org.apache.druid.segment.TestHelper;
 import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig;
 import org.apache.druid.segment.metadata.SegmentSchemaCache;
@@ -54,7 +55,8 @@ public void testLifecycleStartCreatesSegmentTables() throws Exception
         connector,
         lifecycle,
         segmentSchemaCache,
-        CentralizedDatasourceSchemaConfig.create()
+        CentralizedDatasourceSchemaConfig.create(),
+        new StubServiceEmitter()
     );
     SegmentsMetadataManager manager = provider.get();
     Assert.assertTrue(manager instanceof SqlSegmentsMetadataManager);
diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java
index 18095305ad91..b6d62b000b14 100644
--- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java
+++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerSchemaPollTest.java
@@ -70,7 +70,8 @@ public void setUp() throws Exception
       derbyConnectorRule.metadataTablesConfigSupplier(),
       connector,
       segmentSchemaCache,
-      CentralizedDatasourceSchemaConfig.create()
+      CentralizedDatasourceSchemaConfig.create(),
+      NoopServiceEmitter.instance()
     );
     sqlSegmentsMetadataManager.start();
     storageConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();
@@ -137,7 +138,8 @@ public void testPollSegmentAndSchema()
       derbyConnectorRule.metadataTablesConfigSupplier(),
       connector,
       segmentSchemaCache,
-      centralizedDatasourceSchemaConfig
+      centralizedDatasourceSchemaConfig,
+      NoopServiceEmitter.instance()
     );

     sqlSegmentsMetadataManager.start();
@@ -225,7 +227,8 @@ public void testPollOnlyNewSchemaVersion()
       derbyConnectorRule.metadataTablesConfigSupplier(),
       connector,
       segmentSchemaCache,
-      centralizedDatasourceSchemaConfig
+      centralizedDatasourceSchemaConfig,
+      NoopServiceEmitter.instance()
     );

     sqlSegmentsMetadataManager.start();
diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java
index d101a7a74b47..73673ace7cce 100644
--- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java
+++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerTest.java
@@ -119,7 +119,7 @@ public void setUp()
     config.setPollDuration(Period.seconds(3));
     storageConfig = derbyConnectorRule.metadataTablesConfigSupplier().get();

-    segmentSchemaCache = new SegmentSchemaCache(new NoopServiceEmitter());
+    segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance());
     segmentSchemaManager = new SegmentSchemaManager(
         derbyConnectorRule.metadataTablesConfigSupplier().get(),
         jsonMapper,
@@ -132,7 +132,8 @@ public void setUp()
         derbyConnectorRule.metadataTablesConfigSupplier(),
         connector,
         segmentSchemaCache,
-        CentralizedDatasourceSchemaConfig.create()
+        CentralizedDatasourceSchemaConfig.create(),
+        NoopServiceEmitter.instance()
     );

     sqlSegmentsMetadataManager.start();
@@ -1343,7 +1344,8 @@ public void testIterateAllUsedNonOvershadowedSegmentsForDatasourceInterval() thr
         derbyConnectorRule.metadataTablesConfigSupplier(),
         derbyConnectorRule.getConnector(),
         segmentSchemaCache,
-        CentralizedDatasourceSchemaConfig.create()
+        CentralizedDatasourceSchemaConfig.create(),
+        NoopServiceEmitter.instance()
     );

     sqlSegmentsMetadataManager.start();
diff --git a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java
index b1afbfd41a53..ce65f30ede75 100644
--- a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java
+++ b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java
@@ -51,6 +51,7 @@
 import org.apache.druid.server.coordinator.stats.Dimension;
 import org.apache.druid.server.coordinator.stats.RowKey;
 import org.apache.druid.server.coordinator.stats.Stats;
+import org.apache.druid.server.metrics.NoopServiceEmitter;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.partition.NoneShardSpec;
 import org.joda.time.DateTime;
@@ -115,7 +116,8 @@ public void setup()
         derbyConnectorRule.metadataTablesConfigSupplier(),
         connector,
         null,
-        CentralizedDatasourceSchemaConfig.create()
+        CentralizedDatasourceSchemaConfig.create(),
+        NoopServiceEmitter.instance()
     );

     sqlSegmentsMetadataManager.start();
diff --git a/server/src/test/java/org/apache/druid/server/metrics/NoopServiceEmitter.java b/server/src/test/java/org/apache/druid/server/metrics/NoopServiceEmitter.java
index fe2a084cb49a..745b365415ab 100644
--- a/server/src/test/java/org/apache/druid/server/metrics/NoopServiceEmitter.java
+++ b/server/src/test/java/org/apache/druid/server/metrics/NoopServiceEmitter.java
@@ -24,6 +24,13 @@
 public class NoopServiceEmitter extends ServiceEmitter
 {
+  private static final NoopServiceEmitter INSTANCE = new NoopServiceEmitter();
+
+  public static NoopServiceEmitter instance()
+  {
+    return INSTANCE;
+  }
+
   public NoopServiceEmitter()
   {
     super("", "", null);
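
Note: the singleton added to NoopServiceEmitter lets tests share one stateless no-op emitter instead of allocating a fresh instance per test; the typical call site, taken directly from the test hunks above:

    // Shared no-op emitter in test setup, replacing per-test allocation:
    segmentSchemaCache = new SegmentSchemaCache(NoopServiceEmitter.instance());
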
From 83aa280fe38686c4bf256c736460b727570eb410 Mon Sep 17 00:00:00 2001
From: Kashif Faraz
Date: Sun, 25 Aug 2024 19:55:29 +0530
Subject: [PATCH 3/6] Add tests

---
 .../common/actions/TaskActionTestKit.java     |  1 -
 .../server/http/CoordinatorDutyStatus.java    | 41 ++++++++++++++++
 .../server/http/CoordinatorResource.java      |  2 +-
 ...qlSegmentsMetadataManagerProviderTest.java |  3 +-
 .../coordinator/DruidCoordinatorTest.java     | 12 +++--
 .../simulate/SegmentLoadingTest.java          |  2 +-
 .../server/http/CoordinatorResourceTest.java  | 47 +++++++++++++++++++
 7 files changed, 100 insertions(+), 8 deletions(-)
 create mode 100644 server/src/main/java/org/apache/druid/server/http/CoordinatorDutyStatus.java

diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java
index c1c949fdadf5..fcbf37c956da 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/TaskActionTestKit.java
@@ -30,7 +30,6 @@
 import org.apache.druid.indexing.overlord.config.TaskLockConfig;
 import org.apache.druid.indexing.overlord.supervisor.SupervisorManager;
 import org.apache.druid.java.util.common.concurrent.ScheduledExecutors;
-import org.apache.druid.java.util.emitter.service.ServiceEmitter;
 import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator;
 import org.apache.druid.metadata.MetadataStorageConnectorConfig;
 import org.apache.druid.metadata.MetadataStorageTablesConfig;
diff --git a/server/src/main/java/org/apache/druid/server/http/CoordinatorDutyStatus.java b/server/src/main/java/org/apache/druid/server/http/CoordinatorDutyStatus.java
new file mode 100644
index 000000000000..a7b8cdf7c4a0
--- /dev/null
+++ b/server/src/main/java/org/apache/druid/server/http/CoordinatorDutyStatus.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.server.http;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.druid.server.coordinator.duty.DutyGroupStatus;
+
+import java.util.List;
+
+public class CoordinatorDutyStatus
+{
+  private final List<DutyGroupStatus> dutyGroups;
+
+  public CoordinatorDutyStatus(List<DutyGroupStatus> dutyGroups)
+  {
+    this.dutyGroups = dutyGroups;
+  }
+
+  @JsonProperty
+  public List<DutyGroupStatus> getDutyGroups()
+  {
+    return dutyGroups;
+  }
+}
diff --git a/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java b/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java
index e73b61d03d6b..1f61464bafbf 100644
--- a/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java
+++ b/server/src/main/java/org/apache/druid/server/http/CoordinatorResource.java
@@ -156,6 +156,6 @@ public Response getLoadQueue(
   @Produces(MediaType.APPLICATION_JSON)
   public Response getStatusOfDuties()
   {
-    return Response.ok(coordinator.getStatusOfDuties()).build();
+    return Response.ok(new CoordinatorDutyStatus(coordinator.getStatusOfDuties())).build();
   }
 }
diff --git a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java
index 356116c4f90c..33ee627bef4e 100644
--- a/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java
+++ b/server/src/test/java/org/apache/druid/metadata/SqlSegmentsMetadataManagerProviderTest.java
@@ -22,7 +22,6 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Suppliers;
 import org.apache.druid.java.util.common.lifecycle.Lifecycle;
-import org.apache.druid.java.util.metrics.StubServiceEmitter;
 import org.apache.druid.segment.TestHelper;
 import org.apache.druid.segment.metadata.CentralizedDatasourceSchemaConfig;
 import org.apache.druid.segment.metadata.SegmentSchemaCache;
@@ -56,7 +55,7 @@ public void testLifecycleStartCreatesSegmentTables() throws Exception
         lifecycle,
         segmentSchemaCache,
         CentralizedDatasourceSchemaConfig.create(),
-        new StubServiceEmitter()
+        NoopServiceEmitter.instance()
     );
     SegmentsMetadataManager manager = provider.get();
     Assert.assertTrue(manager instanceof SqlSegmentsMetadataManager);
diff --git a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java
index 0d3894fd536d..94b619dfdc29 100644
--- a/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java
+++ b/server/src/test/java/org/apache/druid/server/coordinator/DruidCoordinatorTest.java
@@ -608,7 +608,9 @@ public void testComputeUnderReplicationCountsPerDataSourcePerTierForSegmentsWith
   @Test
   public void testCompactSegmentsDutyWhenCustomDutyGroupEmpty()
   {
-    EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically()).andReturn(true).anyTimes();
+    EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically())
+            .andReturn(true).anyTimes();
+    EasyMock.replay(segmentsMetadataManager);
     CoordinatorCustomDutyGroups emptyCustomDutyGroups = new CoordinatorCustomDutyGroups(ImmutableSet.of());
     coordinator = new DruidCoordinator(
@@ -652,7 +654,9 @@ public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupDoesNotContainsC
   {
-    EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically()).andReturn(true).anyTimes();
+    EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically())
+            .andReturn(true).anyTimes();
+    EasyMock.replay(segmentsMetadataManager);
     CoordinatorCustomDutyGroup group = new CoordinatorCustomDutyGroup(
         "group1",
         Duration.standardSeconds(1),
@@ -700,7 +704,9 @@ public void testInitializeCompactSegmentsDutyWhenCustomDutyGroupContainsCompactSegments()
   {
-    EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically()).andReturn(true).anyTimes();
+    EasyMock.expect(segmentsMetadataManager.isPollingDatabasePeriodically())
+            .andReturn(true).anyTimes();
+    EasyMock.replay(segmentsMetadataManager);
     CoordinatorCustomDutyGroup compactSegmentCustomGroup = new CoordinatorCustomDutyGroup(
         "group1",
         Duration.standardSeconds(1),
diff --git a/server/src/test/java/org/apache/druid/server/coordinator/simulate/SegmentLoadingTest.java b/server/src/test/java/org/apache/druid/server/coordinator/simulate/SegmentLoadingTest.java
index 4b7965e9596e..45cc2177cc30 100644
--- a/server/src/test/java/org/apache/druid/server/coordinator/simulate/SegmentLoadingTest.java
+++ b/server/src/test/java/org/apache/druid/server/coordinator/simulate/SegmentLoadingTest.java
@@ -73,7 +73,7 @@ public void testSecondReplicaOnAnyTierIsThrottled()
     startSimulation(sim);
     runCoordinatorCycle();

-    // Verify that that replicationThrottleLimit is honored
+    // Verify that replicationThrottleLimit is honored
     verifyValue(Metric.ASSIGNED_COUNT, 2L);

     loadQueuedSegments();
diff --git a/server/src/test/java/org/apache/druid/server/http/CoordinatorResourceTest.java b/server/src/test/java/org/apache/druid/server/http/CoordinatorResourceTest.java
index 9d2b81f6aa15..fe56c5c38d1d 100644
--- a/server/src/test/java/org/apache/druid/server/http/CoordinatorResourceTest.java
+++ b/server/src/test/java/org/apache/druid/server/http/CoordinatorResourceTest.java
@@ -20,15 +20,21 @@
 package org.apache.druid.server.http;

 import com.google.common.collect.ImmutableMap;
+import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.server.coordinator.DruidCoordinator;
+import org.apache.druid.server.coordinator.duty.DutyGroupStatus;
 import org.apache.druid.server.coordinator.loading.TestLoadQueuePeon;
 import org.easymock.EasyMock;
+import org.joda.time.DateTime;
+import org.joda.time.Duration;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;

 import javax.ws.rs.core.Response;
+import java.util.Collections;
+import java.util.List;

@@ -99,4 +105,45 @@ public void testGetLoadStatusSimple()
     );
     Assert.assertEquals(200, response.getStatus());
   }
+
+  @Test
+  public void testGetStatusOfDuties()
+  {
+    final DateTime now = DateTimes.nowUtc();
+    final DutyGroupStatus dutyGroupStatus = new DutyGroupStatus(
+        "HistoricalManagementDuties",
+        Duration.standardMinutes(1),
+        Collections.singletonList("org.apache.druid.duty.RunRules"),
+        now.minusMinutes(5),
+        now,
+        100L,
+        500L
+    );
+
+    EasyMock.expect(mock.getStatusOfDuties()).andReturn(
+        Collections.singletonList(dutyGroupStatus)
+    ).once();
+    EasyMock.replay(mock);
+
+    final Response response = new CoordinatorResource(mock).getStatusOfDuties();
+    Assert.assertEquals(200, response.getStatus());
+
+    final Object payload = response.getEntity();
+    Assert.assertTrue(payload instanceof CoordinatorDutyStatus);
+
+    final List<DutyGroupStatus> observedDutyGroups = ((CoordinatorDutyStatus) payload).getDutyGroups();
+    Assert.assertEquals(1, observedDutyGroups.size());
+
+    final DutyGroupStatus observedStatus = observedDutyGroups.get(0);
+    Assert.assertEquals("HistoricalManagementDuties", observedStatus.getName());
+    Assert.assertEquals(Duration.standardMinutes(1), observedStatus.getPeriod());
+    Assert.assertEquals(
+        Collections.singletonList("org.apache.druid.duty.RunRules"),
+        observedStatus.getDutyNames()
+    );
+    Assert.assertEquals(now.minusMinutes(5), observedStatus.getLastRunStart());
+    Assert.assertEquals(now, observedStatus.getLastRunEnd());
+    Assert.assertEquals(100L, observedStatus.getAvgRuntimeMillis());
+    Assert.assertEquals(500L, observedStatus.getAvgRunGapMillis());
+  }
 }
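
Note: for reference, an illustrative payload for GET /druid/coordinator/v1/duties, based on the fields exercised by testGetStatusOfDuties above. This is a sketch of the response shape only; the exact serialization of the period and the timestamps depends on the Jackson modules registered for the Joda types.

    {
      "dutyGroups": [
        {
          "name": "HistoricalManagementDuties",
          "period": "PT60S",
          "dutyNames": ["org.apache.druid.duty.RunRules"],
          "lastRunStart": "2024-08-25T10:00:00.000Z",
          "lastRunEnd": "2024-08-25T10:05:00.000Z",
          "avgRuntimeMillis": 100,
          "avgRunGapMillis": 500
        }
      ]
    }
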
Found [%d] overshadowed segments.", - stopwatch.millisElapsed(), dataSourcesSnapshot.getOvershadowedSegments().size() - ); emitMetric("segment/buildSnapshot/time", stopwatch.millisElapsed()); } diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java index 053e57ddafef..a9e926ea4f7d 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/PrepareBalancerAndLoadQueues.java @@ -133,13 +133,6 @@ private void cancelLoadsOnDecommissioningServers(DruidCluster cluster) } ); } - - if (cancelledCount.get() > 0) { - log.debug( - "Cancelled [%d] load/move operations on [%d] decommissioning servers.", - cancelledCount.get(), decommissioningServers.size() - ); - } } private List prepareCurrentServers() diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java index fb4437107607..c2f8751f19ea 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/RunRules.java @@ -81,10 +81,6 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) // b) replication throttling has a smaller impact on newer segments final Set usedSegments = params.getUsedSegmentsNewestFirst(); final Set overshadowed = params.getDataSourcesSnapshot().getOvershadowedSegments(); - log.debug( - "Applying retention rules on [%,d] used segments, skipping [%,d] overshadowed segments.", - usedSegments.size(), overshadowed.size() - ); final StrategicSegmentAssigner segmentAssigner = params.getSegmentAssigner(); @@ -164,17 +160,10 @@ private void alertForInvalidRules(StrategicSegmentAssigner segmentAssigner) private Set getBroadcastDatasources(DruidCoordinatorRuntimeParams params) { - final Set broadcastDatasources = - params.getDataSourcesSnapshot().getDataSourcesMap().values().stream() - .map(ImmutableDruidDataSource::getName) - .filter(this::isBroadcastDatasource) - .collect(Collectors.toSet()); - - if (!broadcastDatasources.isEmpty()) { - log.info("Found broadcast datasources [%s] which will not participate in balancing.", broadcastDatasources); - } - - return broadcastDatasources; + return params.getDataSourcesSnapshot().getDataSourcesMap().values().stream() + .map(ImmutableDruidDataSource::getName) + .filter(this::isBroadcastDatasource) + .collect(Collectors.toSet()); } /** From 5d2eb872c4ceb53f300c47a6ccc99acd593b0e72 Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Mon, 2 Sep 2024 13:35:48 +0530 Subject: [PATCH 5/6] Retain some logs --- .../druid/metadata/SqlSegmentsMetadataManager.java | 14 ++++++++++++++ .../duty/MarkOvershadowedSegmentsAsUnused.java | 6 +++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java index 8daddbf76914..12327785b924 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataManager.java @@ -1064,7 +1064,12 @@ private void doPollSegments() "Unexpected 'null' when polling segments from the db, aborting snapshot update." 
     );

+    stopwatch.stop();
     emitMetric("segment/poll/time", stopwatch.millisElapsed());
+    log.info(
+        "Polled and found [%,d] segments in the database in [%,d]ms.",
+        segments.size(), stopwatch.millisElapsed()
+    );

     createDatasourcesSnapshot(segments);
   }
@@ -1162,7 +1167,12 @@ public List<DataSegment> inTransaction(Handle handle, TransactionStatus status)
         "Unexpected 'null' when polling segments from the db, aborting snapshot update."
     );

+    stopwatch.stop();
     emitMetric("segment/pollWithSchema/time", stopwatch.millisElapsed());
+    log.info(
+        "Polled and found [%,d] segments and [%,d] schemas in the database in [%,d]ms.",
+        segments.size(), schemaMap.size(), stopwatch.millisElapsed()
+    );

     createDatasourcesSnapshot(segments);
   }
@@ -1190,6 +1200,10 @@ private void createDatasourcesSnapshot(List<DataSegment> segments)
         dataSourceProperties
     );
     emitMetric("segment/buildSnapshot/time", stopwatch.millisElapsed());
+    log.debug(
+        "Created snapshot from polled segments in [%d]ms. Found [%d] overshadowed segments.",
+        stopwatch.millisElapsed(), dataSourcesSnapshot.getOvershadowedSegments().size()
+    );
   }

   private static ImmutableMap<String, String> createDefaultDataSourceProperties()
diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java
index 6e3c38839766..9574df1b720b 100644
--- a/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java
+++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/MarkOvershadowedSegmentsAsUnused.java
@@ -118,8 +118,12 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params)
         RowKey datasourceKey = RowKey.of(Dimension.DATASOURCE, datasource);
         stats.add(Stats.Segments.OVERSHADOWED, datasourceKey, unusedSegments.size());

+        final Stopwatch updateTime = Stopwatch.createStarted();
         int updatedCount = deleteHandler.markSegmentsAsUnused(unusedSegments);
-        log.info("Marked [%d] segments of datasource[%s] as unused.", updatedCount, datasource);
+        log.info(
+            "Marked [%d] segments of datasource[%s] as unused in [%,d]ms.",
+            updatedCount, datasource, updateTime.millisElapsed()
+        );
       }
     );

From 779c0681fbf52dc4dfb43b43824ab7f80c13253c Mon Sep 17 00:00:00 2001
From: Kashif Faraz
Date: Tue, 24 Sep 2024 11:00:17 +0530
Subject: [PATCH 6/6] Fix log line

---
 .../apache/druid/server/coordinator/duty/CompactSegments.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java b/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java
index 3d691eec86f7..639246d28762 100644
--- a/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java
+++ b/server/src/main/java/org/apache/druid/server/coordinator/duty/CompactSegments.java
@@ -122,7 +122,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params)
   {
     if (isCompactionSupervisorEnabled()) {
       LOG.warn(
-          "Skipping CompactSegments duty since compaction supervisors are"
+          "Skipping CompactSegments duty since compaction supervisors"
          + " are already running on Overlord."
       );
     } else {