diff --git a/docs/configuration/index.md b/docs/configuration/index.md
index 0976190cf99a..054aea13f100 100644
--- a/docs/configuration/index.md
+++ b/docs/configuration/index.md
@@ -1924,16 +1924,11 @@ You can configure Druid services to emit [metrics](../operations/metrics.md) reg
### Metrics monitors for each service
-Metric monitoring is an essential part of Druid operations. Druid includes
+Metric monitoring is an essential part of Druid operations.
+Monitors can be enabled by configuring the property `druid.monitoring.monitors` in the common configuration file, `common.runtime.properties`.
+If a monitor is not supported on a certain service, it will simply be ignored while starting up that service.
-:::caution
-
-The `runtime.properties` file for each service overrides the common configuration file (`common.runtime.properties`). They are not additive. This means that if you add any monitors to a specific service, that service only has the monitors specified in its `runtime.properties` file even if there are additional ones listed in the common file.
-
-:::
-
-
-The following table lists the monitors that are available and the services you could configure the monitor for:
+The following table lists available monitors and the respective services where they are supported:
|Name|Description|Service|
|----|-----------|-------|
diff --git a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/EmbeddedKafkaClusterMetricsTest.java b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/EmbeddedKafkaClusterMetricsTest.java
index 8d10bae6e4c1..320c14a56281 100644
--- a/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/EmbeddedKafkaClusterMetricsTest.java
+++ b/embedded-tests/src/test/java/org/apache/druid/testing/embedded/indexing/EmbeddedKafkaClusterMetricsTest.java
@@ -119,7 +119,11 @@ public void stop()
.addCommonProperty("druid.emitter", "composing")
.addCommonProperty("druid.emitter.composing.emitters", "[\"latching\",\"kafka\"]")
.addCommonProperty("druid.monitoring.emissionPeriod", "PT0.1s")
- .addCommonProperty("druid.monitoring.monitors", "[\"org.apache.druid.java.util.metrics.JvmMonitor\"]")
+ .addCommonProperty(
+ "druid.monitoring.monitors",
+ "[\"org.apache.druid.java.util.metrics.JvmMonitor\","
+ + "\"org.apache.druid.server.metrics.TaskCountStatsMonitor\"]"
+ )
.addResource(kafkaServer)
.addServer(coordinator)
.addServer(overlord)
@@ -208,6 +212,12 @@ public void test_ingestClusterMetrics_withConcurrentCompactionSupervisor_andSkip
o -> o.postSupervisor(kafkaSupervisorSpec)
);
+ // Wait for a task to succeed
+ overlord.latchableEmitter().waitForEventAggregate(
+ event -> event.hasMetricName("task/success/count")
+ .hasDimension(DruidMetrics.DATASOURCE, dataSource),
+ agg -> agg.hasSumAtLeast(1)
+ );
// Wait for some segments to be published
overlord.latchableEmitter().waitForEvent(
event -> event.hasMetricName("segment/txn/success")
diff --git a/server/src/main/java/org/apache/druid/client/cache/CacheMonitor.java b/server/src/main/java/org/apache/druid/client/cache/CacheMonitor.java
index 2c43d1ea7686..2f9ab2cbf3d5 100644
--- a/server/src/main/java/org/apache/druid/client/cache/CacheMonitor.java
+++ b/server/src/main/java/org/apache/druid/client/cache/CacheMonitor.java
@@ -20,11 +20,19 @@
package org.apache.druid.client.cache;
import com.google.inject.Inject;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
+@LoadScope(roles = {
+ NodeRole.BROKER_JSON_NAME,
+ NodeRole.HISTORICAL_JSON_NAME,
+ NodeRole.INDEXER_JSON_NAME,
+ NodeRole.PEON_JSON_NAME
+})
public class CacheMonitor extends AbstractMonitor
{
// package private for tests
diff --git a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java
index d49c19c6f882..10985b4b4d3a 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/GroupByStatsMonitor.java
@@ -21,6 +21,8 @@
import com.google.inject.Inject;
import org.apache.druid.collections.BlockingPool;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Merging;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
@@ -29,6 +31,12 @@
import java.nio.ByteBuffer;
+@LoadScope(roles = {
+ NodeRole.BROKER_JSON_NAME,
+ NodeRole.HISTORICAL_JSON_NAME,
+ NodeRole.INDEXER_JSON_NAME,
+ NodeRole.PEON_JSON_NAME
+})
public class GroupByStatsMonitor extends AbstractMonitor
{
private final GroupByStatsProvider groupByStatsProvider;
diff --git a/server/src/main/java/org/apache/druid/server/metrics/HistoricalMetricsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/HistoricalMetricsMonitor.java
index e7d6a74275b1..b26a55ad5a39 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/HistoricalMetricsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/HistoricalMetricsMonitor.java
@@ -23,6 +23,8 @@
import it.unimi.dsi.fastutil.objects.Object2LongMap;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import org.apache.druid.client.DruidServerConfig;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
@@ -33,6 +35,7 @@
import java.util.Map;
+@LoadScope(roles = NodeRole.HISTORICAL_JSON_NAME)
public class HistoricalMetricsMonitor extends AbstractMonitor
{
private final DruidServerConfig serverConfig;
diff --git a/server/src/main/java/org/apache/druid/server/metrics/MetricsModule.java b/server/src/main/java/org/apache/druid/server/metrics/MetricsModule.java
index 39fc2d622b4e..d6a25bea4ddf 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/MetricsModule.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/MetricsModule.java
@@ -22,7 +22,6 @@
import com.google.common.base.Supplier;
import com.google.common.collect.Iterables;
import com.google.inject.Binder;
-import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.Module;
@@ -34,6 +33,7 @@
import org.apache.druid.guice.JsonConfigProvider;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.ManageLifecycle;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Self;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.concurrent.Execs;
@@ -64,18 +64,14 @@
/**
* Sets up the {@link MonitorScheduler} to monitor things on a regular schedule. {@link Monitor}s must be explicitly
* bound in order to be loaded.
+ *
+ * For any service, a monitor is loaded only if the {@link NodeRole} of the service
+ * is included in the {@link LoadScope} of the monitor.
*/
public class MetricsModule implements Module
{
public static final String MONITORING_PROPERTY_PREFIX = "druid.monitoring";
private static final Logger log = new Logger(MetricsModule.class);
- private Set nodeRoles;
-
- @Inject
- public void setNodeRoles(@Self Set nodeRoles)
- {
- this.nodeRoles = nodeRoles;
- }
public static void register(Binder binder, Class extends Monitor> monitorClazz)
{
@@ -107,6 +103,7 @@ public MonitorScheduler getMonitorScheduler(
Supplier config,
MonitorsConfig monitorsConfig,
Set> monitorSet,
+ @Self Set nodeRoles,
ServiceEmitter emitter,
Injector injector
)
@@ -117,12 +114,14 @@ public MonitorScheduler getMonitorScheduler(
// but by injecting DataSourceTaskIdHolder early this cycle is avoided.
injector.getInstance(DataSourceTaskIdHolder.class);
for (Class extends Monitor> monitorClass : Iterables.concat(monitorsConfig.getMonitors(), monitorSet)) {
- monitors.add(injector.getInstance(monitorClass));
+ if (shouldLoadMonitor(monitorClass, nodeRoles)) {
+ monitors.add(injector.getInstance(monitorClass));
+ }
}
if (!monitors.isEmpty()) {
log.info(
- "Loaded %d monitors: %s",
+ "Loaded [%d] monitors: %s",
monitors.size(),
monitors.stream().map(monitor -> monitor.getClass().getName()).collect(Collectors.joining(", "))
);
@@ -218,4 +217,23 @@ public OshiSysMonitor getOshiSysMonitor(
return new OshiSysMonitor(dimensions, oshiSysConfig);
}
}
+
+ /**
+ * Checks if a monitor needs to be loaded on this service based on its node role.
+ */
+ private boolean shouldLoadMonitor(Class> monitorClass, Set nodeRoles)
+ {
+ final LoadScope loadScope = monitorClass.getAnnotation(LoadScope.class);
+ if (loadScope == null) {
+ // If annotation is not specified, check superclass (if one exists), otherwise load the monitor
+ Class> superClass = monitorClass.getSuperclass();
+ return superClass == null || shouldLoadMonitor(superClass, nodeRoles);
+ }
+ for (String role : loadScope.roles()) {
+ if (nodeRoles.contains(NodeRole.fromJsonName(role))) {
+ return true;
+ }
+ }
+ return false;
+ }
}
diff --git a/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java
index a9c43d77ee8b..70b68c15ca7d 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java
@@ -22,6 +22,8 @@
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import org.apache.druid.collections.BlockingPool;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Merging;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
@@ -31,6 +33,13 @@
import java.nio.ByteBuffer;
import java.util.Map;
+@LoadScope(roles = {
+ NodeRole.BROKER_JSON_NAME,
+ NodeRole.HISTORICAL_JSON_NAME,
+ NodeRole.ROUTER_JSON_NAME,
+ NodeRole.INDEXER_JSON_NAME,
+ NodeRole.PEON_JSON_NAME
+})
public class QueryCountStatsMonitor extends AbstractMonitor
{
private final KeyedDiff keyedDiff = new KeyedDiff();
diff --git a/server/src/main/java/org/apache/druid/server/metrics/SegmentStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/SegmentStatsMonitor.java
index 68b703f8951f..4afc745ae4fe 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/SegmentStatsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/SegmentStatsMonitor.java
@@ -23,6 +23,8 @@
import com.google.inject.Inject;
import org.apache.druid.client.DruidServerConfig;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEventBuilder;
@@ -39,6 +41,7 @@
*
* It keeps track of the average number of rows in a segment and the distribution of segments according to rowCount.
*/
+@LoadScope(roles = NodeRole.HISTORICAL_JSON_NAME)
public class SegmentStatsMonitor extends AbstractMonitor
{
private final DruidServerConfig serverConfig;
diff --git a/server/src/main/java/org/apache/druid/server/metrics/SubqueryCountStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/SubqueryCountStatsMonitor.java
index 57da36ac1546..561f53a6ee95 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/SubqueryCountStatsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/SubqueryCountStatsMonitor.java
@@ -21,6 +21,8 @@
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
@@ -31,6 +33,7 @@
/**
* Monitors and emits the metrics corresponding to the subqueries and their materialization.
*/
+@LoadScope(roles = NodeRole.BROKER_JSON_NAME)
public class SubqueryCountStatsMonitor extends AbstractMonitor
{
diff --git a/server/src/main/java/org/apache/druid/server/metrics/TaskCountStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/TaskCountStatsMonitor.java
index aca9fec3e4e3..f6598c0e1159 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/TaskCountStatsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/TaskCountStatsMonitor.java
@@ -20,6 +20,8 @@
package org.apache.druid.server.metrics;
import com.google.inject.Inject;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
@@ -30,6 +32,7 @@
import java.util.Map;
+@LoadScope(roles = NodeRole.OVERLORD_JSON_NAME)
public class TaskCountStatsMonitor extends AbstractMonitor
{
private final TaskCountStatsProvider statsProvider;
diff --git a/server/src/main/java/org/apache/druid/server/metrics/TaskSlotCountStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/TaskSlotCountStatsMonitor.java
index 46227a3ca452..dda5b0d1579d 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/TaskSlotCountStatsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/TaskSlotCountStatsMonitor.java
@@ -20,12 +20,15 @@
package org.apache.druid.server.metrics;
import com.google.inject.Inject;
+import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
import java.util.Map;
+@LoadScope(roles = NodeRole.OVERLORD_JSON_NAME)
public class TaskSlotCountStatsMonitor extends AbstractMonitor
{
private final TaskSlotCountStatsProvider statsProvider;
diff --git a/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java
index a568ad85d423..6dfc44e4bd99 100644
--- a/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java
+++ b/server/src/main/java/org/apache/druid/server/metrics/WorkerTaskCountStatsMonitor.java
@@ -22,6 +22,7 @@
import com.google.inject.Inject;
import com.google.inject.Injector;
import org.apache.druid.discovery.NodeRole;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Self;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
@@ -31,6 +32,7 @@
import java.util.Map;
import java.util.Set;
+@LoadScope(roles = {NodeRole.INDEXER_JSON_NAME, NodeRole.MIDDLE_MANAGER_JSON_NAME})
public class WorkerTaskCountStatsMonitor extends AbstractMonitor
{
private final WorkerTaskCountStatsProvider statsProvider;
diff --git a/server/src/test/java/org/apache/druid/server/metrics/MetricsModuleTest.java b/server/src/test/java/org/apache/druid/server/metrics/MetricsModuleTest.java
index f4563eb8e5bb..9337f04c11e6 100644
--- a/server/src/test/java/org/apache/druid/server/metrics/MetricsModuleTest.java
+++ b/server/src/test/java/org/apache/druid/server/metrics/MetricsModuleTest.java
@@ -34,6 +34,7 @@
import org.apache.druid.guice.JsonConfigProvider;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.LifecycleModule;
+import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Self;
import org.apache.druid.initialization.Initialization;
import org.apache.druid.initialization.ServerInjectorBuilder;
@@ -41,8 +42,10 @@
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEventBuilder;
+import org.apache.druid.java.util.metrics.AbstractMonitor;
import org.apache.druid.java.util.metrics.BasicMonitorScheduler;
import org.apache.druid.java.util.metrics.ClockDriftSafeMonitorScheduler;
+import org.apache.druid.java.util.metrics.Monitor;
import org.apache.druid.java.util.metrics.MonitorScheduler;
import org.apache.druid.java.util.metrics.NoopOshiSysMonitor;
import org.apache.druid.java.util.metrics.NoopSysMonitor;
@@ -62,6 +65,7 @@
import javax.validation.Validation;
import javax.validation.Validator;
import java.util.Properties;
+import java.util.Set;
public class MetricsModuleTest
{
@@ -158,6 +162,55 @@ public void testGetBasicMonitorSchedulerViaConfig()
Assert.assertSame(BasicMonitorScheduler.class, monitorScheduler.getClass());
}
+ @Test
+ public void test_monitorScheduler_addsMonitor_ifNodeRoleIsInLoadScope()
+ {
+ final Properties properties = new Properties();
+ properties.setProperty(
+ "druid.monitoring.monitors",
+ StringUtils.format("[\"%s\"]", OverlordOnlyMonitor.class.getName())
+ );
+
+ verifyThatMonitorIsLoadedOnlyOn(
+ OverlordOnlyMonitor.class,
+ properties,
+ NodeRole.OVERLORD
+ );
+ }
+
+ @Test
+ public void test_monitorScheduler_addsMonitor_ifNodeRoleIsInLoadScopeOfSuperClass()
+ {
+ final Properties properties = new Properties();
+ properties.setProperty(
+ "druid.monitoring.monitors",
+ StringUtils.format("[\"%s\"]", OverlordAndCoordinatorMonitor2.class.getName())
+ );
+
+ verifyThatMonitorIsLoadedOnlyOn(
+ OverlordAndCoordinatorMonitor2.class,
+ properties,
+ NodeRole.COORDINATOR,
+ NodeRole.OVERLORD
+ );
+ }
+
+ @Test
+ public void test_monitorScheduler_addsMonitor_ifNoLoadScopeIsDefined()
+ {
+ final Properties properties = new Properties();
+ properties.setProperty(
+ "druid.monitoring.monitors",
+ StringUtils.format("[\"%s\"]", AllNodeMonitor.class.getName())
+ );
+
+ verifyThatMonitorIsLoadedOnlyOn(
+ AllNodeMonitor.class,
+ properties,
+ NodeRole.values()
+ );
+ }
+
@Test
public void testGetMonitorSchedulerUnknownSchedulerException()
{
@@ -259,4 +312,47 @@ private static Injector createInjector(Properties properties, ImmutableSet void verifyThatMonitorIsLoadedOnlyOn(
+ Class monitorClass,
+ Properties properties,
+ NodeRole... supportedRoles
+ )
+ {
+ final Set supportedRoleSet = Set.of(supportedRoles);
+ for (NodeRole role : NodeRole.values()) {
+ final MonitorScheduler monitorScheduler = createInjector(properties, ImmutableSet.of(role))
+ .getInstance(MonitorScheduler.class);
+ Assert.assertEquals(
+ supportedRoleSet.contains(role),
+ monitorScheduler.findMonitor(monitorClass).isPresent()
+ );
+ }
+ }
+
+ public static class AllNodeMonitor extends AbstractMonitor
+ {
+ @Override
+ public boolean doMonitor(ServiceEmitter emitter)
+ {
+ return false;
+ }
+ }
+
+ @LoadScope(roles = {NodeRole.COORDINATOR_JSON_NAME, NodeRole.OVERLORD_JSON_NAME})
+ public static class OverlordAndCoordinatorMonitor extends AllNodeMonitor
+ {
+ }
+
+ @LoadScope(roles = NodeRole.OVERLORD_JSON_NAME)
+ public static class OverlordOnlyMonitor extends OverlordAndCoordinatorMonitor
+ {
+ }
+
+ /**
+ * Uses load scope of super class.
+ */
+ public static class OverlordAndCoordinatorMonitor2 extends OverlordAndCoordinatorMonitor
+ {
+ }
}