Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions docs/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -1924,16 +1924,11 @@ You can configure Druid services to emit [metrics](../operations/metrics.md) reg

### Metrics monitors for each service

Metric monitoring is an essential part of Druid operations. Druid includes
Metric monitoring is an essential part of Druid operations.
Monitors can be enabled by configuring the property `druid.monitoring.monitors` in the common configuration file, `common.runtime.properties`.
If a monitor is not supported on a certain service, it will simply be ignored while starting up that service.

:::caution

The `runtime.properties` file for each service overrides the common configuration file (`common.runtime.properties`). They are not additive. This means that if you add any monitors to a specific service, that service only has the monitors specified in its `runtime.properties` file even if there are additional ones listed in the common file.

:::


The following table lists the monitors that are available and the services you could configure the monitor for:
The following table lists available monitors and the respective services where they are supported:

|Name|Description|Service|
|----|-----------|-------|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,11 @@ public void stop()
.addCommonProperty("druid.emitter", "composing")
.addCommonProperty("druid.emitter.composing.emitters", "[\"latching\",\"kafka\"]")
.addCommonProperty("druid.monitoring.emissionPeriod", "PT0.1s")
.addCommonProperty("druid.monitoring.monitors", "[\"org.apache.druid.java.util.metrics.JvmMonitor\"]")
.addCommonProperty(
"druid.monitoring.monitors",
"[\"org.apache.druid.java.util.metrics.JvmMonitor\","
+ "\"org.apache.druid.server.metrics.TaskCountStatsMonitor\"]"
)
.addResource(kafkaServer)
.addServer(coordinator)
.addServer(overlord)
Expand Down Expand Up @@ -208,6 +212,12 @@ public void test_ingestClusterMetrics_withConcurrentCompactionSupervisor_andSkip
o -> o.postSupervisor(kafkaSupervisorSpec)
);

// Wait for a task to succeed
overlord.latchableEmitter().waitForEventAggregate(
event -> event.hasMetricName("task/success/count")
.hasDimension(DruidMetrics.DATASOURCE, dataSource),
agg -> agg.hasSumAtLeast(1)
);
// Wait for some segments to be published
overlord.latchableEmitter().waitForEvent(
event -> event.hasMetricName("segment/txn/success")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,19 @@
package org.apache.druid.client.cache;

import com.google.inject.Inject;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;

@LoadScope(roles = {
NodeRole.BROKER_JSON_NAME,
NodeRole.HISTORICAL_JSON_NAME,
NodeRole.INDEXER_JSON_NAME,
NodeRole.PEON_JSON_NAME
})
public class CacheMonitor extends AbstractMonitor
{
// package private for tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

import com.google.inject.Inject;
import org.apache.druid.collections.BlockingPool;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Merging;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
Expand All @@ -29,6 +31,12 @@

import java.nio.ByteBuffer;

@LoadScope(roles = {
NodeRole.BROKER_JSON_NAME,
NodeRole.HISTORICAL_JSON_NAME,
NodeRole.INDEXER_JSON_NAME,
NodeRole.PEON_JSON_NAME
})
public class GroupByStatsMonitor extends AbstractMonitor
{
private final GroupByStatsProvider groupByStatsProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import it.unimi.dsi.fastutil.objects.Object2LongMap;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import org.apache.druid.client.DruidServerConfig;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
Expand All @@ -33,6 +35,7 @@

import java.util.Map;

@LoadScope(roles = NodeRole.HISTORICAL_JSON_NAME)
public class HistoricalMetricsMonitor extends AbstractMonitor
{
private final DruidServerConfig serverConfig;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import com.google.common.base.Supplier;
import com.google.common.collect.Iterables;
import com.google.inject.Binder;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.Module;
Expand All @@ -34,6 +33,7 @@
import org.apache.druid.guice.JsonConfigProvider;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.ManageLifecycle;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Self;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.concurrent.Execs;
Expand Down Expand Up @@ -64,18 +64,14 @@
/**
* Sets up the {@link MonitorScheduler} to monitor things on a regular schedule. {@link Monitor}s must be explicitly
* bound in order to be loaded.
* <p>
* For any service, a monitor is loaded only if the {@link NodeRole} of the service
* is included in the {@link LoadScope} of the monitor.
*/
public class MetricsModule implements Module
{
public static final String MONITORING_PROPERTY_PREFIX = "druid.monitoring";
private static final Logger log = new Logger(MetricsModule.class);
private Set<NodeRole> nodeRoles;

@Inject
public void setNodeRoles(@Self Set<NodeRole> nodeRoles)
{
this.nodeRoles = nodeRoles;
}

public static void register(Binder binder, Class<? extends Monitor> monitorClazz)
{
Expand Down Expand Up @@ -107,6 +103,7 @@ public MonitorScheduler getMonitorScheduler(
Supplier<DruidMonitorSchedulerConfig> config,
MonitorsConfig monitorsConfig,
Set<Class<? extends Monitor>> monitorSet,
@Self Set<NodeRole> nodeRoles,
ServiceEmitter emitter,
Injector injector
)
Expand All @@ -117,12 +114,14 @@ public MonitorScheduler getMonitorScheduler(
// but by injecting DataSourceTaskIdHolder early this cycle is avoided.
injector.getInstance(DataSourceTaskIdHolder.class);
for (Class<? extends Monitor> monitorClass : Iterables.concat(monitorsConfig.getMonitors(), monitorSet)) {
monitors.add(injector.getInstance(monitorClass));
if (shouldLoadMonitor(monitorClass, nodeRoles)) {
monitors.add(injector.getInstance(monitorClass));
}
}

if (!monitors.isEmpty()) {
log.info(
"Loaded %d monitors: %s",
"Loaded [%d] monitors: %s",
monitors.size(),
monitors.stream().map(monitor -> monitor.getClass().getName()).collect(Collectors.joining(", "))
);
Expand Down Expand Up @@ -218,4 +217,23 @@ public OshiSysMonitor getOshiSysMonitor(
return new OshiSysMonitor(dimensions, oshiSysConfig);
}
}

/**
* Checks if a monitor needs to be loaded on this service based on its node role.
*/
private boolean shouldLoadMonitor(Class<?> monitorClass, Set<NodeRole> nodeRoles)
{
final LoadScope loadScope = monitorClass.getAnnotation(LoadScope.class);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any desire to get fancy here and check superclass annotations too? Probably not necessary but just something that came to mind.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, that can be done.

I guess we would need to check the superclass recursively if there is no annotation defined at the current class level. Would that make sense?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's what I was imagining.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Updated to handle annotations on super-class and added tests.

if (loadScope == null) {
// If annotation is not specified, check superclass (if one exists), otherwise load the monitor
Class<?> superClass = monitorClass.getSuperclass();
return superClass == null || shouldLoadMonitor(superClass, nodeRoles);
}
for (String role : loadScope.roles()) {
if (nodeRoles.contains(NodeRole.fromJsonName(role))) {
return true;
}
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import org.apache.druid.collections.BlockingPool;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Merging;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
Expand All @@ -31,6 +33,13 @@
import java.nio.ByteBuffer;
import java.util.Map;

@LoadScope(roles = {
NodeRole.BROKER_JSON_NAME,
NodeRole.HISTORICAL_JSON_NAME,
NodeRole.ROUTER_JSON_NAME,
NodeRole.INDEXER_JSON_NAME,
NodeRole.PEON_JSON_NAME
})
public class QueryCountStatsMonitor extends AbstractMonitor
{
private final KeyedDiff keyedDiff = new KeyedDiff();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import com.google.inject.Inject;
import org.apache.druid.client.DruidServerConfig;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEventBuilder;
Expand All @@ -39,6 +41,7 @@
*
* It keeps track of the average number of rows in a segment and the distribution of segments according to rowCount.
*/
@LoadScope(roles = NodeRole.HISTORICAL_JSON_NAME)
public class SegmentStatsMonitor extends AbstractMonitor
{
private final DruidServerConfig serverConfig;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
Expand All @@ -31,6 +33,7 @@
/**
* Monitors and emits the metrics corresponding to the subqueries and their materialization.
*/
@LoadScope(roles = NodeRole.BROKER_JSON_NAME)
public class SubqueryCountStatsMonitor extends AbstractMonitor
{

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
package org.apache.druid.server.metrics;

import com.google.inject.Inject;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
Expand All @@ -30,6 +32,7 @@

import java.util.Map;

@LoadScope(roles = NodeRole.OVERLORD_JSON_NAME)
public class TaskCountStatsMonitor extends AbstractMonitor
{
private final TaskCountStatsProvider statsProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@
package org.apache.druid.server.metrics;

import com.google.inject.Inject;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;

import java.util.Map;

@LoadScope(roles = NodeRole.OVERLORD_JSON_NAME)
public class TaskSlotCountStatsMonitor extends AbstractMonitor
{
private final TaskSlotCountStatsProvider statsProvider;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.inject.Inject;
import com.google.inject.Injector;
import org.apache.druid.discovery.NodeRole;
import org.apache.druid.guice.annotations.LoadScope;
import org.apache.druid.guice.annotations.Self;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
Expand All @@ -31,6 +32,7 @@
import java.util.Map;
import java.util.Set;

@LoadScope(roles = {NodeRole.INDEXER_JSON_NAME, NodeRole.MIDDLE_MANAGER_JSON_NAME})
public class WorkerTaskCountStatsMonitor extends AbstractMonitor
{
private final WorkerTaskCountStatsProvider statsProvider;
Expand Down
Loading
Loading