Skip to content
1 change: 1 addition & 0 deletions docs/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ Metric monitoring is an essential part of Druid operations. The following monit
|`org.apache.druid.server.metrics.TaskCountStatsMonitor`|Reports how many ingestion tasks are currently running/pending/waiting and also the number of successful/failed tasks per emission period.|
|`org.apache.druid.server.metrics.TaskSlotCountStatsMonitor`|Reports metrics about task slot usage per emission period.|
|`org.apache.druid.server.metrics.WorkerTaskCountStatsMonitor`|Reports how many ingestion tasks are currently running/pending/waiting, the number of successful/failed tasks, and metrics about task slot usage for the reporting worker, per emission period. Only supported by middleManager node types.|
|`org.apache.druid.server.metrics.ServiceStatusMonitor`|Reports a heartbeat for the service.|

For example, you might configure monitors on all processes for system and JVM information within `common.runtime.properties` as follows:

Expand Down
6 changes: 6 additions & 0 deletions docs/operations/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,12 @@ If `emitBalancingStats` is set to `true` in the Coordinator [dynamic configurati

## General Health

### Service Health

|Metric|Description|Dimensions|Normal Value|
|------|-----------|----------|------------|
| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. |`leader` on the Overlord and Coordinator.|1|

### Historical

|Metric|Description|Dimensions|Normal Value|
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.server.metrics;

import com.google.common.base.Supplier;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;

import java.util.Map;

/**
* Reports a heartbeat for the service.
*/
Comment thread
suneet-s marked this conversation as resolved.
public class ServiceStatusMonitor extends AbstractMonitor
{

@Named("heartbeat")
@Inject(optional = true)
Supplier<Map<String, Object>> heartbeatTagsSupplier = null;

@Override
public boolean doMonitor(ServiceEmitter emitter)
{
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
if (heartbeatTagsSupplier != null && heartbeatTagsSupplier.get() != null) {
heartbeatTagsSupplier.get().forEach((k, v) -> {
builder.setDimension(k, v);
});
}

emitter.emit(builder.build("service/heartbeat", 1));
return true;
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.server.metrics;

import com.google.common.base.Supplier;
import org.apache.druid.java.util.metrics.StubServiceEmitter;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.HashMap;
import java.util.Map;

public class ServiceStatusMonitorTest
{

private ServiceStatusMonitor monitor;
private Map<String, Object> heartbeatTags;
private Supplier<Map<String, Object>> heartbeatTagsSupplier = () -> heartbeatTags;
private static String HEARTBEAT_METRIC_KEY = "service/heartbeat";

@Before
public void setUp()
{
monitor = new ServiceStatusMonitor();
Comment thread
suneet-s marked this conversation as resolved.
heartbeatTags = new HashMap<>();
monitor.heartbeatTagsSupplier = heartbeatTagsSupplier;
}

@Test
public void testDefaultHeartbeatReported()
{
final StubServiceEmitter emitter = new StubServiceEmitter("service", "host");
monitor.doMonitor(emitter);
Assert.assertEquals(1, emitter.getEvents().size());
Assert.assertEquals(HEARTBEAT_METRIC_KEY, emitter.getEvents().get(0).toMap().get("metric"));
Assert.assertEquals(1, emitter.getEvents().get(0).toMap().get("value"));
}

@Test
public void testLeaderTag()
{
heartbeatTags.put("leader", 1);
final StubServiceEmitter emitter = new StubServiceEmitter("service", "host");
monitor.doMonitor(emitter);
Assert.assertEquals(1, emitter.getEvents().size());
Assert.assertEquals(1, emitter.getEvents().get(0).toMap().get("leader"));
Assert.assertEquals(HEARTBEAT_METRIC_KEY, emitter.getEvents().get(0).toMap().get("metric"));
Assert.assertEquals(1, emitter.getEvents().get(0).toMap().get("value"));
}

@Test
public void testMoreThanOneTag()
{
heartbeatTags.put("leader", 1);
heartbeatTags.put("taskRunner", "http");
final StubServiceEmitter emitter = new StubServiceEmitter("service", "host");
monitor.doMonitor(emitter);
Assert.assertEquals(1, emitter.getEvents().size());
Assert.assertEquals(1, emitter.getEvents().get(0).toMap().get("leader"));
Assert.assertEquals("http", emitter.getEvents().get(0).toMap().get("taskRunner"));
Assert.assertEquals(HEARTBEAT_METRIC_KEY, emitter.getEvents().get(0).toMap().get("metric"));
Assert.assertEquals(1, emitter.getEvents().get(0).toMap().get("value"));
}
}
30 changes: 30 additions & 0 deletions services/src/main/java/org/apache/druid/cli/CliCoordinator.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
import com.github.rvesse.airline.annotations.Command;
import com.google.common.base.Predicates;
import com.google.common.base.Strings;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder;
import com.google.inject.Inject;
import com.google.inject.Key;
import com.google.inject.Module;
import com.google.inject.Provider;
import com.google.inject.Provides;
import com.google.inject.TypeLiteral;
import com.google.inject.name.Names;
import com.google.inject.util.Providers;
import org.apache.curator.framework.CuratorFramework;
Expand Down Expand Up @@ -119,8 +121,10 @@
import org.joda.time.Duration;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutorService;
Expand Down Expand Up @@ -330,6 +334,10 @@ public void configure(Binder binder)
binder.bind(TaskStorage.class).toProvider(Providers.of(null));
binder.bind(TaskMaster.class).toProvider(Providers.of(null));
binder.bind(RowIngestionMetersFactory.class).toProvider(Providers.of(null));
// Bind HeartbeatSupplier only when the service operates independently of Overlord.
binder.bind(new TypeLiteral<Supplier<Map<String, Object>>>() {})
.annotatedWith(Names.named("heartbeat"))
.toProvider(HeartbeatSupplier.class);
}

binder.bind(CoordinatorCustomDutyGroups.class)
Expand Down Expand Up @@ -459,4 +467,26 @@ public CoordinatorCustomDutyGroups get()
}
}
}

private static class HeartbeatSupplier implements Provider<Supplier<Map<String, Object>>>
{
private final DruidCoordinator coordinator;

@Inject
public HeartbeatSupplier(DruidCoordinator coordinator)
{
this.coordinator = coordinator;
}

@Override
public Supplier<Map<String, Object>> get()
{
return () -> {
Map<String, Object> heartbeatTags = new HashMap<>();
heartbeatTags.put("leader", coordinator.isLeader() ? 1 : 0);

return heartbeatTags;
};
}
}
}
17 changes: 17 additions & 0 deletions services/src/main/java/org/apache/druid/cli/CliOverlord.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.rvesse.airline.annotations.Command;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder;
Expand All @@ -32,6 +33,7 @@
import com.google.inject.TypeLiteral;
import com.google.inject.multibindings.MapBinder;
import com.google.inject.multibindings.Multibinder;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
import com.google.inject.servlet.GuiceFilter;
import com.google.inject.util.Providers;
Expand Down Expand Up @@ -134,7 +136,9 @@
import org.eclipse.jetty.servlet.ServletContextHandler;
import org.eclipse.jetty.servlet.ServletHolder;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

Expand Down Expand Up @@ -354,6 +358,19 @@ public TaskStorageDirTracker getTaskStorageDirTracker(WorkerConfig workerConfig,
{
return TaskStorageDirTracker.fromConfigs(workerConfig, taskConfig);
}

@Provides
@LazySingleton
@Named("heartbeat")
public Supplier<Map<String, Object>> getHeartbeatSupplier(TaskMaster taskMaster)
{
return () -> {
Map<String, Object> heartbeatTags = new HashMap<>();
heartbeatTags.put("leader", taskMaster.isLeader() ? 1 : 0);

return heartbeatTags;
};
}
};
}

Expand Down