diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index 933f131a4900..62c0521c2c93 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -57,7 +57,8 @@ Available Metrics |`query/count`|number of total queries|This metric is only available if the QueryCountStatsMonitor module is included.|| |`query/success/count`|number of queries successfully processed|This metric is only available if the QueryCountStatsMonitor module is included.|| |`query/failed/count`|number of failed queries|This metric is only available if the QueryCountStatsMonitor module is included.|| -|`query/interrupted/count`|number of queries interrupted due to cancellation or timeout|This metric is only available if the QueryCountStatsMonitor module is included.|| +|`query/interrupted/count`|number of queries interrupted due to cancellation.|This metric is only available if the QueryCountStatsMonitor module is included.|| +|`query/timeout/count`|number of timed out queries.|This metric is only available if the QueryCountStatsMonitor module is included.|| |`sqlQuery/time`|Milliseconds taken to complete a SQL query.|id, nativeQueryIds, dataSource, remoteAddress, success.|< 1s| |`sqlQuery/bytes`|number of bytes returned in SQL query response.|id, nativeQueryIds, dataSource, remoteAddress, success.| | @@ -74,7 +75,8 @@ Available Metrics |`query/count`|number of total queries|This metric is only available if the QueryCountStatsMonitor module is included.|| |`query/success/count`|number of queries successfully processed|This metric is only available if the QueryCountStatsMonitor module is included.|| |`query/failed/count`|number of failed queries|This metric is only available if the QueryCountStatsMonitor module is included.|| -|`query/interrupted/count`|number of queries interrupted due to cancellation or timeout|This metric is only available if the QueryCountStatsMonitor module is included.|| +|`query/interrupted/count`|number of queries interrupted due to cancellation.|This metric is only available if the QueryCountStatsMonitor module is included.|| +|`query/timeout/count`|number of timed out queries.|This metric is only available if the QueryCountStatsMonitor module is included.|| ### Real-time @@ -86,7 +88,8 @@ Available Metrics |`query/count`|number of total queries|This metric is only available if the QueryCountStatsMonitor module is included.|| |`query/success/count`|number of queries successfully processed|This metric is only available if the QueryCountStatsMonitor module is included.|| |`query/failed/count`|number of failed queries|This metric is only available if the QueryCountStatsMonitor module is included.|| -|`query/interrupted/count`|number of queries interrupted due to cancellation or timeout|This metric is only available if the QueryCountStatsMonitor module is included.|| +|`query/interrupted/count`|number of queries interrupted due to cancellation.|This metric is only available if the QueryCountStatsMonitor module is included.|| +|`query/timeout/count`|number of timed out queries.|This metric is only available if the QueryCountStatsMonitor module is included.|| ### Jetty diff --git a/extensions-contrib/opentsdb-emitter/src/main/resources/defaultMetrics.json b/extensions-contrib/opentsdb-emitter/src/main/resources/defaultMetrics.json index d00b79265420..c2d38455cf99 100644 --- a/extensions-contrib/opentsdb-emitter/src/main/resources/defaultMetrics.json +++ b/extensions-contrib/opentsdb-emitter/src/main/resources/defaultMetrics.json @@ -19,6 +19,7 @@ "query/success/count": [], "query/failed/count": [], "query/interrupted/count": [], + "query/timeout/count": [], "query/wait/time": [ "segment" ], diff --git a/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json b/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json index 1a62d70adba6..287aaf6ecf9e 100644 --- a/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json +++ b/extensions-contrib/statsd-emitter/src/main/resources/defaultMetricDimensions.json @@ -16,6 +16,7 @@ "query/success/count" : { "dimensions" : [], "type" : "count" }, "query/failed/count" : { "dimensions" : [], "type" : "count" }, "query/interrupted/count" : { "dimensions" : [], "type" : "count" }, + "query/timeout/count" : { "dimensions" : [], "type" : "count" }, "query/cache/delta/numEntries" : { "dimensions" : [], "type" : "count" }, "query/cache/delta/sizeBytes" : { "dimensions" : [], "type" : "count" }, diff --git a/server/src/main/java/org/apache/druid/server/AsyncQueryForwardingServlet.java b/server/src/main/java/org/apache/druid/server/AsyncQueryForwardingServlet.java index 4ecb86ceb993..acf80f2b8b46 100644 --- a/server/src/main/java/org/apache/druid/server/AsyncQueryForwardingServlet.java +++ b/server/src/main/java/org/apache/druid/server/AsyncQueryForwardingServlet.java @@ -425,6 +425,14 @@ public long getInterruptedQueryCount() return interruptedQueryCount.get(); } + @Override + public long getTimedOutQueryCount() + { + // Query timeout metric is not relevant here and this metric is already being tracked in the Broker and the + // data nodes using QueryResource + return 0L; + } + @VisibleForTesting static String getAvaticaConnectionId(Map requestMap) { diff --git a/server/src/main/java/org/apache/druid/server/QueryResource.java b/server/src/main/java/org/apache/druid/server/QueryResource.java index 27ac513bd6e1..75e68ad98996 100644 --- a/server/src/main/java/org/apache/druid/server/QueryResource.java +++ b/server/src/main/java/org/apache/druid/server/QueryResource.java @@ -108,6 +108,7 @@ public class QueryResource implements QueryCountStatsProvider private final AtomicLong successfulQueryCount = new AtomicLong(); private final AtomicLong failedQueryCount = new AtomicLong(); private final AtomicLong interruptedQueryCount = new AtomicLong(); + private final AtomicLong timedOutQueryCount = new AtomicLong(); @Inject public QueryResource( @@ -332,7 +333,7 @@ public void write(OutputStream outputStream) throws WebApplicationException return ioReaderWriter.gotError(e); } catch (QueryTimeoutException timeout) { - interruptedQueryCount.incrementAndGet(); + timedOutQueryCount.incrementAndGet(); queryLifecycle.emitLogsAndMetrics(timeout, req.getRemoteAddr(), -1); return ioReaderWriter.gotTimeout(timeout); } @@ -514,4 +515,10 @@ public long getInterruptedQueryCount() { return interruptedQueryCount.get(); } + + @Override + public long getTimedOutQueryCount() + { + return timedOutQueryCount.get(); + } } diff --git a/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java b/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java index c3b956c23d92..c58be1e04c78 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java +++ b/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsMonitor.java @@ -48,13 +48,16 @@ public boolean doMonitor(ServiceEmitter emitter) final long successfulQueryCount = statsProvider.getSuccessfulQueryCount(); final long failedQueryCount = statsProvider.getFailedQueryCount(); final long interruptedQueryCount = statsProvider.getInterruptedQueryCount(); + final long timedOutQueryCount = statsProvider.getTimedOutQueryCount(); + Map diff = keyedDiff.to( "queryCountStats", ImmutableMap.of( - "query/count", successfulQueryCount + failedQueryCount + interruptedQueryCount, + "query/count", successfulQueryCount + failedQueryCount + interruptedQueryCount + timedOutQueryCount, "query/success/count", successfulQueryCount, "query/failed/count", failedQueryCount, - "query/interrupted/count", interruptedQueryCount + "query/interrupted/count", interruptedQueryCount, + "query/timeout/count", timedOutQueryCount ) ); if (diff != null) { diff --git a/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsProvider.java b/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsProvider.java index 9c920857f801..15e608c499dc 100644 --- a/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsProvider.java +++ b/server/src/main/java/org/apache/druid/server/metrics/QueryCountStatsProvider.java @@ -21,9 +21,23 @@ public interface QueryCountStatsProvider { + /** + * Returns the number of successful queries processed during the emission period. + */ long getSuccessfulQueryCount(); + /** + * Returns the number of failed queries during the emission period. + */ long getFailedQueryCount(); + /** + * Returns the number of queries interrupted due to cancellation during the emission period. + */ long getInterruptedQueryCount(); + + /** + * Returns the number of timed out queries during the emission period. + */ + long getTimedOutQueryCount(); } diff --git a/server/src/test/java/org/apache/druid/server/QueryResourceTest.java b/server/src/test/java/org/apache/druid/server/QueryResourceTest.java index 5c501077ab26..899aa14be940 100644 --- a/server/src/test/java/org/apache/druid/server/QueryResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/QueryResourceTest.java @@ -684,6 +684,8 @@ public QueryRunner getQueryRunnerForSegments(Query query, Iterable resultMap = emitter.getEvents() + .stream() + .collect(Collectors.toMap( + event -> (String) event.toMap().get("metric"), + event -> (Long) event.toMap().get("value") + )); + Assert.assertEquals(5, resultMap.size()); + Assert.assertEquals(1L, (long) resultMap.get("query/success/count")); + Assert.assertEquals(2L, (long) resultMap.get("query/failed/count")); + Assert.assertEquals(3L, (long) resultMap.get("query/interrupted/count")); + Assert.assertEquals(4L, (long) resultMap.get("query/timeout/count")); + Assert.assertEquals(10L, (long) resultMap.get("query/count")); + + } +}