From 228d69bb36f08fd21c4f8580b93c29ff0cbe7c7d Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Thu, 24 Aug 2017 16:39:21 -0700 Subject: [PATCH 1/6] Add graceful shutdown timeout --- docs/content/configuration/broker.md | 1 + docs/content/configuration/historical.md | 1 + .../druid/server/initialization/ServerConfig.java | 10 ++++++++++ .../initialization/jetty/JettyServerModule.java | 13 +++++++++++++ 4 files changed, 25 insertions(+) diff --git a/docs/content/configuration/broker.md b/docs/content/configuration/broker.md index 349d96442ffd..734267ededa7 100644 --- a/docs/content/configuration/broker.md +++ b/docs/content/configuration/broker.md @@ -39,6 +39,7 @@ Druid uses Jetty to serve HTTP requests. |`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m| |`druid.server.http.defaultQueryTimeout`|Query timeout in millis, beyond which unfinished queries will be cancelled|300000| |`druid.server.http.maxScatterGatherBytes`|Maximum number of bytes gathered from data nodes such as historicals and realtime processes to execute a query. This is an advance configuration that allows to protect in case broker is under heavy load and not utilizing the data gathered in memory fast enough and leading to OOMs. This limit can be further reduced at query time using `maxScatterGatherBytes` in the context. Note that having large limit is not necessarily bad if broker is never under heavy concurrent load in which case data gathered is processed quickly and freeing up the memory used.|Long.MAX_VALUE| +|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|PT5s| |`druid.broker.http.numConnections`|Size of connection pool for the Broker to connect to historical and real-time processes. If there are more queries than this number that all need to speak to the same node, then they will queue up.|20| |`druid.broker.http.compressionCodec`|Compression codec the Broker uses to communicate with historical and real-time processes. May be "gzip" or "identity".|gzip| |`druid.broker.http.readTimeout`|The timeout for data reads from historical and real-time processes.|PT15M| diff --git a/docs/content/configuration/historical.md b/docs/content/configuration/historical.md index 28319fd230f3..53250c6fbab8 100644 --- a/docs/content/configuration/historical.md +++ b/docs/content/configuration/historical.md @@ -50,6 +50,7 @@ Druid uses Jetty to serve HTTP requests. |`druid.server.http.numThreads`|Number of threads for HTTP requests.|max(10, (Number of cores * 17) / 16 + 2) + 30| |`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m| |`druid.server.http.defaultQueryTimeout`|Query timeout in millis, beyond which unfinished queries will be cancelled|300000| +|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|PT5s| #### Processing diff --git a/server/src/main/java/io/druid/server/initialization/ServerConfig.java b/server/src/main/java/io/druid/server/initialization/ServerConfig.java index 82e85318ee58..aeaa3a8fc56c 100644 --- a/server/src/main/java/io/druid/server/initialization/ServerConfig.java +++ b/server/src/main/java/io/druid/server/initialization/ServerConfig.java @@ -52,6 +52,10 @@ public class ServerConfig @JsonProperty private boolean tls = false; + @JsonProperty + @NotNull + private Period gracefulShutdownTimeout = new Period("PT5s"); + public int getNumThreads() { return numThreads; @@ -82,6 +86,11 @@ public boolean isTls() return tls; } + public Period getGracefulShutdownTimeout() + { + return gracefulShutdownTimeout; + } + @Override public boolean equals(Object o) { @@ -116,6 +125,7 @@ public String toString() ", maxScatterGatherBytes=" + maxScatterGatherBytes + ", plaintext=" + plaintext + ", tls=" + tls + + ", gracefulStopTimeout=" + gracefulShutdownTimeout + '}'; } } diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index 424eb21d322b..fec725819d5d 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -85,6 +85,7 @@ public class JettyServerModule extends JerseyServletModule private static final Logger log = new Logger(JettyServerModule.class); private static final AtomicInteger activeConnections = new AtomicInteger(); + private static final String GRACEFUL_SHUTDOWN_TIME = "graceful_shutdown_time"; @Override protected void configureServlets() @@ -227,6 +228,7 @@ static Server makeJettyServer(DruidNode node, ServerConfig config, TLSServerConf } server.setConnectors(connectors); + server.setAttribute(GRACEFUL_SHUTDOWN_TIME, config.getGracefulShutdownTimeout().getMillis()); return server; } @@ -253,6 +255,17 @@ public void start() throws Exception @Override public void stop() { + if (activeConnections.get() != 0) { + long graceFullShutDownTime = (int)server.getAttribute(GRACEFUL_SHUTDOWN_TIME); + log.info("Waiting for [%s] milliseconds for active requests to be zero, current active requests=[%s]", + graceFullShutDownTime, activeConnections.get()); + try { + Thread.sleep(graceFullShutDownTime); + } catch (InterruptedException e) { + log.error("Sleep has been interrupted, while waiting for active requests to be zero"); + } + } + log.info("Stopping Jetty Server with active requests=[%s]", activeConnections.get()); try { server.stop(); } From e9d329a60c47f776007f89c130162e3236d19d2b Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Thu, 24 Aug 2017 16:59:30 -0700 Subject: [PATCH 2/6] Handle interruptedException --- .../server/initialization/jetty/JettyServerModule.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index fec725819d5d..71bac826a5e0 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -256,13 +256,15 @@ public void start() throws Exception public void stop() { if (activeConnections.get() != 0) { - long graceFullShutDownTime = (int)server.getAttribute(GRACEFUL_SHUTDOWN_TIME); + long graceFullShutDownTime = (int) server.getAttribute(GRACEFUL_SHUTDOWN_TIME); log.info("Waiting for [%s] milliseconds for active requests to be zero, current active requests=[%s]", graceFullShutDownTime, activeConnections.get()); try { Thread.sleep(graceFullShutDownTime); - } catch (InterruptedException e) { + } + catch (InterruptedException e) { log.error("Sleep has been interrupted, while waiting for active requests to be zero"); + Thread.currentThread().interrupt(); } } log.info("Stopping Jetty Server with active requests=[%s]", activeConnections.get()); From 4f09b3fb76a1ab7f70358fd2c7756cf876fcb5c8 Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Mon, 28 Aug 2017 20:26:08 -0700 Subject: [PATCH 3/6] Incorporate code review comments --- .../initialization/jetty/JettyServerModule.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index 71bac826a5e0..e3ed94376573 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -228,7 +228,7 @@ static Server makeJettyServer(DruidNode node, ServerConfig config, TLSServerConf } server.setConnectors(connectors); - server.setAttribute(GRACEFUL_SHUTDOWN_TIME, config.getGracefulShutdownTimeout().getMillis()); + server.setAttribute(GRACEFUL_SHUTDOWN_TIME, config.getGracefulShutdownTimeout().toStandardDuration().getMillis()); return server; } @@ -256,17 +256,24 @@ public void start() throws Exception public void stop() { if (activeConnections.get() != 0) { - long graceFullShutDownTime = (int) server.getAttribute(GRACEFUL_SHUTDOWN_TIME); + long graceFulShutDownTime = (long) server.getAttribute(GRACEFUL_SHUTDOWN_TIME); log.info("Waiting for [%s] milliseconds for active requests to be zero, current active requests=[%s]", - graceFullShutDownTime, activeConnections.get()); + graceFulShutDownTime, activeConnections.get()); try { - Thread.sleep(graceFullShutDownTime); + Thread.sleep(graceFulShutDownTime); } catch (InterruptedException e) { log.error("Sleep has been interrupted, while waiting for active requests to be zero"); + stopImmediately(); Thread.currentThread().interrupt(); + return; } } + stopImmediately(); + } + + private void stopImmediately() + { log.info("Stopping Jetty Server with active requests=[%s]", activeConnections.get()); try { server.stop(); From 42a7d314b2e0d2ded8ce6ebb5c48aff25dd52c4c Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Tue, 29 Aug 2017 12:29:35 -0700 Subject: [PATCH 4/6] Address code review comments --- .../server/initialization/jetty/JettyServerModule.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index e3ed94376573..437c95e5cc8f 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -255,12 +255,12 @@ public void start() throws Exception @Override public void stop() { - if (activeConnections.get() != 0) { - long graceFulShutDownTime = (long) server.getAttribute(GRACEFUL_SHUTDOWN_TIME); + if (activeConnections.get() > 0) { + long gracefulShutDownTime = (long) server.getAttribute(GRACEFUL_SHUTDOWN_TIME); log.info("Waiting for [%s] milliseconds for active requests to be zero, current active requests=[%s]", - graceFulShutDownTime, activeConnections.get()); + gracefulShutDownTime, activeConnections.get()); try { - Thread.sleep(graceFulShutDownTime); + Thread.sleep(gracefulShutDownTime); } catch (InterruptedException e) { log.error("Sleep has been interrupted, while waiting for active requests to be zero"); From 127ed338ac47ed7408dea91d77fb9aad7f8616b6 Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Tue, 5 Sep 2017 15:36:58 -0700 Subject: [PATCH 5/6] Poll for activeConnections to be zero --- .../server/initialization/ServerConfig.java | 2 +- .../initialization/jetty/JettyServerModule.java | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/io/druid/server/initialization/ServerConfig.java b/server/src/main/java/io/druid/server/initialization/ServerConfig.java index aeaa3a8fc56c..8ec704fff60d 100644 --- a/server/src/main/java/io/druid/server/initialization/ServerConfig.java +++ b/server/src/main/java/io/druid/server/initialization/ServerConfig.java @@ -54,7 +54,7 @@ public class ServerConfig @JsonProperty @NotNull - private Period gracefulShutdownTimeout = new Period("PT5s"); + private Period gracefulShutdownTimeout = new Period("PT15s"); public int getNumThreads() { diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index 437c95e5cc8f..99a6257fefda 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -85,7 +85,7 @@ public class JettyServerModule extends JerseyServletModule private static final Logger log = new Logger(JettyServerModule.class); private static final AtomicInteger activeConnections = new AtomicInteger(); - private static final String GRACEFUL_SHUTDOWN_TIME = "graceful_shutdown_time"; + private static final String GRACEFUL_SHUTDOWN_TIMEOUT = "graceful_shutdown_timeout"; @Override protected void configureServlets() @@ -228,7 +228,7 @@ static Server makeJettyServer(DruidNode node, ServerConfig config, TLSServerConf } server.setConnectors(connectors); - server.setAttribute(GRACEFUL_SHUTDOWN_TIME, config.getGracefulShutdownTimeout().toStandardDuration().getMillis()); + server.setAttribute(GRACEFUL_SHUTDOWN_TIMEOUT, config.getGracefulShutdownTimeout().toStandardDuration().getMillis()); return server; } @@ -255,12 +255,16 @@ public void start() throws Exception @Override public void stop() { - if (activeConnections.get() > 0) { - long gracefulShutDownTime = (long) server.getAttribute(GRACEFUL_SHUTDOWN_TIME); + long startTime = System.currentTimeMillis(); + long gracefulShutDownTimeout = (long) server.getAttribute(GRACEFUL_SHUTDOWN_TIMEOUT); + + while (activeConnections.get() > 0 && + System.currentTimeMillis() - startTime < gracefulShutDownTimeout) { log.info("Waiting for [%s] milliseconds for active requests to be zero, current active requests=[%s]", - gracefulShutDownTime, activeConnections.get()); + gracefulShutDownTimeout, activeConnections.get()); + try { - Thread.sleep(gracefulShutDownTime); + Thread.sleep(500); } catch (InterruptedException e) { log.error("Sleep has been interrupted, while waiting for active requests to be zero"); @@ -269,6 +273,7 @@ public void stop() return; } } + stopImmediately(); } From 0d07ba55862462e2980597fb7306af7cb5bd6670 Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Wed, 13 Sep 2017 11:04:46 -0700 Subject: [PATCH 6/6] Use statistics handler to get active requests --- docs/content/configuration/broker.md | 2 +- docs/content/configuration/historical.md | 2 +- .../jetty/JettyServerModule.java | 47 +++++++++++++------ .../cli/QueryJettyServerInitializer.java | 20 +++++--- 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/docs/content/configuration/broker.md b/docs/content/configuration/broker.md index 734267ededa7..2bc8a94022d0 100644 --- a/docs/content/configuration/broker.md +++ b/docs/content/configuration/broker.md @@ -39,7 +39,7 @@ Druid uses Jetty to serve HTTP requests. |`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m| |`druid.server.http.defaultQueryTimeout`|Query timeout in millis, beyond which unfinished queries will be cancelled|300000| |`druid.server.http.maxScatterGatherBytes`|Maximum number of bytes gathered from data nodes such as historicals and realtime processes to execute a query. This is an advance configuration that allows to protect in case broker is under heavy load and not utilizing the data gathered in memory fast enough and leading to OOMs. This limit can be further reduced at query time using `maxScatterGatherBytes` in the context. Note that having large limit is not necessarily bad if broker is never under heavy concurrent load in which case data gathered is processed quickly and freeing up the memory used.|Long.MAX_VALUE| -|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|PT5s| +|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|PT15s| |`druid.broker.http.numConnections`|Size of connection pool for the Broker to connect to historical and real-time processes. If there are more queries than this number that all need to speak to the same node, then they will queue up.|20| |`druid.broker.http.compressionCodec`|Compression codec the Broker uses to communicate with historical and real-time processes. May be "gzip" or "identity".|gzip| |`druid.broker.http.readTimeout`|The timeout for data reads from historical and real-time processes.|PT15M| diff --git a/docs/content/configuration/historical.md b/docs/content/configuration/historical.md index 53250c6fbab8..b368bdad3512 100644 --- a/docs/content/configuration/historical.md +++ b/docs/content/configuration/historical.md @@ -50,7 +50,7 @@ Druid uses Jetty to serve HTTP requests. |`druid.server.http.numThreads`|Number of threads for HTTP requests.|max(10, (Number of cores * 17) / 16 + 2) + 30| |`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m| |`druid.server.http.defaultQueryTimeout`|Query timeout in millis, beyond which unfinished queries will be cancelled|300000| -|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|PT5s| +|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|PT15s| #### Processing diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index 99a6257fefda..af826eab264e 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -67,6 +67,7 @@ import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.SslConnectionFactory; +import org.eclipse.jetty.server.handler.StatisticsHandler; import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.util.thread.QueuedThreadPool; import org.eclipse.jetty.util.thread.ScheduledExecutorScheduler; @@ -227,6 +228,7 @@ static Server makeJettyServer(DruidNode node, ServerConfig config, TLSServerConf connector.setConnectionFactories(monitoredConnFactories); } + server.setHandler(new StatisticsHandler()); server.setConnectors(connectors); server.setAttribute(GRACEFUL_SHUTDOWN_TIMEOUT, config.getGracefulShutdownTimeout().toStandardDuration().getMillis()); @@ -252,26 +254,44 @@ public void start() throws Exception server.start(); } + private Handler getStatisticsHandler() + { + for (Handler handler : server.getHandlers()) { + if (handler.getClass().equals(StatisticsHandler.class)) { + return handler; + } + } + + return null; + } + @Override public void stop() { - long startTime = System.currentTimeMillis(); - long gracefulShutDownTimeout = (long) server.getAttribute(GRACEFUL_SHUTDOWN_TIMEOUT); + StatisticsHandler statisticsHandler = (StatisticsHandler) getStatisticsHandler(); - while (activeConnections.get() > 0 && - System.currentTimeMillis() - startTime < gracefulShutDownTimeout) { - log.info("Waiting for [%s] milliseconds for active requests to be zero, current active requests=[%s]", - gracefulShutDownTimeout, activeConnections.get()); + if (statisticsHandler != null) { + long startTime = System.currentTimeMillis(); + long gracefulShutDownTimeout = (long) server.getAttribute(GRACEFUL_SHUTDOWN_TIMEOUT); - try { - Thread.sleep(500); + if (statisticsHandler.getRequestsActive() > 0) { + log.info("Waiting for upto [%s] milliseconds for active requests to be zero, current active requests=[%s]", + gracefulShutDownTimeout, statisticsHandler.getRequestsActive()); } - catch (InterruptedException e) { - log.error("Sleep has been interrupted, while waiting for active requests to be zero"); - stopImmediately(); - Thread.currentThread().interrupt(); - return; + + while (statisticsHandler.getRequestsActive() > 0 && + System.currentTimeMillis() - startTime < gracefulShutDownTimeout) { + try { + Thread.sleep(500); + } + catch (InterruptedException e) { + log.error("Sleep has been interrupted, while waiting for active requests to be zero"); + stopImmediately(); + Thread.currentThread().interrupt(); + return; + } } + log.info("Stopping Jetty Server with active requests=[%s]", statisticsHandler.getRequestsActive()); } stopImmediately(); @@ -279,7 +299,6 @@ public void stop() private void stopImmediately() { - log.info("Stopping Jetty Server with active requests=[%s]", activeConnections.get()); try { server.stop(); } diff --git a/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java b/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java index d3d3f8b9fd43..7feafe1d95cc 100644 --- a/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java @@ -57,13 +57,21 @@ public void initialize(Server server, Injector injector) root.addFilter(GuiceFilter.class, "/*", null); final HandlerList handlerList = new HandlerList(); - final Handler[] handlers = new Handler[extensionHandlers.size() + 2]; - handlers[0] = JettyServerInitUtils.getJettyRequestLogHandler(); - handlers[handlers.length - 1] = JettyServerInitUtils.wrapWithDefaultGzipHandler(root); - for (int i = 0; i < extensionHandlers.size(); i++) { - handlers[i + 1] = extensionHandlers.get(i); + // Do not change the order of the handlers that have already been added + for (Handler handler : server.getHandlers()) { + handlerList.addHandler(handler); } - handlerList.setHandlers(handlers); + + handlerList.addHandler(JettyServerInitUtils.getJettyRequestLogHandler()); + + // Add all extension handlers + for (Handler handler : extensionHandlers) { + handlerList.addHandler(handler); + } + + // Add Gzip handler at the very end + handlerList.addHandler(JettyServerInitUtils.wrapWithDefaultGzipHandler(root)); + server.setHandler(handlerList); } }