From 552299b6ecc634ea7f8168620136d6e6f4e593f8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 30 Jan 2018 14:02:24 -0800 Subject: [PATCH 1/2] fix RemoteTaskRunner terminating lazy workers below autoscaler minNumWorkers value --- .../indexing/overlord/RemoteTaskRunner.java | 3 +++ .../overlord/RemoteTaskRunnerTest.java | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java index 6524a2ac8004..038f12693b22 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java @@ -1235,6 +1235,9 @@ private void taskComplete( @Override public Collection markWorkersLazy(Predicate isLazyWorker, int maxWorkers) { + if (maxWorkers < 1) { + return Collections.emptyList(); + } // status lock is used to prevent any tasks being assigned to the worker while we mark it lazy synchronized (statusLock) { Iterator iterator = zkWorkers.keySet().iterator(); diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java index ae57a300f4f9..a681c2b88984 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java @@ -550,6 +550,24 @@ public boolean apply(ImmutableWorkerInfo input) Assert.assertEquals(1, remoteTaskRunner.getLazyWorkers().size()); } + @Test + public void testFindLazyWorkerNotRunningAnyTaskButWithZeroMaxWorkers() throws Exception + { + doSetup(); + Collection lazyworkers = remoteTaskRunner.markWorkersLazy( + new Predicate() + { + @Override + public boolean apply(ImmutableWorkerInfo input) + { + return true; + } + }, 0 + ); + Assert.assertEquals(0, lazyworkers.size()); + Assert.assertEquals(0, remoteTaskRunner.getLazyWorkers().size()); + } + @Test public void testWorkerZKReconnect() throws Exception { From 3a1099ef73c5daed79cad339ed73f03bc8b81eb4 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 31 Jan 2018 15:09:43 -0800 Subject: [PATCH 2/2] add comment --- .../main/java/io/druid/indexing/overlord/RemoteTaskRunner.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java index 038f12693b22..e9e623cc8016 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java @@ -1235,6 +1235,8 @@ private void taskComplete( @Override public Collection markWorkersLazy(Predicate isLazyWorker, int maxWorkers) { + // skip the lock and bail early if we should not mark any workers lazy (e.g. number + // of current workers is at or below the minNumWorkers of autoscaler config) if (maxWorkers < 1) { return Collections.emptyList(); }