From 8e084ad4fb782e5b2cafb1c806789ea8bea3ce38 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 3 May 2018 09:59:01 -0700 Subject: [PATCH] SegmentLoadDropHandler: Fix deadlock when segments have errors loading on startup. (#5735) The "lock" object was used to synchronize start/stop as well as synchronize removals from segmentsToDelete (when a segment is done dropping). This could cause a deadlock if a segment-load throws an exception during loadLocalCache. loadLocalCache is run by start() while it holds the lock, but then it spawns loading threads, and those threads will try to acquire the "segmentsToDelete" lock if they want to drop a corrupt segments. I don't see any reason for these two locks to be the same lock, so I split them. --- .../coordination/SegmentLoadDropHandler.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java b/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java index 04de12d50970..5774c4bc8e61 100644 --- a/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java +++ b/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java @@ -73,7 +73,11 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler { private static final EmittingLogger log = new EmittingLogger(SegmentLoadDropHandler.class); - private final Object lock = new Object(); + // Synchronizes removals from segmentsToDelete + private final Object segmentDeleteLock = new Object(); + + // Synchronizes start/stop of this object. + private final Object startStopLock = new Object(); private final ObjectMapper jsonMapper; private final SegmentLoaderConfig config; @@ -137,7 +141,7 @@ public SegmentLoadDropHandler( @LifecycleStart public void start() throws IOException { - synchronized (lock) { + synchronized (startStopLock) { if (started) { return; } @@ -159,7 +163,7 @@ public void start() throws IOException @LifecycleStop public void stop() { - synchronized (lock) { + synchronized (startStopLock) { if (!started) { return; } @@ -296,7 +300,7 @@ each time when addSegment() is called, it has to wait for the lock in order to m things slow. Given that in most cases segmentsToDelete.contains(segment) returns false, it will save a lot of cost of acquiring lock by doing the "contains" check outside the synchronized block. */ - synchronized (lock) { + synchronized (segmentDeleteLock) { segmentsToDelete.remove(segment); } } @@ -423,7 +427,7 @@ private void removeSegment( public void run() { try { - synchronized (lock) { + synchronized (segmentDeleteLock) { if (segmentsToDelete.remove(segment)) { segmentManager.dropSegment(segment);