From 336d66f779e7401261bed5990d5afdd727c0f48d Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 2 May 2018 15:10:25 -0700 Subject: [PATCH] SegmentLoadDropHandler: Fix deadlock when segments have errors loading on startup. The "lock" object was used to synchronize start/stop as well as synchronize removals from segmentsToDelete (when a segment is done dropping). This could cause a deadlock if a segment-load throws an exception during loadLocalCache. loadLocalCache is run by start() while it holds the lock, but then it spawns loading threads, and those threads will try to acquire the "segmentsToDelete" lock if they want to drop a corrupt segments. I don't see any reason for these two locks to be the same lock, so I split them. --- .../coordination/SegmentLoadDropHandler.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java b/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java index d0985b7cfee0..312ef20769a6 100644 --- a/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java +++ b/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java @@ -73,7 +73,11 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler { private static final EmittingLogger log = new EmittingLogger(SegmentLoadDropHandler.class); - private final Object lock = new Object(); + // Synchronizes removals from segmentsToDelete + private final Object segmentDeleteLock = new Object(); + + // Synchronizes start/stop of this object. + private final Object startStopLock = new Object(); private final ObjectMapper jsonMapper; private final SegmentLoaderConfig config; @@ -137,7 +141,7 @@ public SegmentLoadDropHandler( @LifecycleStart public void start() throws IOException { - synchronized (lock) { + synchronized (startStopLock) { if (started) { return; } @@ -159,7 +163,7 @@ public void start() throws IOException @LifecycleStop public void stop() { - synchronized (lock) { + synchronized (startStopLock) { if (!started) { return; } @@ -296,7 +300,7 @@ each time when addSegment() is called, it has to wait for the lock in order to m things slow. Given that in most cases segmentsToDelete.contains(segment) returns false, it will save a lot of cost of acquiring lock by doing the "contains" check outside the synchronized block. */ - synchronized (lock) { + synchronized (segmentDeleteLock) { segmentsToDelete.remove(segment); } } @@ -423,7 +427,7 @@ private void removeSegment( public void run() { try { - synchronized (lock) { + synchronized (segmentDeleteLock) { if (segmentsToDelete.remove(segment)) { segmentManager.dropSegment(segment);