From a5ebcac4ceb14eb8342ce085965b370186b4aba9 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Mon, 17 Jul 2017 16:56:45 -0700 Subject: [PATCH] Use blocking=false and add try logging. --- .../src/main/scala/org/apache/spark/MapOutputTracker.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 5d48bc7c96555..7f760a59bda2f 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -194,7 +194,12 @@ private class ShuffleStatus(numPartitions: Int) { */ def invalidateSerializedMapOutputStatusCache(): Unit = synchronized { if (cachedSerializedBroadcast != null) { - cachedSerializedBroadcast.destroy() + // Prevent errors during broadcast cleanup from crashing the DAGScheduler (see SPARK-21444) + Utils.tryLogNonFatalError { + // Use `blocking = false` so that this operation doesn't hang while trying to send cleanup + // RPCs to dead executors. + cachedSerializedBroadcast.destroy(blocking = false) + } cachedSerializedBroadcast = null } cachedSerializedMapStatus = null