From a4626c572565b18776077e8435e6f3feef8ca3b5 Mon Sep 17 00:00:00 2001 From: Siyang Tang <82279870+TangSiyang2001@users.noreply.github.com> Date: Thu, 10 Oct 2024 22:20:47 +0800 Subject: [PATCH] [fix](delete) Fix potential delete job stuck until timeout if exception happened in FE DeleteJob execution (#41672) ## Proposed changes A failed task should also count down the countdown latch to prevent the job from getting stuck. --- .../src/main/java/org/apache/doris/master/MasterImpl.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 4870b3a5820c9c..a1acd72974d737 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -433,6 +433,9 @@ private void finishRealtimePush(AgentTask task, TFinishTaskRequest request) thro } catch (MetaNotFoundException e) { AgentTaskQueue.removeTask(backendId, TTaskType.REALTIME_PUSH, signature); LOG.warn("finish push replica error", e); + if (pushTask.getPushType() == TPushType.DELETE) { + pushTask.countDownLatch(backendId, pushTabletId); + } } finally { olapTable.writeUnlock(); }