From 0bf9647047ae8bb8962ae8470c964492a04aab3f Mon Sep 17 00:00:00 2001 From: Siyang Tang <82279870+TangSiyang2001@users.noreply.github.com> Date: Thu, 10 Oct 2024 22:20:47 +0800 Subject: [PATCH] [fix](delete) Fix potential delete job stuck until timeout if exception happened in FE DeleteJob execution (#41672) ## Proposed changes A failed task should also count down the countdown latch to prevent the job from getting stuck. --- .../src/main/java/org/apache/doris/master/MasterImpl.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 3e63a5421f798f..27469301e17615 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -423,6 +423,9 @@ private void finishRealtimePush(AgentTask task, TFinishTaskRequest request) thro } catch (MetaNotFoundException e) { AgentTaskQueue.removeTask(backendId, TTaskType.REALTIME_PUSH, signature); LOG.warn("finish push replica error", e); + if (pushTask.getPushType() == TPushType.DELETE) { + pushTask.countDownLatch(backendId, pushTabletId); + } } finally { olapTable.writeUnlock(); }