From 81d8776e7c1a57b1b4abebc5e73830b761961914 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Sun, 28 Apr 2019 12:58:09 +0800 Subject: [PATCH] Republish txn if error occurred during publish --- be/src/agent/task_worker_pool.cpp | 6 +++--- fe/src/main/java/org/apache/doris/master/MasterImpl.java | 6 +++++- .../main/java/org/apache/doris/task/PublishVersionTask.java | 5 +++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 89c8af417c6923..68918a27937190 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -812,9 +812,9 @@ void* TaskWorkerPool::_publish_version_worker_thread_callback(void* arg_this) { TFinishTaskRequest finish_task_request; if (res != OLAP_SUCCESS) { - // if publish failed, should also set status to ok, or fe will not deal with - // partial successfully tablet - status_code = TStatusCode::OK; + // if publish failed, return failed, fe will ignore this error and + // check error tablet ids and fe will also republish this task + status_code = TStatusCode::RUNTIME_ERROR; LOG(WARNING) << "publish version failed. 
signature:" << agent_task_req.signature; error_msgs.push_back("publish version failed"); finish_task_request.__set_error_tablet_ids(error_tablet_ids); diff --git a/fe/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/src/main/java/org/apache/doris/master/MasterImpl.java index 9b147ffa6161c0..4f3bfde6fcee5d 100644 --- a/fe/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/src/main/java/org/apache/doris/master/MasterImpl.java @@ -132,7 +132,7 @@ public TMasterResult finishTask(TFinishTaskRequest request) throws TException { // We start to let FE perceive the task's error msg if (taskType != TTaskType.MAKE_SNAPSHOT && taskType != TTaskType.UPLOAD && taskType != TTaskType.DOWNLOAD && taskType != TTaskType.MOVE - && taskType != TTaskType.CLONE) { + && taskType != TTaskType.CLONE && taskType != TTaskType.PUBLISH_VERSION) { return result; } } @@ -562,6 +562,10 @@ private void finishPublishVersion(AgentTask task, TFinishTaskRequest request) { PublishVersionTask publishVersionTask = (PublishVersionTask) task; publishVersionTask.addErrorTablets(errorTabletIds); publishVersionTask.setIsFinished(true); + if (request.getTask_status().getStatus_code() != TStatusCode.OK) { + // not remove the task from queue and be will retry + return; + } AgentTaskQueue.removeTask(publishVersionTask.getBackendId(), publishVersionTask.getTaskType(), publishVersionTask.getSignature()); diff --git a/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java b/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java index c65cfe05a96409..76136c22200cfa 100644 --- a/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java +++ b/fe/src/main/java/org/apache/doris/task/PublishVersionTask.java @@ -58,11 +58,12 @@ public List getPartitionVersionInfos() { return partitionVersionInfos; } - public List getErrorTablets() { + public synchronized List getErrorTablets() { return errorTablets; } - public void addErrorTablets(List errorTablets) { + public synchronized void 
addErrorTablets(List errorTablets) { + this.errorTablets.clear(); if (errorTablets == null) { return; }