From 945ebac67c317a566222cebde85db9749804bfec Mon Sep 17 00:00:00 2001 From: zymap Date: Wed, 7 Sep 2022 15:51:24 +0800 Subject: [PATCH 1/3] [fix][tiered-storage] Don't cleanup data when offload met BadVersion --- *Motivation* There have two ways that will cause the offload data cleanup. One is met offload conflict exception, and another is completeLedgerInfoForOffloaded reaches max retry time and throws zookeeper exceptions. We retry the zookeeper operation on connection loss exception. We should be careful about this exception, because we may loss data if the metadata update successfully. When a MetaStore exception happens, we can not make sure the metadata update is failed or not. Because we have a retry on the connection loss, it is possible to get a BadVersion or other exception after retrying. So we don't clean up the data if this happens. *Modification* - don't delete data if has meta store exception --- .../apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java index 84a7377841c54..55ebda23daaf3 100644 --- a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java +++ b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java @@ -3006,6 +3006,13 @@ private void offloadLoop(CompletableFuture promise, Queue Date: Wed, 7 Sep 2022 20:27:28 +0800 Subject: [PATCH 2/3] log error when skip deleting --- .../bookkeeper/mledger/impl/ManagedLedgerImpl.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java index 55ebda23daaf3..da754ec6ecb7b 100644 --- a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java +++ b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java @@ -3004,14 +3004,18 @@ private void offloadLoop(CompletableFuture promise, Queue { if (exception != null) { - log.error("[{}] Failed to offload data for the ledgerId {}", - name, ledgerId, exception); - if (exception instanceof MetaStoreException) { + Throwable e = FutureUtil.unwrapCompletionException(exception); + if (e instanceof MetaStoreException) { // When a MetaStore exception happens, we can not make sure the metadata // update is failed or not. Because we have a retry on the connection loss, // it is possible to get a BadVersion or other exception after retrying. // So we don't clean up the data if it has metadata operation exception. + log.error("[{}] Failed to update offloaded metadata for the ledgerId {}", + name, ledgerId, exception); return; + } else { + log.error("[{}] Failed to offload data for the ledgerId {}", + name, ledgerId, exception); } cleanupOffloaded( ledgerId, uuid, From ff1eac4735296c44eb4117bc432767b2c2bc5479 Mon Sep 17 00:00:00 2001 From: zymap Date: Fri, 9 Sep 2022 08:49:32 +0800 Subject: [PATCH 3/3] improve logs --- .../apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java index da754ec6ecb7b..ea0206fe26d56 100644 --- a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java +++ b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java @@ -3010,11 +3010,13 @@ private void offloadLoop(CompletableFuture promise, Queue