From b5f21a0074e078ff3df3a8c7cdd5933b6312ee30 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Thu, 12 Sep 2024 15:26:34 -0700 Subject: [PATCH 01/14] HDDS-11411. Snapshot garbage collection should not run when the keys are moved from a deleted snapshot to the next snapshot in the chain Change-Id: Ib48b8aae241efff80fbc8636d3c5e72a49a08a30 --- .../src/main/resources/ozone-default.xml | 9 ++ .../apache/hadoop/ozone/om/OMConfigKeys.java | 2 + .../src/main/proto/OmClientProtocol.proto | 8 ++ .../hadoop/ozone/om/KeyManagerImpl.java | 8 +- .../ozone/om/OmMetadataManagerImpl.java | 75 ++++-------- .../key/OMDirectoriesPurgeRequestWithFSO.java | 45 +++++-- .../om/request/key/OMKeyPurgeRequest.java | 58 ++++++--- .../OMSnapshotMoveDeletedKeysRequest.java | 4 +- .../snapshot/OMSnapshotPurgeRequest.java | 6 +- .../OMDirectoriesPurgeResponseWithFSO.java | 4 + .../service/AbstractKeyDeletingService.java | 50 ++++---- .../om/service/DirectoryDeletingService.java | 36 +++++- .../ozone/om/service/KeyDeletingService.java | 26 ++-- .../om/service/SnapshotDeletingService.java | 7 +- .../SnapshotDirectoryCleaningService.java | 5 +- .../ozone/om/snapshot/SnapshotUtils.java | 112 +++++++++++++++--- 16 files changed, 303 insertions(+), 152 deletions(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index fd601e1a7d3e..2d8dfa212db1 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3743,6 +3743,15 @@ + + ozone.snapshot.deep.cleaning.enabled + true + OZONE, PERFORMANCE, OM + + Flag to enable/disable snapshot deep cleaning. + + + ozone.scm.event.ContainerReport.thread.pool.size 10 diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 46becc9e64b5..161b1c555ff2 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -403,6 +403,8 @@ private OMConfigKeys() { /** * Configuration properties for Snapshot Directory Service. */ + public static final String OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED = "ozone.snapshot.deep.cleaning.enabled"; + public static final boolean OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT = true; public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL = "ozone.snapshot.directory.service.interval"; public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto index eefcfa7552ca..04fec5399ddf 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto @@ -1379,6 +1379,8 @@ message PurgeKeysRequest { // if set, will purge keys in a snapshot DB instead of active DB optional string snapshotTableKey = 2; repeated SnapshotMoveKeyInfos keysToUpdate = 3; + // previous snapshotID can also be null & this field would be absent in older requests. + optional NullableUUID expectedPreviousSnapshotID = 4; } message PurgeKeysResponse { @@ -1401,6 +1403,12 @@ message PurgePathsResponse { message PurgeDirectoriesRequest { repeated PurgePathRequest deletedPath = 1; optional string snapshotTableKey = 2; + // previous snapshotID can also be null & this field would be absent in older requests. + optional NullableUUID expectedPreviousSnapshotID = 3; +} + +message NullableUUID { + optional hadoop.hdds.UUID uuid = 1; } message PurgeDirectoriesResponse { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index 6d276d95284e..9c58da899502 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -141,6 +141,8 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_INTERVAL_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_TIMEOUT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_OPEN_KEY_CLEANUP_SERVICE_TIMEOUT_DEFAULT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_TIMEOUT; @@ -228,6 +230,8 @@ public KeyManagerImpl(OzoneManager om, ScmClient scmClient, @Override public void start(OzoneConfiguration configuration) { + boolean isSnapshotDeepCleaningEnabled = configuration.getBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, + OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT); if (keyDeletingService == null) { long blockDeleteInterval = configuration.getTimeDuration( OZONE_BLOCK_DELETING_SERVICE_INTERVAL, @@ -239,7 +243,7 @@ public void start(OzoneConfiguration configuration) { TimeUnit.MILLISECONDS); keyDeletingService = new KeyDeletingService(ozoneManager, scmClient.getBlockClient(), this, blockDeleteInterval, - serviceTimeout, configuration); + serviceTimeout, configuration, isSnapshotDeepCleaningEnabled); keyDeletingService.start(); } @@ -312,7 +316,7 @@ public void start(OzoneConfiguration configuration) { } } - if (snapshotDirectoryCleaningService == null && + if (isSnapshotDeepCleaningEnabled && snapshotDirectoryCleaningService == null && ozoneManager.isFilesystemSnapshotEnabled()) { long dirDeleteInterval = configuration.getTimeDuration( OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java index ee92dbc2fde9..9f5a488e3c34 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java @@ -113,6 +113,7 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.BUCKET_NOT_FOUND; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND; +import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INTERNAL_ERROR; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.VOLUME_NOT_FOUND; import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_CHECKPOINT_DIR; import static org.apache.hadoop.ozone.om.service.SnapshotDeletingService.isBlockLocationInfoSame; @@ -1595,11 +1596,20 @@ public PendingKeysDeletion getPendingDeletionKeys(final int keyCount, String[] keySplit = kv.getKey().split(OM_KEY_PREFIX); String bucketKey = getBucketKey(keySplit[1], keySplit[2]); OmBucketInfo bucketInfo = getBucketTable().get(bucketKey); - + SnapshotInfo previousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(bucketInfo.getVolumeName(), + bucketInfo.getBucketName(), ozoneManager, snapshotChainManager); + // previous snapshot is not active or it has not been flushed to disk then don't process the key in this + // iteration. + if (previousSnapshotInfo != null && + (previousSnapshotInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || + !OmSnapshotManager.areSnapshotChangesFlushedToDB(ozoneManager.getMetadataManager(), + previousSnapshotInfo))) { + continue; + } // Get the latest snapshot in snapshot path. - try (ReferenceCounted - rcLatestSnapshot = getLatestActiveSnapshot( - keySplit[1], keySplit[2], omSnapshotManager)) { + try (ReferenceCounted rcLatestSnapshot = previousSnapshotInfo == null ? null : + omSnapshotManager.getSnapshot(previousSnapshotInfo.getVolumeName(), + previousSnapshotInfo.getBucketName(), previousSnapshotInfo.getName())) { // Multiple keys with the same path can be queued in one DB entry RepeatedOmKeyInfo infoList = kv.getValue(); @@ -1688,6 +1698,14 @@ public PendingKeysDeletion getPendingDeletionKeys(final int keyCount, infoList.getOmKeyInfoList().size()) { keyBlocksList.addAll(blockGroupList); } + SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(bucketInfo.getVolumeName(), + bucketInfo.getBucketName(), ozoneManager, snapshotChainManager); + if (!Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), + Optional.ofNullable(previousSnapshotInfo).map(SnapshotInfo::getSnapshotId))) { + throw new OMException("Snapshot chain has changed while checking for key reference " + + "Previous snapshot in chain : " + previousSnapshotInfo + " new snapshot in chain : " + + newPreviousSnapshotInfo, INTERNAL_ERROR); + } } } } @@ -1703,55 +1721,6 @@ private boolean versionExistsInPreviousSnapshot(OmKeyInfo omKeyInfo, delOmKeyInfo != null; } - /** - * Get the latest OmSnapshot for a snapshot path. - */ - public ReferenceCounted getLatestActiveSnapshot( - String volumeName, String bucketName, - OmSnapshotManager snapshotManager) - throws IOException { - - String snapshotPath = volumeName + OM_KEY_PREFIX + bucketName; - Optional latestPathSnapshot = Optional.ofNullable( - snapshotChainManager.getLatestPathSnapshotId(snapshotPath)); - - Optional snapshotInfo = Optional.empty(); - - while (latestPathSnapshot.isPresent()) { - Optional snapTableKey = latestPathSnapshot - .map(uuid -> snapshotChainManager.getTableKey(uuid)); - - snapshotInfo = snapTableKey.isPresent() ? - Optional.ofNullable(getSnapshotInfoTable().get(snapTableKey.get())) : - Optional.empty(); - - if (snapshotInfo.isPresent() && snapshotInfo.get().getSnapshotStatus() == - SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE) { - break; - } - - // Update latestPathSnapshot if current snapshot is deleted. - if (snapshotChainManager.hasPreviousPathSnapshot(snapshotPath, - latestPathSnapshot.get())) { - latestPathSnapshot = Optional.ofNullable(snapshotChainManager - .previousPathSnapshot(snapshotPath, latestPathSnapshot.get())); - } else { - latestPathSnapshot = Optional.empty(); - } - } - - Optional> rcOmSnapshot = - snapshotInfo.isPresent() ? - Optional.ofNullable( - snapshotManager.getSnapshot(volumeName, - bucketName, - snapshotInfo.get().getName()) - ) : - Optional.empty(); - - return rcOmSnapshot.orElse(null); - } - /** * Decide whether the open key is a multipart upload related key. * @param openKeyInfo open key related to multipart upload diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java index dd08ff171654..a63b63725b12 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java @@ -23,13 +23,21 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; +import java.util.UUID; + +import com.google.common.collect.Maps; +import org.apache.commons.compress.utils.Lists; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.OMMetrics; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.SnapshotChainManager; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.ratis.server.protocol.TermIndex; import org.apache.hadoop.ozone.om.OMMetadataManager; @@ -45,8 +53,10 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; +import static org.apache.hadoop.hdds.HddsUtils.fromProtobuf; import static org.apache.hadoop.ozone.OzoneConsts.DELETED_HSYNC_KEY; import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.BUCKET_LOCK; +import static org.apache.hadoop.ozone.om.snapshot.SnapshotUtils.validatePreviousSnapshotId; /** * Handles purging of keys from OM DB. @@ -66,19 +76,34 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn List purgeRequests = purgeDirsRequest.getDeletedPathList(); - - SnapshotInfo fromSnapshotInfo = null; Set> lockSet = new HashSet<>(); Map, OmBucketInfo> volBucketInfoMap = new HashMap<>(); - OMMetadataManager omMetadataManager = ozoneManager.getMetadataManager(); + OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); Map openKeyInfoMap = new HashMap<>(); - OMMetrics omMetrics = ozoneManager.getMetrics(); + OMResponse.Builder omResponse = OmResponseUtil.getOMResponseBuilder( + getOmRequest()); + final SnapshotInfo fromSnapshotInfo; try { - if (fromSnapshot != null) { - fromSnapshotInfo = SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot); + fromSnapshotInfo = fromSnapshot != null ? SnapshotUtils.getSnapshotInfo(ozoneManager, + fromSnapshot) : null; + // Checking if this request is an old request or new one. + if (purgeDirsRequest.hasExpectedPreviousSnapshotID()) { + // Validating previous snapshot since while purging deletes, a snapshot create request could make this purge + // directory request invalid on AOS since the deletedDirectory would be in the newly created snapshot. Adding + // subdirectories could lead to not being able to reclaim sub-files and subdirectories since the + // file/directory would be present in the newly created snapshot. + // Validating previous snapshot can ensure the chain hasn't changed. + UUID expectedPreviousSnapshotId = purgeDirsRequest.getExpectedPreviousSnapshotID().hasUuid() + ? fromProtobuf(purgeDirsRequest.getExpectedPreviousSnapshotID().getUuid()) : null; + validatePreviousSnapshotId(fromSnapshotInfo, omMetadataManager.getSnapshotChainManager(), + expectedPreviousSnapshotId); } - + } catch (IOException e) { + LOG.error("Error occured while performing OMDirectoriesPurge. ", e); + return new OMDirectoriesPurgeResponseWithFSO(createErrorOMResponse(omResponse, e)); + } + try { for (OzoneManagerProtocolProtos.PurgePathRequest path : purgeRequests) { for (OzoneManagerProtocolProtos.KeyInfo key : path.getMarkDeletedSubDirsList()) { @@ -170,12 +195,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn } } - OMResponse.Builder omResponse = OmResponseUtil.getOMResponseBuilder( - getOmRequest()); - OMClientResponse omClientResponse = new OMDirectoriesPurgeResponseWithFSO( + return new OMDirectoriesPurgeResponseWithFSO( omResponse.build(), purgeRequests, ozoneManager.isRatisEnabled(), getBucketLayout(), volBucketInfoMap, fromSnapshotInfo, openKeyInfoMap); - - return omClientResponse; } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java index 14c80bb7a93b..0719d6909b67 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.utils.db.cache.CacheKey; import org.apache.hadoop.hdds.utils.db.cache.CacheValue; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.ratis.server.protocol.TermIndex; import org.apache.hadoop.ozone.om.OzoneManager; @@ -42,6 +43,10 @@ import org.slf4j.LoggerFactory; import java.util.List; +import java.util.UUID; + +import static org.apache.hadoop.hdds.HddsUtils.fromProtobuf; +import static org.apache.hadoop.ozone.om.snapshot.SnapshotUtils.validatePreviousSnapshotId; /** * Handles purging of keys from OM DB. @@ -58,30 +63,46 @@ public OMKeyPurgeRequest(OMRequest omRequest) { @Override public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIndex termIndex) { PurgeKeysRequest purgeKeysRequest = getOmRequest().getPurgeKeysRequest(); - List bucketDeletedKeysList = purgeKeysRequest - .getDeletedKeysList(); - List keysToUpdateList = purgeKeysRequest - .getKeysToUpdateList(); - String fromSnapshot = purgeKeysRequest.hasSnapshotTableKey() ? - purgeKeysRequest.getSnapshotTableKey() : null; - List keysToBePurgedList = new ArrayList<>(); + List bucketDeletedKeysList = purgeKeysRequest.getDeletedKeysList(); + List keysToUpdateList = purgeKeysRequest.getKeysToUpdateList(); + String fromSnapshot = purgeKeysRequest.hasSnapshotTableKey() ? purgeKeysRequest.getSnapshotTableKey() : null; OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager(); OMResponse.Builder omResponse = OmResponseUtil.getOMResponseBuilder( getOmRequest()); - OMClientResponse omClientResponse = null; - for (DeletedKeys bucketWithDeleteKeys : bucketDeletedKeysList) { - for (String deletedKey : bucketWithDeleteKeys.getKeysList()) { - keysToBePurgedList.add(deletedKey); + + final SnapshotInfo fromSnapshotInfo; + try { + fromSnapshotInfo = fromSnapshot != null ? SnapshotUtils.getSnapshotInfo(ozoneManager, + fromSnapshot) : null; + // Checking if this request is an old request or new one. + if (purgeKeysRequest.hasExpectedPreviousSnapshotID()) { + // Validating previous snapshot since while purging deletes, a snapshot create request could make this purge + // directory request invalid on AOS since the deletedDirectory would be in the newly created snapshot. Adding + // subdirectories could lead to not being able to reclaim sub-files and subdirectories since the + // file/directory would be present in the newly created snapshot. + // Validating previous snapshot can ensure the chain hasn't changed. + UUID expectedPreviousSnapshotId = purgeKeysRequest.getExpectedPreviousSnapshotID().hasUuid() + ? fromProtobuf(purgeKeysRequest.getExpectedPreviousSnapshotID().getUuid()) : null; + validatePreviousSnapshotId(fromSnapshotInfo, omMetadataManager.getSnapshotChainManager(), + expectedPreviousSnapshotId); } + } catch (IOException e) { + LOG.error("Error occured while performing OMDirectoriesPurge. ", e); + return new OMKeyPurgeResponse(createErrorOMResponse(omResponse, e)); } - final SnapshotInfo fromSnapshotInfo; - try { - fromSnapshotInfo = fromSnapshot == null ? null : SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot); - } catch (IOException ex) { - return new OMKeyPurgeResponse(createErrorOMResponse(omResponse, ex)); + List keysToBePurgedList = new ArrayList<>(); + + for (DeletedKeys bucketWithDeleteKeys : bucketDeletedKeysList) { + keysToBePurgedList.addAll(bucketWithDeleteKeys.getKeysList()); + } + + if (keysToBePurgedList.isEmpty()) { + return new OMKeyPurgeResponse(createErrorOMResponse(omResponse, + new OMException("None of the keys can be purged be purged since a new snapshot was created for all the " + + "buckets, making this request invalid", OMException.ResultCodes.KEY_DELETION_ERROR))); } // Setting transaction info for snapshot, this is to prevent duplicate purge requests to OM from background @@ -95,10 +116,9 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn } catch (IOException e) { return new OMKeyPurgeResponse(createErrorOMResponse(omResponse, e)); } - omClientResponse = new OMKeyPurgeResponse(omResponse.build(), keysToBePurgedList, fromSnapshotInfo, - keysToUpdateList); - return omClientResponse; + return new OMKeyPurgeResponse(omResponse.build(), + keysToBePurgedList, fromSnapshotInfo, keysToUpdateList); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java index 58fdb1232d31..18055bdda40c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotMoveDeletedKeysRequest.java @@ -80,9 +80,9 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn OmResponseUtil.getOMResponseBuilder(getOmRequest()); try { // Check the snapshot exists. - SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot.getTableKey()); + SnapshotInfo snapshotInfo = SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot.getTableKey()); - nextSnapshot = SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, ozoneManager); + nextSnapshot = SnapshotUtils.getNextSnapshot(ozoneManager, snapshotChainManager, snapshotInfo); // Get next non-deleted snapshot. List nextDBKeysList = moveDeletedKeysRequest.getNextDBKeysList(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java index 6602f52514b5..38c51d4de5c0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotPurgeRequest.java @@ -103,9 +103,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn "Snapshot purge request.", snapTableKey); continue; } - - SnapshotInfo nextSnapshot = - SnapshotUtils.getNextActiveSnapshot(fromSnapshot, snapshotChainManager, ozoneManager); + SnapshotInfo nextSnapshot = SnapshotUtils.getNextSnapshot(ozoneManager, snapshotChainManager, fromSnapshot); // Step 1: Update the deep clean flag for the next active snapshot updateSnapshotInfoAndCache(nextSnapshot, omMetadataManager, trxnLogIndex); @@ -116,7 +114,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn .addCacheEntry(new CacheKey<>(fromSnapshot.getTableKey()), CacheValue.get(trxnLogIndex)); updatedSnapshotInfos.remove(fromSnapshot.getTableKey()); } - + // Update the snapshotInfo lastTransactionInfo. for (SnapshotInfo snapshotInfo : updatedSnapshotInfos.values()) { snapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString()); omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(snapshotInfo.getTableKey()), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java index 28c3e3d758e2..782063d32446 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/key/OMDirectoriesPurgeResponseWithFSO.java @@ -78,6 +78,10 @@ public OMDirectoriesPurgeResponseWithFSO(@Nonnull OMResponse omResponse, this.openKeyInfoMap = openKeyInfoMap; } + public OMDirectoriesPurgeResponseWithFSO(OMResponse omResponse) { + super(omResponse); + } + @Override public void addToDBBatch(OMMetadataManager metadataManager, BatchOperation batchOp) throws IOException { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 2c2d16bf14c7..845ae34ed0b9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -19,6 +19,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ServiceException; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol; import org.apache.hadoop.hdds.utils.BackgroundService; @@ -97,7 +98,7 @@ public AbstractKeyDeletingService(String serviceName, long interval, protected int processKeyDeletes(List keyBlocksList, KeyManager manager, HashMap keysToModify, - String snapTableKey) throws IOException { + String snapTableKey, UUID expectedPreviousSnapshotId) throws IOException { long startTime = Time.monotonicNow(); int delCount = 0; @@ -120,7 +121,7 @@ protected int processKeyDeletes(List keyBlocksList, startTime = Time.monotonicNow(); if (isRatisEnabled()) { delCount = submitPurgeKeysRequest(blockDeletionResults, - keysToModify, snapTableKey); + keysToModify, snapTableKey, expectedPreviousSnapshotId); } else { // TODO: Once HA and non-HA paths are merged, we should have // only one code path here. Purge keys should go through an @@ -172,7 +173,7 @@ private int deleteAllKeys(List results, * @param keysToModify Updated list of RepeatedOmKeyInfo */ private int submitPurgeKeysRequest(List results, - HashMap keysToModify, String snapTableKey) { + HashMap keysToModify, String snapTableKey, UUID expectedPreviousSnapshotId) { Map, List> purgeKeysMapPerBucket = new HashMap<>(); @@ -203,6 +204,15 @@ private int submitPurgeKeysRequest(List results, if (snapTableKey != null) { purgeKeysRequest.setSnapshotTableKey(snapTableKey); } + OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = + OzoneManagerProtocolProtos.NullableUUID.newBuilder(); + if (expectedPreviousSnapshotId != null) { + expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); + } + + if (expectedPreviousSnapshotId != null) { + purgeKeysRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID); + } // Add keys to PurgeKeysRequest bucket wise. for (Map.Entry, List> entry : @@ -274,13 +284,21 @@ private void addToMap(Map, List> map, String object } protected void submitPurgePaths(List requests, - String snapTableKey) { + String snapTableKey, + UUID expectedPreviousSnapshotId) { OzoneManagerProtocolProtos.PurgeDirectoriesRequest.Builder purgeDirRequest = OzoneManagerProtocolProtos.PurgeDirectoriesRequest.newBuilder(); if (snapTableKey != null) { purgeDirRequest.setSnapshotTableKey(snapTableKey); } + OzoneManagerProtocolProtos.NullableUUID.Builder expectedPreviousSnapshotNullableUUID = + OzoneManagerProtocolProtos.NullableUUID.newBuilder(); + if (expectedPreviousSnapshotId != null) { + expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); + } + purgeDirRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); + purgeDirRequest.addAllDeletedPath(requests); OzoneManagerProtocolProtos.OMRequest omRequest = @@ -386,7 +404,8 @@ public long optimizeDirDeletesAndSubmitRequest(long remainNum, List> allSubDirList, List purgePathRequestList, String snapTableKey, long startTime, - int remainingBufLimit, KeyManager keyManager) { + int remainingBufLimit, KeyManager keyManager, + UUID expectedPreviousSnapshotId) { // Optimization to handle delete sub-dir and keys to remove quickly // This case will be useful to handle when depth of directory is high @@ -426,7 +445,7 @@ public long optimizeDirDeletesAndSubmitRequest(long remainNum, } if (!purgePathRequestList.isEmpty()) { - submitPurgePaths(purgePathRequestList, snapTableKey); + submitPurgePaths(purgePathRequestList, snapTableKey, expectedPreviousSnapshotId); } if (dirNum != 0 || subDirNum != 0 || subFileNum != 0) { @@ -549,25 +568,6 @@ protected boolean isBufferLimitCrossed( return cLimit + increment >= maxLimit; } - protected SnapshotInfo getPreviousActiveSnapshot(SnapshotInfo snapInfo, SnapshotChainManager chainManager) - throws IOException { - SnapshotInfo currSnapInfo = snapInfo; - while (chainManager.hasPreviousPathSnapshot( - currSnapInfo.getSnapshotPath(), currSnapInfo.getSnapshotId())) { - - UUID prevPathSnapshot = chainManager.previousPathSnapshot( - currSnapInfo.getSnapshotPath(), currSnapInfo.getSnapshotId()); - String tableKey = chainManager.getTableKey(prevPathSnapshot); - SnapshotInfo prevSnapInfo = SnapshotUtils.getSnapshotInfo(ozoneManager, tableKey); - if (prevSnapInfo.getSnapshotStatus() == - SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE) { - return prevSnapInfo; - } - currSnapInfo = prevSnapInfo; - } - return null; - } - protected boolean isKeyReclaimable( Table previousKeyTable, Table renamedTable, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index c8703c3c4c62..494457e8bffa 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -31,9 +31,12 @@ import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; +import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; import org.apache.hadoop.util.Time; import org.apache.ratis.protocol.ClientId; @@ -43,11 +46,15 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT; +import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INTERNAL_ERROR; /** * This is a background service to delete orphan directories and its @@ -155,9 +162,15 @@ public BackgroundTaskResult call() { = new ArrayList<>((int) remainNum); Table.KeyValue pendingDeletedDirInfo; + try (TableIterator> deleteTableIterator = getOzoneManager().getMetadataManager(). getDeletedDirTable().iterator()) { + // This is to avoid race condition b/w purge request and snapshot chain updation. For AOS taking the global + // snapshotId since AOS could process multiple buckets in one iteration. + UUID expectedPreviousSnapshotId = + ((OmMetadataManagerImpl)getOzoneManager().getMetadataManager()).getSnapshotChainManager() + .getLatestGlobalSnapshotId(); long startTime = Time.monotonicNow(); while (remainNum > 0 && deleteTableIterator.hasNext()) { @@ -204,7 +217,7 @@ public BackgroundTaskResult call() { remainNum, dirNum, subDirNum, subFileNum, allSubDirList, purgePathRequestList, null, startTime, ratisByteLimit - consumedSize, - getOzoneManager().getKeyManager()); + getOzoneManager().getKeyManager(), expectedPreviousSnapshotId); } catch (IOException e) { LOG.error("Error while running delete directories and files " + @@ -224,12 +237,21 @@ private boolean previousSnapshotHasDir( getOzoneManager().getOmSnapshotManager(); OmMetadataManagerImpl metadataManager = (OmMetadataManagerImpl) getOzoneManager().getMetadataManager(); - + SnapshotInfo previousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), + deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); + // previous snapshot is not active or it has not been flushed to disk then don't process the key in this + // iteration. + if (previousSnapshotInfo != null && + (previousSnapshotInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || + !OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), + previousSnapshotInfo))) { + return true; + } try (ReferenceCounted rcLatestSnapshot = - metadataManager.getLatestActiveSnapshot( + omSnapshotManager.getSnapshot( deletedDirInfo.getVolumeName(), deletedDirInfo.getBucketName(), - omSnapshotManager)) { + previousSnapshotInfo.getName())) { if (rcLatestSnapshot != null) { String dbRenameKey = metadataManager @@ -250,7 +272,11 @@ private boolean previousSnapshotHasDir( String prevDbKey = prevDirTableDBKey == null ? metadataManager.getOzoneDeletePathDirKey(key) : prevDirTableDBKey; OmDirectoryInfo prevDirInfo = prevDirTable.get(prevDbKey); - return prevDirInfo != null && + //Check the snapshot chain hasn't changed while the checking the previous snapshot. + SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), + deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); + return Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), + Optional.ofNullable(previousSnapshotInfo).map(SnapshotInfo::getSnapshotId)) && prevDirInfo != null && prevDirInfo.getObjectID() == deletedDirInfo.getObjectID(); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index 5e622cb17019..2d886769efbe 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; @@ -38,12 +39,14 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.BlockGroup; import org.apache.hadoop.ozone.om.KeyManager; +import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; +import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotSize; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SetSnapshotPropertyRequest; @@ -92,11 +95,13 @@ public class KeyDeletingService extends AbstractKeyDeletingService { private final Map exclusiveReplicatedSizeMap; private final Set completedExclusiveSizeSet; private final Map snapshotSeekMap; + private final boolean deepCleanSnapshots; public KeyDeletingService(OzoneManager ozoneManager, ScmBlockLocationProtocol scmClient, KeyManager manager, long serviceInterval, - long serviceTimeout, ConfigurationSource conf) { + long serviceTimeout, ConfigurationSource conf, + boolean deepCleanSnapshots) { super(KeyDeletingService.class.getSimpleName(), serviceInterval, TimeUnit.MILLISECONDS, KEY_DELETING_CORE_POOL_SIZE, serviceTimeout, ozoneManager, scmClient); @@ -111,6 +116,7 @@ public KeyDeletingService(OzoneManager ozoneManager, this.exclusiveReplicatedSizeMap = new HashMap<>(); this.completedExclusiveSizeSet = new HashSet<>(); this.snapshotSeekMap = new HashMap<>(); + this.deepCleanSnapshots = deepCleanSnapshots; } /** @@ -191,7 +197,11 @@ public BackgroundTaskResult call() { // doesn't have enough entries left. // OM would have to keep track of which snapshot the key is coming // from if the above would be done inside getPendingDeletionKeys(). - + // This is to avoid race condition b/w purge request and snapshot chain updation. For AOS taking the global + // snapshotId since AOS could process multiple buckets in one iteration. + UUID expectedPreviousSnapshotId = + ((OmMetadataManagerImpl)manager.getMetadataManager()).getSnapshotChainManager() + .getLatestGlobalSnapshotId(); PendingKeysDeletion pendingKeysDeletion = manager .getPendingDeletionKeys(getKeyLimitPerTask()); List keyBlocksList = pendingKeysDeletion @@ -199,7 +209,7 @@ public BackgroundTaskResult call() { if (keyBlocksList != null && !keyBlocksList.isEmpty()) { delCount = processKeyDeletes(keyBlocksList, getOzoneManager().getKeyManager(), - pendingKeysDeletion.getKeysToModify(), null); + pendingKeysDeletion.getKeysToModify(), null, expectedPreviousSnapshotId); deletedKeyCount.addAndGet(delCount); } } catch (IOException e) { @@ -208,7 +218,7 @@ public BackgroundTaskResult call() { } try { - if (delCount < keyLimitPerTask) { + if (deepCleanSnapshots && delCount < keyLimitPerTask) { processSnapshotDeepClean(delCount); } } catch (Exception e) { @@ -276,11 +286,13 @@ private void processSnapshotDeepClean(int delCount) } String snapshotBucketKey = dbBucketKey + OzoneConsts.OM_KEY_PREFIX; - SnapshotInfo previousSnapshot = getPreviousActiveSnapshot(currSnapInfo, snapChainManager); + SnapshotInfo previousSnapshot = SnapshotUtils.getPreviousSnapshot(getOzoneManager(), snapChainManager, + currSnapInfo); SnapshotInfo previousToPrevSnapshot = null; if (previousSnapshot != null) { - previousToPrevSnapshot = getPreviousActiveSnapshot(previousSnapshot, snapChainManager); + previousToPrevSnapshot = SnapshotUtils.getPreviousSnapshot(getOzoneManager(), snapChainManager, + previousSnapshot); } Table previousKeyTable = null; @@ -409,7 +421,7 @@ private void processSnapshotDeepClean(int delCount) if (!keysToPurge.isEmpty()) { processKeyDeletes(keysToPurge, currOmSnapshot.getKeyManager(), - keysToModify, currSnapInfo.getTableKey()); + keysToModify, currSnapInfo.getTableKey(), null); } } finally { IOUtils.closeQuietly(rcPrevOmSnapshot, rcPrevToPrevOmSnapshot); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java index f85bd781b050..d7e7f68946ed 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDeletingService.java @@ -49,6 +49,7 @@ import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; import org.apache.hadoop.ozone.om.snapshot.ReferenceCounted; +import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PurgePathRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.SnapshotMoveDeletedKeysRequest; @@ -204,7 +205,7 @@ public BackgroundTaskResult call() throws InterruptedException { } //TODO: [SNAPSHOT] Add lock to deletedTable and Active DB. - SnapshotInfo previousSnapshot = getPreviousActiveSnapshot(snapInfo, chainManager); + SnapshotInfo previousSnapshot = SnapshotUtils.getPreviousSnapshot(ozoneManager, chainManager, snapInfo); Table previousKeyTable = null; Table previousDirTable = null; OmSnapshot omPreviousSnapshot = null; @@ -298,7 +299,7 @@ public BackgroundTaskResult call() throws InterruptedException { // Delete keys From deletedTable processKeyDeletes(keysToPurge, omSnapshot.getKeyManager(), - null, snapInfo.getTableKey()); + null, snapInfo.getTableKey(), null); successRunCount.incrementAndGet(); } catch (IOException ex) { LOG.error("Error while running Snapshot Deleting Service for " + @@ -436,7 +437,7 @@ private long handleDirectoryCleanUp( remainNum = optimizeDirDeletesAndSubmitRequest(remainNum, dirNum, subDirNum, subFileNum, allSubDirList, purgePathRequestList, snapInfo.getTableKey(), startTime, ratisByteLimit - consumedSize, - omSnapshot.getKeyManager()); + omSnapshot.getKeyManager(), null); } catch (IOException e) { LOG.error("Error while running delete directories and files for " + "snapshot " + snapInfo.getTableKey() + " in snapshot deleting " + diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java index 26d5d24a8a03..828eee34283b 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java @@ -61,6 +61,7 @@ import static org.apache.hadoop.ozone.om.helpers.SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE; import static org.apache.hadoop.ozone.om.request.file.OMFileRequest.getDirectoryInfo; import static org.apache.hadoop.ozone.om.snapshot.SnapshotUtils.getOzonePathKeyForFso; +import static org.apache.hadoop.ozone.om.snapshot.SnapshotUtils.getPreviousSnapshot; /** * Snapshot BG Service for deleted directory deep clean and exclusive size @@ -173,7 +174,7 @@ public BackgroundTaskResult call() { "unexpected state."); } - SnapshotInfo previousSnapshot = getPreviousActiveSnapshot(currSnapInfo, snapChainManager); + SnapshotInfo previousSnapshot = getPreviousSnapshot(getOzoneManager(), snapChainManager, currSnapInfo); SnapshotInfo previousToPrevSnapshot = null; Table previousKeyTable = null; @@ -190,7 +191,7 @@ public BackgroundTaskResult call() { .getKeyTable(bucketInfo.getBucketLayout()); prevRenamedTable = omPreviousSnapshot .getMetadataManager().getSnapshotRenamedTable(); - previousToPrevSnapshot = getPreviousActiveSnapshot(previousSnapshot, snapChainManager); + previousToPrevSnapshot = getPreviousSnapshot(getOzoneManager(), snapChainManager, previousSnapshot); } Table previousToPrevKeyTable = null; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java index e0f40dabd8a7..180ac9952a3d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java @@ -36,6 +36,8 @@ import java.util.NoSuchElementException; import java.util.HashMap; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.UUID; import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; @@ -86,6 +88,13 @@ public static SnapshotInfo getSnapshotInfo(final OzoneManager ozoneManager, return snapshotInfo; } + public static SnapshotInfo getSnapshotInfo(OzoneManager ozoneManager, + SnapshotChainManager chainManager, + UUID snapshotId) throws IOException { + String tableKey = chainManager.getTableKey(snapshotId); + return SnapshotUtils.getSnapshotInfo(ozoneManager, tableKey); + } + public static void dropColumnFamilyHandle( final ManagedRocksDB rocksDB, final ColumnFamilyHandle columnFamilyHandle) { @@ -139,36 +148,60 @@ public static void checkSnapshotActive(SnapshotInfo snapInfo, } /** - * Get the next non deleted snapshot in the snapshot chain. + * Get the next in the snapshot chain. */ - public static SnapshotInfo getNextActiveSnapshot(SnapshotInfo snapInfo, - SnapshotChainManager chainManager, OzoneManager ozoneManager) + public static SnapshotInfo getNextSnapshot(OzoneManager ozoneManager, + SnapshotChainManager chainManager, + SnapshotInfo snapInfo) throws IOException { - // If the snapshot is deleted in the previous run, then the in-memory // SnapshotChainManager might throw NoSuchElementException as the snapshot // is removed in-memory but OMDoubleBuffer has not flushed yet. if (snapInfo == null) { throw new OMException("Snapshot Info is null. Cannot get the next snapshot", INVALID_SNAPSHOT_ERROR); } - try { - while (chainManager.hasNextPathSnapshot(snapInfo.getSnapshotPath(), + if (chainManager.hasNextPathSnapshot(snapInfo.getSnapshotPath(), snapInfo.getSnapshotId())) { + UUID nextPathSnapshot = chainManager.nextPathSnapshot(snapInfo.getSnapshotPath(), snapInfo.getSnapshotId()); + return getSnapshotInfo(ozoneManager, chainManager, nextPathSnapshot); + } + } catch (NoSuchElementException ex) { + LOG.error("The snapshot {} is not longer in snapshot chain, It " + + "maybe removed in the previous Snapshot purge request.", + snapInfo.getTableKey()); + } + return null; + } - UUID nextPathSnapshot = - chainManager.nextPathSnapshot( - snapInfo.getSnapshotPath(), snapInfo.getSnapshotId()); - - String tableKey = chainManager.getTableKey(nextPathSnapshot); - SnapshotInfo nextSnapshotInfo = getSnapshotInfo(ozoneManager, tableKey); - - if (nextSnapshotInfo.getSnapshotStatus().equals( - SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE)) { - return nextSnapshotInfo; - } + /** + * Get the previous in the snapshot chain. + */ + public static SnapshotInfo getPreviousSnapshot(OzoneManager ozoneManager, + SnapshotChainManager chainManager, + SnapshotInfo snapInfo) + throws IOException { + UUID previousSnapshotId = getPreviousSnapshotId(snapInfo, chainManager); + return previousSnapshotId == null ? null : getSnapshotInfo(ozoneManager, chainManager, previousSnapshotId); + } - snapInfo = nextSnapshotInfo; + /** + * Get the previous in the snapshot chain. + */ + public static UUID getPreviousSnapshotId(SnapshotInfo snapInfo, + SnapshotChainManager chainManager) + throws IOException { + // If the snapshot is deleted in the previous run, then the in-memory + // SnapshotChainManager might throw NoSuchElementException as the snapshot + // is removed in-memory but OMDoubleBuffer has not flushed yet. + if (snapInfo == null) { + throw new OMException("Snapshot Info is null. Cannot get the next snapshot", INVALID_SNAPSHOT_ERROR); + } + try { + if (chainManager.hasPreviousPathSnapshot(snapInfo.getSnapshotPath(), + snapInfo.getSnapshotId())) { + return chainManager.previousPathSnapshot(snapInfo.getSnapshotPath(), + snapInfo.getSnapshotId()); } } catch (NoSuchElementException ex) { LOG.error("The snapshot {} is not longer in snapshot chain, It " + @@ -242,4 +275,47 @@ public static String getOzonePathKeyForFso(OMMetadataManager metadataManager, final long bucketId = metadataManager.getBucketId(volumeName, bucketName); return OM_KEY_PREFIX + volumeId + OM_KEY_PREFIX + bucketId + OM_KEY_PREFIX; } + + public static SnapshotInfo getLatestGlobalSnapshotInfo(OzoneManager ozoneManager, + SnapshotChainManager snapshotChainManager) throws IOException { + Optional latestGlobalSnapshot = Optional.ofNullable(snapshotChainManager.getLatestGlobalSnapshotId()); + return latestGlobalSnapshot.isPresent() ? getSnapshotInfo(ozoneManager, snapshotChainManager, + latestGlobalSnapshot.get()) : null; + } + + public static SnapshotInfo getLatestSnapshotInfo(String volumeName, String bucketName, + OzoneManager ozoneManager, + SnapshotChainManager snapshotChainManager) throws IOException { + Optional latestPathSnapshot = Optional.ofNullable( + getLatestSnapshotId(volumeName, bucketName, snapshotChainManager)); + return latestPathSnapshot.isPresent() ? + getSnapshotInfo(ozoneManager, snapshotChainManager, latestPathSnapshot.get()) : null; + } + + public static UUID getLatestSnapshotId(String volumeName, String bucketName, + SnapshotChainManager snapshotChainManager) throws IOException { + String snapshotPath = volumeName + OM_KEY_PREFIX + bucketName; + return snapshotChainManager.getLatestPathSnapshotId(snapshotPath); + } + + // Validates previous snapshotId given a snapshotInfo or volumeName & bucketName. Incase snapshotInfo is null, this + // would be considered as AOS and previous snapshot becomes the latest snapshot in the global snapshot chain. + // Would throw OMException if validation fails otherwise function would pass. + public static void validatePreviousSnapshotId(SnapshotInfo snapshotInfo, + SnapshotChainManager snapshotChainManager, + UUID expectedPreviousSnapshotId) throws IOException { + try { + UUID previousSnapshotId = snapshotInfo == null ? snapshotChainManager.getLatestGlobalSnapshotId() : + SnapshotUtils.getPreviousSnapshotId(snapshotInfo, snapshotChainManager); + if (!Objects.equals(expectedPreviousSnapshotId, previousSnapshotId)) { + throw new OMException("Snapshot validation failed. Expected previous snapshotId : " + + expectedPreviousSnapshotId + " but was " + previousSnapshotId, + OMException.ResultCodes.INVALID_REQUEST); + } + } catch (IOException e) { + LOG.error("Error while validating previous snapshot for snapshot: {}", + snapshotInfo == null ? null : snapshotInfo.getName(), e); + throw e; + } + } } From bad7fe674afeb4899b5aa40e83073d04d7ef47a2 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Fri, 20 Sep 2024 13:39:00 -0700 Subject: [PATCH 02/14] HDDS-11411. Merge with master Change-Id: I33dd55f6ffb92fc96922109c5c94d1c1743056d7 --- .../hadoop/ozone/om/service/KeyDeletingService.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index 7bf2b05cbfa8..f31ce28c17e5 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -97,6 +97,7 @@ public class KeyDeletingService extends AbstractKeyDeletingService { private final Map snapshotSeekMap; private AtomicBoolean isRunningOnAOS; private final boolean deepCleanSnapshots; + private final SnapshotChainManager snapshotChainManager; public KeyDeletingService(OzoneManager ozoneManager, ScmBlockLocationProtocol scmClient, @@ -119,6 +120,7 @@ public KeyDeletingService(OzoneManager ozoneManager, this.snapshotSeekMap = new HashMap<>(); this.isRunningOnAOS = new AtomicBoolean(false); this.deepCleanSnapshots = deepCleanSnapshots; + this.snapshotChainManager = ((OmMetadataManagerImpl)manager.getMetadataManager()).getSnapshotChainManager(); } /** @@ -208,11 +210,9 @@ public BackgroundTaskResult call() { // doesn't have enough entries left. // OM would have to keep track of which snapshot the key is coming // from if the above would be done inside getPendingDeletionKeys(). - // This is to avoid race condition b/w purge request and snapshot chain updation. For AOS taking the global + // This is to avoid race condition b/w purge request and snapshot chain update. For AOS taking the global // snapshotId since AOS could process multiple buckets in one iteration. - UUID expectedPreviousSnapshotId = - ((OmMetadataManagerImpl)manager.getMetadataManager()).getSnapshotChainManager() - .getLatestGlobalSnapshotId(); + UUID expectedPreviousSnapshotId = snapshotChainManager.getLatestGlobalSnapshotId(); PendingKeysDeletion pendingKeysDeletion = manager .getPendingDeletionKeys(getKeyLimitPerTask()); List keyBlocksList = pendingKeysDeletion From 7e66143bb2ac05d4207f8462ecd71185babc0f9c Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Fri, 20 Sep 2024 13:56:55 -0700 Subject: [PATCH 03/14] HDDS-11411. Merge with master Change-Id: I7d355461867b90749efce4222860f75e71a117e8 --- .../om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index d77f9bf9d8d4..53a7f9c893c6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -508,7 +508,7 @@ private KeyDeletingService getMockedKeyDeletingService(AtomicBoolean keyDeletion when(ozoneManager.getKeyManager()).thenReturn(keyManager); KeyDeletingService keyDeletingService = Mockito.spy(new KeyDeletingService(ozoneManager, ozoneManager.getScmClient().getBlockClient(), keyManager, 10000, - 100000, cluster.getConf())); + 100000, cluster.getConf(), false)); keyDeletingService.shutdown(); GenericTestUtils.waitFor(() -> keyDeletingService.getThreadCount() == 0, 1000, 100000); From 83491aa9656ead5f759b379287a00c7eeab99998 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 23 Sep 2024 20:31:06 -0700 Subject: [PATCH 04/14] HDDS-11411. Add test cases Change-Id: Ia1c1f2cceaaf80b8ee40f889f4ed7eb8b37966e8 --- .../src/main/resources/ozone-default.xml | 2 +- .../apache/hadoop/ozone/om/OMConfigKeys.java | 2 +- .../TestDirectoryDeletingServiceWithFSO.java | 146 ++++++++++++++++ .../ozone/om/OmMetadataManagerImpl.java | 31 ++-- .../key/OMDirectoriesPurgeRequestWithFSO.java | 8 +- .../om/request/key/OMKeyPurgeRequest.java | 2 +- .../service/AbstractKeyDeletingService.java | 2 - .../om/service/DirectoryDeletingService.java | 16 +- .../ozone/om/service/KeyDeletingService.java | 5 +- .../SnapshotDirectoryCleaningService.java | 4 +- .../om/service/TestKeyDeletingService.java | 164 ++++++++++++++++-- 11 files changed, 320 insertions(+), 62 deletions(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 2d8dfa212db1..9b0ff0e9625c 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3745,7 +3745,7 @@ ozone.snapshot.deep.cleaning.enabled - true + false OZONE, PERFORMANCE, OM Flag to enable/disable snapshot deep cleaning. diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java index 161b1c555ff2..a77bc4f53048 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java @@ -404,7 +404,7 @@ private OMConfigKeys() { * Configuration properties for Snapshot Directory Service. */ public static final String OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED = "ozone.snapshot.deep.cleaning.enabled"; - public static final boolean OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT = true; + public static final boolean OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED_DEFAULT = false; public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL = "ozone.snapshot.directory.service.interval"; public static final String OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL_DEFAULT diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index 0abfb1336544..af9366ce6f71 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -18,7 +18,11 @@ package org.apache.hadoop.fs.ozone; +import java.util.List; +import java.util.Random; import java.util.concurrent.CompletableFuture; + +import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -32,10 +36,18 @@ import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.TestDataUtil; +import org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.om.KeyManager; +import org.apache.hadoop.ozone.om.KeyManagerImpl; import org.apache.hadoop.ozone.om.OMMetadataManager; +import org.apache.hadoop.ozone.om.OmSnapshotManager; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.helpers.OmKeyArgs; import org.apache.hadoop.ozone.om.helpers.OzoneFileStatus; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer; @@ -48,12 +60,16 @@ import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,6 +80,8 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.function.LongSupplier; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -72,6 +90,12 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_FS_ITERATE_BATCH_SIZE; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.when; /** * Directory deletion service test cases. @@ -97,6 +121,7 @@ public static void init() throws Exception { conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 5); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS); + conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 1000, TimeUnit.MILLISECONDS); conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, omRatisEnabled); conf.setBoolean(OZONE_ACL_ENABLED, true); cluster = MiniOzoneCluster.newBuilder(conf) @@ -460,6 +485,127 @@ public void testDeleteFilesAndSubFiles() throws Exception { assertEquals(prevDeletedKeyCount + 5, currentDeletedKeyCount); } + private void createFileKey(OzoneBucket bucket, String key) + throws Exception { + byte[] value = RandomStringUtils.randomAscii(10240).getBytes(UTF_8); + OzoneOutputStream fileKey = bucket.createKey(key, value.length); + fileKey.write(value); + fileKey.close(); + } + + /* + * Create key d1/k1 + * Create snap1 + * Rename dir1 to dir2 + * Delete dir2 + * Wait for KeyDeletingService to start processing deleted key k2 + * Create snap2 by making the KeyDeletingService thread wait till snap2 is flushed + * Resume KeyDeletingService thread. + * Read d1 from snap1. + */ + @Test + public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() + throws Exception { + OMMetadataManager omMetadataManager = cluster.getOzoneManager().getMetadataManager(); + Table snapshotInfoTable = omMetadataManager.getSnapshotInfoTable(); + Table deletedDirTable = omMetadataManager.getDeletedDirTable(); + Table renameTable = omMetadataManager.getSnapshotRenamedTable(); + cluster.getOzoneManager().getKeyManager().getSnapshotDeletingService().shutdown(); + DirectoryDeletingService dirDeletingService = cluster.getOzoneManager().getKeyManager().getDirDeletingService(); + // Suspend KeyDeletingService + dirDeletingService.suspend(); + GenericTestUtils.waitFor(() -> !dirDeletingService.isRunningOnAOS(), 1000, 10000); + Random random = new Random(); + final String testVolumeName = "volume" + random.nextInt(); + final String testBucketName = "bucket" + random.nextInt(); + // Create Volume and Buckets + ObjectStore store = client.getObjectStore(); + store.createVolume(testVolumeName); + OzoneVolume volume = store.getVolume(testVolumeName); + volume.createBucket(testBucketName, + BucketArgs.newBuilder().setBucketLayout(BucketLayout.FILE_SYSTEM_OPTIMIZED).build()); + OzoneBucket bucket = volume.getBucket(testBucketName); + + OzoneManager ozoneManager = Mockito.spy(cluster.getOzoneManager()); + OmSnapshotManager omSnapshotManager = Mockito.spy(ozoneManager.getOmSnapshotManager()); + KeyManager km = Mockito.spy(new KeyManagerImpl(ozoneManager, ozoneManager.getScmClient(), cluster.getConf(), + ozoneManager.getPerfMetrics())); + when(ozoneManager.getOmSnapshotManager()).thenAnswer(i -> { + return omSnapshotManager; + }); + DirectoryDeletingService service = Mockito.spy(new DirectoryDeletingService(1000, TimeUnit.MILLISECONDS, 1000, + ozoneManager, + cluster.getConf())); + service.shutdown(); + final int initialSnapshotCount = + (int) cluster.getOzoneManager().getMetadataManager().countRowsInTable(snapshotInfoTable); + final int initialDeletedCount = (int) omMetadataManager.countRowsInTable(deletedDirTable); + final int initialRenameCount = (int) omMetadataManager.countRowsInTable(renameTable); + String snap1 = "snap1"; + String snap2 = "snap2"; + createFileKey(bucket, "dir1/key1"); + store.createSnapshot(testVolumeName, testBucketName, "snap1"); + bucket.renameKey("dir1", "dir2"); + OmKeyArgs omKeyArgs = new OmKeyArgs.Builder() + .setVolumeName(testVolumeName) + .setBucketName(testBucketName) + .setKeyName("dir2").build(); + long objectId = store.getClientProxy().getOzoneManagerClient().getKeyInfo(omKeyArgs, false) + .getKeyInfo().getObjectID(); + long volumeId = omMetadataManager.getVolumeId(testVolumeName); + long bucketId = omMetadataManager.getBucketId(testVolumeName, testBucketName); + String deletePathKey = omMetadataManager.getOzoneDeletePathKey(objectId, + omMetadataManager.getOzonePathKey(volumeId, + bucketId, bucketId, "dir2")); + bucket.deleteDirectory("dir2", true); + + + assertTableRowCount(deletedDirTable, initialDeletedCount + 1); + assertTableRowCount(renameTable, initialRenameCount + 1); + Mockito.doAnswer(i -> { + List purgePathRequestList = i.getArgument(5); + for (OzoneManagerProtocolProtos.PurgePathRequest purgeRequest : purgePathRequestList) { + Assertions.assertNotEquals(deletePathKey, purgeRequest.getDeletedDir()); + } + return i.callRealMethod(); + }).when(service).optimizeDirDeletesAndSubmitRequest(anyLong(), anyLong(), anyLong(), + anyLong(), anyList(), anyList(), eq(null), anyLong(), anyInt(), Mockito.any(), any()); + + Mockito.doAnswer(i -> { + store.createSnapshot(testVolumeName, testBucketName, snap2); + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo snapshotInfo = store.getClientProxy().getOzoneManagerClient() + .getSnapshotInfo(testVolumeName, testBucketName, snap2); + + return OmSnapshotManager.areSnapshotChangesFlushedToDB(cluster.getOzoneManager().getMetadataManager(), + snapshotInfo); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 1000, 100000); + GenericTestUtils.waitFor(() -> { + try { + return renameTable.get(omMetadataManager.getRenameKey(testVolumeName, testBucketName, objectId)) == null; + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 1000, 10000); + return i.callRealMethod(); + }).when(omSnapshotManager).getSnapshot(ArgumentMatchers.eq(testVolumeName), ArgumentMatchers.eq(testBucketName), + ArgumentMatchers.eq(snap1)); + assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 1); + service.runPeriodicalTaskNow(); + service.runPeriodicalTaskNow(); + assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 2); + store.deleteSnapshot(testVolumeName, testBucketName, snap2); + service.runPeriodicalTaskNow(); + store.deleteSnapshot(testVolumeName, testBucketName, snap1); + cluster.restartOzoneManager(); + assertTableRowCount(cluster.getOzoneManager().getMetadataManager().getSnapshotInfoTable(), initialSnapshotCount); + dirDeletingService.resume(); + } + @Test public void testDirDeletedTableCleanUpForSnapshot() throws Exception { Table deletedDirTable = diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java index 59977246d9e8..41cb60f3d162 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java @@ -34,7 +34,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; -import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -113,7 +112,6 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_CHECKPOINT_DIR_CREATION_POLL_TIMEOUT_DEFAULT; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.BUCKET_NOT_FOUND; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.FILE_NOT_FOUND; -import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INTERNAL_ERROR; import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.VOLUME_NOT_FOUND; import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_CHECKPOINT_DIR; import static org.apache.hadoop.ozone.om.service.SnapshotDeletingService.isBlockLocationInfoSame; @@ -1702,25 +1700,22 @@ public PendingKeysDeletion getPendingDeletionKeys(final int keyCount, List notReclaimableKeyInfoList = notReclaimableKeyInfo.getOmKeyInfoList(); - - // If all the versions are not reclaimable, then do nothing. - if (notReclaimableKeyInfoList.size() > 0 && - notReclaimableKeyInfoList.size() != - infoList.getOmKeyInfoList().size()) { - keysToModify.put(kv.getKey(), notReclaimableKeyInfo); - } - - if (notReclaimableKeyInfoList.size() != - infoList.getOmKeyInfoList().size()) { - keyBlocksList.addAll(blockGroupList); - } SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(bucketInfo.getVolumeName(), bucketInfo.getBucketName(), ozoneManager, snapshotChainManager); - if (!Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), + // Check if the previous snapshot in the chain hasn't changed. + if (Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), Optional.ofNullable(previousSnapshotInfo).map(SnapshotInfo::getSnapshotId))) { - throw new OMException("Snapshot chain has changed while checking for key reference " + - "Previous snapshot in chain : " + previousSnapshotInfo + " new snapshot in chain : " + - newPreviousSnapshotInfo, INTERNAL_ERROR); + // If all the versions are not reclaimable, then do nothing. + if (notReclaimableKeyInfoList.size() > 0 && + notReclaimableKeyInfoList.size() != + infoList.getOmKeyInfoList().size()) { + keysToModify.put(kv.getKey(), notReclaimableKeyInfo); + } + + if (notReclaimableKeyInfoList.size() != + infoList.getOmKeyInfoList().size()) { + keyBlocksList.addAll(blockGroupList); + } } } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java index a63b63725b12..f47a40a3891a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java @@ -23,12 +23,9 @@ import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.UUID; -import com.google.common.collect.Maps; -import org.apache.commons.compress.utils.Lists; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.utils.TransactionInfo; import org.apache.hadoop.hdds.utils.db.cache.CacheKey; @@ -36,11 +33,8 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; -import org.apache.hadoop.ozone.om.SnapshotChainManager; -import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.ratis.server.protocol.TermIndex; -import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.ozone.om.helpers.BucketLayout; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; @@ -85,7 +79,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn getOmRequest()); final SnapshotInfo fromSnapshotInfo; try { - fromSnapshotInfo = fromSnapshot != null ? SnapshotUtils.getSnapshotInfo(ozoneManager, + fromSnapshotInfo = fromSnapshot != null ? SnapshotUtils.getSnapshotInfo(ozoneManager, fromSnapshot) : null; // Checking if this request is an old request or new one. if (purgeDirsRequest.hasExpectedPreviousSnapshotID()) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java index 0719d6909b67..e898e29cd01f 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java @@ -96,7 +96,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn List keysToBePurgedList = new ArrayList<>(); for (DeletedKeys bucketWithDeleteKeys : bucketDeletedKeysList) { - keysToBePurgedList.addAll(bucketWithDeleteKeys.getKeysList()); + keysToBePurgedList.addAll(bucketWithDeleteKeys.getKeysList()); } if (keysToBePurgedList.isEmpty()) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 845ae34ed0b9..74ce5541f8f8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -33,13 +33,11 @@ import org.apache.hadoop.ozone.om.KeyManager; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.SnapshotChainManager; import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils; -import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DeletedKeys; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 7809dda0fc74..87c4b2fc299d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -31,7 +31,6 @@ import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; -import org.apache.hadoop.ozone.om.exceptions.OMException; import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; @@ -54,7 +53,6 @@ import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK_DEFAULT; -import static org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes.INTERNAL_ERROR; /** * This is a background service to delete orphan directories and its @@ -254,12 +252,14 @@ private boolean previousSnapshotHasDir( getOzoneManager().getMetadataManager(); SnapshotInfo previousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); + if (previousSnapshotInfo == null) { + return false; + } // previous snapshot is not active or it has not been flushed to disk then don't process the key in this // iteration. - if (previousSnapshotInfo != null && - (previousSnapshotInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || + if (previousSnapshotInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || !OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), - previousSnapshotInfo))) { + previousSnapshotInfo)) { return true; } try (ReferenceCounted rcLatestSnapshot = @@ -290,9 +290,9 @@ private boolean previousSnapshotHasDir( //Check the snapshot chain hasn't changed while the checking the previous snapshot. SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); - return Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), - Optional.ofNullable(previousSnapshotInfo).map(SnapshotInfo::getSnapshotId)) && prevDirInfo != null && - prevDirInfo.getObjectID() == deletedDirInfo.getObjectID(); + return (!Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), + Optional.ofNullable(previousSnapshotInfo).map(SnapshotInfo::getSnapshotId))) || (prevDirInfo != null && + prevDirInfo.getObjectID() == deletedDirInfo.getObjectID()); } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index f31ce28c17e5..f314293a5ff8 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -39,7 +39,6 @@ import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.BlockGroup; import org.apache.hadoop.ozone.om.KeyManager; -import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.OmSnapshotManager; @@ -265,11 +264,11 @@ private void processSnapshotDeepClean(int delCount) while (delCount < keyLimitPerTask && iterator.hasNext()) { List keysToPurge = new ArrayList<>(); HashMap keysToModify = new HashMap<>(); - SnapshotInfo currSnapInfo = iterator.next().getValue(); + SnapshotInfo currSnapInfo = snapshotInfoTable.get(iterator.next().getKey()); // Deep clean only on active snapshot. Deleted Snapshots will be // cleaned up by SnapshotDeletingService. - if (currSnapInfo.getSnapshotStatus() != SNAPSHOT_ACTIVE || + if (currSnapInfo == null || currSnapInfo.getSnapshotStatus() != SNAPSHOT_ACTIVE || currSnapInfo.getDeepClean()) { continue; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java index 828eee34283b..e7133e625896 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/SnapshotDirectoryCleaningService.java @@ -144,11 +144,11 @@ public BackgroundTaskResult call() { > iterator = snapshotInfoTable.iterator()) { while (iterator.hasNext()) { - SnapshotInfo currSnapInfo = iterator.next().getValue(); + SnapshotInfo currSnapInfo = snapshotInfoTable.get(iterator.next().getKey()); // Expand deleted dirs only on active snapshot. Deleted Snapshots // will be cleaned up by SnapshotDeletingService. - if (currSnapInfo.getSnapshotStatus() != SNAPSHOT_ACTIVE || + if (currSnapInfo == null || currSnapInfo.getSnapshotStatus() != SNAPSHOT_ACTIVE || currSnapInfo.getDeepCleanedDeletedDir()) { continue; } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 8163592cfc6d..3323978f7c13 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -39,13 +39,17 @@ import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.ozone.om.KeyManager; +import org.apache.hadoop.ozone.om.KeyManagerImpl; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmSnapshot; +import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OmTestManagers; import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.PendingKeysDeletion; import org.apache.hadoop.ozone.om.ScmBlockLocationTestingClient; import org.apache.hadoop.ozone.common.BlockGroup; import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.KeyInfoWithVolumeContext; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup; @@ -57,10 +61,13 @@ import org.apache.ratis.util.ExitUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.io.TempDir; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,12 +88,16 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.when; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; @@ -132,6 +143,7 @@ private void createConfig(File testDir) { 1, TimeUnit.SECONDS); conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 200, TimeUnit.MILLISECONDS); + conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setQuietMode(false); } @@ -285,6 +297,117 @@ void checkDeletedTableCleanUpForSnapshot() throws Exception { assertEquals(0, rangeKVs.size()); } + /* + * Create key k1 + * Create snap1 + * Rename k1 to k2 + * Delete k2 + * Wait for KeyDeletingService to start processing deleted key k2 + * Create snap2 by making the KeyDeletingService thread wait till snap2 is flushed + * Resume KeyDeletingService thread. + * Read k1 from snap1. + */ + @Test + public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() + throws Exception { + Table snapshotInfoTable = + om.getMetadataManager().getSnapshotInfoTable(); + Table deletedTable = + om.getMetadataManager().getDeletedTable(); + Table keyTable = + om.getMetadataManager().getKeyTable(BucketLayout.DEFAULT); + Table renameTable = om.getMetadataManager().getSnapshotRenamedTable(); + + // Suspend KeyDeletingService + keyDeletingService.suspend(); + SnapshotDeletingService snapshotDeletingService = om.getKeyManager().getSnapshotDeletingService(); + snapshotDeletingService.suspend(); + GenericTestUtils.waitFor(() -> !keyDeletingService.isRunningOnAOS(), 1000, 10000); + final String volumeName = getTestName(); + final String bucketName = uniqueObjectName("bucket"); + OzoneManager ozoneManager = Mockito.spy(om); + OmSnapshotManager omSnapshotManager = Mockito.spy(om.getOmSnapshotManager()); + KeyManager km = Mockito.spy(new KeyManagerImpl(ozoneManager, ozoneManager.getScmClient(), conf, + om.getPerfMetrics())); + when(ozoneManager.getOmSnapshotManager()).thenAnswer(i -> { + return omSnapshotManager; + }); + KeyDeletingService service = new KeyDeletingService(ozoneManager, scmBlockTestingClient, km, 10000, + 100000, conf, false); + service.shutdown(); + final long initialSnapshotCount = metadataManager.countRowsInTable(snapshotInfoTable); + final long initialDeletedCount = metadataManager.countRowsInTable(deletedTable); + final long initialRenameCount = metadataManager.countRowsInTable(renameTable); + // Create Volume and Buckets + createVolumeAndBucket(volumeName, bucketName, false); + OmKeyArgs args = createAndCommitKey(volumeName, bucketName, + "key1", 3); + String snap1 = uniqueObjectName("snap"); + String snap2 = uniqueObjectName("snap"); + writeClient.createSnapshot(volumeName, bucketName, snap1); + KeyInfoWithVolumeContext keyInfo = writeClient.getKeyInfo(args, false); + AtomicLong objectId = new AtomicLong(keyInfo.getKeyInfo().getObjectID()); + renameKey(volumeName, bucketName, "key1", "key2"); + deleteKey(volumeName, bucketName, "key2"); + assertTableRowCount(deletedTable, initialDeletedCount + 1, metadataManager); + assertTableRowCount(renameTable, initialRenameCount + 1, metadataManager); + + String[] deletePathKey = {metadataManager.getOzoneDeletePathKey(objectId.get(), + metadataManager.getOzoneKey(volumeName, + bucketName, "key2"))}; + assertNotNull(deletedTable.get(deletePathKey[0])); + Mockito.doAnswer(i -> { + writeClient.createSnapshot(volumeName, bucketName, snap2); + GenericTestUtils.waitFor(() -> { + try { + SnapshotInfo snapshotInfo = writeClient.getSnapshotInfo(volumeName, bucketName, snap2); + return OmSnapshotManager.areSnapshotChangesFlushedToDB(metadataManager, snapshotInfo); + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 1000, 100000); + GenericTestUtils.waitFor(() -> { + try { + return renameTable.get(metadataManager.getRenameKey(volumeName, bucketName, objectId.get())) == null; + } catch (IOException e) { + throw new RuntimeException(e); + } + }, 1000, 10000); + return i.callRealMethod(); + }).when(omSnapshotManager).getSnapshot(ArgumentMatchers.eq(volumeName), ArgumentMatchers.eq(bucketName), + ArgumentMatchers.eq(snap1)); + assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 1, metadataManager); + doAnswer(i -> { + PendingKeysDeletion pendingKeysDeletion = (PendingKeysDeletion) i.callRealMethod(); + for (BlockGroup group : pendingKeysDeletion.getKeyBlocksList()) { + Assertions.assertNotEquals(deletePathKey[0], group.getGroupID()); + } + return pendingKeysDeletion; + }).when(km).getPendingDeletionKeys(anyInt()); + service.runPeriodicalTaskNow(); + service.runPeriodicalTaskNow(); + assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 2, metadataManager); + // Create Key3 + OmKeyArgs args2 = createAndCommitKey(volumeName, bucketName, + "key3", 3); + keyInfo = writeClient.getKeyInfo(args2, false); + objectId.set(keyInfo.getKeyInfo().getObjectID()); + // Rename Key3 to key4 + renameKey(volumeName, bucketName, "key3", "key4"); + // Delete Key4 + deleteKey(volumeName, bucketName, "key4"); + deletePathKey[0] = metadataManager.getOzoneDeletePathKey(objectId.get(), metadataManager.getOzoneKey(volumeName, + bucketName, "key4")); + // Delete snapshot + writeClient.deleteSnapshot(volumeName, bucketName, snap2); + // Run KDS and ensure key4 doesn't get purged since snap2 has not been deleted. + service.runPeriodicalTaskNow(); + writeClient.deleteSnapshot(volumeName, bucketName, snap1); + snapshotDeletingService.resume(); + assertTableRowCount(snapshotInfoTable, initialSnapshotCount, metadataManager); + keyDeletingService.resume(); + } + /* * Create Snap1 * Create 10 keys @@ -396,68 +519,68 @@ void testSnapshotExclusiveSize() throws Exception { final long initialDeletedCount = metadataManager.countRowsInTable(deletedTable); final long initialRenamedCount = metadataManager.countRowsInTable(renamedTable); - final String volumeName = getTestName(); - final String bucketName = uniqueObjectName("bucket"); + final String testVolumeName = getTestName(); + final String testBucketName = uniqueObjectName("bucket"); final String keyName = uniqueObjectName("key"); // Create Volume and Buckets - createVolumeAndBucket(volumeName, bucketName, false); + createVolumeAndBucket(testVolumeName, testBucketName, false); // Create 3 keys for (int i = 1; i <= 3; i++) { - createAndCommitKey(volumeName, bucketName, keyName + i, 3); + createAndCommitKey(testVolumeName, testBucketName, keyName + i, 3); } assertTableRowCount(keyTable, initialKeyCount + 3, metadataManager); // Create Snapshot1 String snap1 = uniqueObjectName("snap"); - writeClient.createSnapshot(volumeName, bucketName, snap1); + writeClient.createSnapshot(testVolumeName, testBucketName, snap1); assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 1, metadataManager); assertTableRowCount(deletedTable, initialDeletedCount, metadataManager); // Create 2 keys for (int i = 4; i <= 5; i++) { - createAndCommitKey(volumeName, bucketName, keyName + i, 3); + createAndCommitKey(testVolumeName, testBucketName, keyName + i, 3); } // Delete a key, rename 2 keys. We will be using this to test // how we handle renamed key for exclusive size calculation. - renameKey(volumeName, bucketName, keyName + 1, "renamedKey1"); - renameKey(volumeName, bucketName, keyName + 2, "renamedKey2"); - deleteKey(volumeName, bucketName, keyName + 3); + renameKey(testVolumeName, testBucketName, keyName + 1, "renamedKey1"); + renameKey(testVolumeName, testBucketName, keyName + 2, "renamedKey2"); + deleteKey(testVolumeName, testBucketName, keyName + 3); assertTableRowCount(deletedTable, initialDeletedCount + 1, metadataManager); assertTableRowCount(renamedTable, initialRenamedCount + 2, metadataManager); // Create Snapshot2 String snap2 = uniqueObjectName("snap"); - writeClient.createSnapshot(volumeName, bucketName, snap2); + writeClient.createSnapshot(testVolumeName, testBucketName, snap2); assertTableRowCount(snapshotInfoTable, initialSnapshotCount + 2, metadataManager); assertTableRowCount(deletedTable, initialDeletedCount, metadataManager); // Create 2 keys for (int i = 6; i <= 7; i++) { - createAndCommitKey(volumeName, bucketName, keyName + i, 3); + createAndCommitKey(testVolumeName, testBucketName, keyName + i, 3); } - deleteKey(volumeName, bucketName, "renamedKey1"); - deleteKey(volumeName, bucketName, keyName + 4); + deleteKey(testVolumeName, testBucketName, "renamedKey1"); + deleteKey(testVolumeName, testBucketName, keyName + 4); // Do a second rename of already renamedKey2 - renameKey(volumeName, bucketName, "renamedKey2", "renamedKey22"); + renameKey(testVolumeName, testBucketName, "renamedKey2", "renamedKey22"); assertTableRowCount(deletedTable, initialDeletedCount + 2, metadataManager); assertTableRowCount(renamedTable, initialRenamedCount + 1, metadataManager); // Create Snapshot3 String snap3 = uniqueObjectName("snap"); - writeClient.createSnapshot(volumeName, bucketName, snap3); + writeClient.createSnapshot(testVolumeName, testBucketName, snap3); // Delete 4 keys - deleteKey(volumeName, bucketName, "renamedKey22"); + deleteKey(testVolumeName, testBucketName, "renamedKey22"); for (int i = 5; i <= 7; i++) { - deleteKey(volumeName, bucketName, keyName + i); + deleteKey(testVolumeName, testBucketName, keyName + i); } // Create Snapshot4 String snap4 = uniqueObjectName("snap"); - writeClient.createSnapshot(volumeName, bucketName, snap4); - createAndCommitKey(volumeName, bucketName, uniqueObjectName("key"), 3); + writeClient.createSnapshot(testVolumeName, testBucketName, snap4); + createAndCommitKey(testVolumeName, testBucketName, uniqueObjectName("key"), 3); long prevKdsRunCount = getRunCount(); keyDeletingService.resume(); @@ -468,6 +591,7 @@ void testSnapshotExclusiveSize() throws Exception { .put(snap3, 2000L) .put(snap4, 0L) .build(); + System.out.println(expectedSize); // Let KeyDeletingService to run for some iterations GenericTestUtils.waitFor( @@ -480,8 +604,10 @@ void testSnapshotExclusiveSize() throws Exception { while (iterator.hasNext()) { Table.KeyValue snapshotEntry = iterator.next(); String snapshotName = snapshotEntry.getValue().getName(); + Long expected = expectedSize.getOrDefault(snapshotName, 0L); assertNotNull(expected); + System.out.println(snapshotName); assertEquals(expected, snapshotEntry.getValue().getExclusiveSize()); // Since for the test we are using RATIS/THREE assertEquals(expected * 3, snapshotEntry.getValue().getExclusiveReplicatedSize()); From 05787df09a9f6c721335729120d1411fc381578a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 23 Sep 2024 20:50:34 -0700 Subject: [PATCH 05/14] HDDS-11411. Address review comments Change-Id: I28f0dc728377170f2c8280d3358d70cc753482f4 --- .../key/OMDirectoriesPurgeRequestWithFSO.java | 2 +- .../om/request/key/OMKeyPurgeRequest.java | 6 +-- .../om/service/DirectoryDeletingService.java | 3 +- .../ozone/om/snapshot/SnapshotUtils.java | 41 ++++++------------- 4 files changed, 18 insertions(+), 34 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java index f47a40a3891a..29ed5d9fc7b5 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMDirectoriesPurgeRequestWithFSO.java @@ -94,7 +94,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn expectedPreviousSnapshotId); } } catch (IOException e) { - LOG.error("Error occured while performing OMDirectoriesPurge. ", e); + LOG.error("Error occurred while performing OMDirectoriesPurge. ", e); return new OMDirectoriesPurgeResponseWithFSO(createErrorOMResponse(omResponse, e)); } try { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java index e898e29cd01f..468d473214b6 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java @@ -79,10 +79,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn // Checking if this request is an old request or new one. if (purgeKeysRequest.hasExpectedPreviousSnapshotID()) { // Validating previous snapshot since while purging deletes, a snapshot create request could make this purge - // directory request invalid on AOS since the deletedDirectory would be in the newly created snapshot. Adding - // subdirectories could lead to not being able to reclaim sub-files and subdirectories since the - // file/directory would be present in the newly created snapshot. - // Validating previous snapshot can ensure the chain hasn't changed. + // key request invalid on AOS since the deletedKey would be in the newly created snapshot. This would add an + // redundant tombstone entry in the deletedTable. It is better to skip the transaction. UUID expectedPreviousSnapshotId = purgeKeysRequest.getExpectedPreviousSnapshotID().hasUuid() ? fromProtobuf(purgeKeysRequest.getExpectedPreviousSnapshotID().getUuid()) : null; validatePreviousSnapshotId(fromSnapshotInfo, omMetadataManager.getSnapshotChainManager(), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 87c4b2fc299d..0938ff27557d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -287,7 +287,8 @@ private boolean previousSnapshotHasDir( String prevDbKey = prevDirTableDBKey == null ? metadataManager.getOzoneDeletePathDirKey(key) : prevDirTableDBKey; OmDirectoryInfo prevDirInfo = prevDirTable.get(prevDbKey); - //Check the snapshot chain hasn't changed while the checking the previous snapshot. + //Check the snapshot chain hasn't changed while the checking the previous snapshot for the presence of the + // deleted directory in the previous snapshot. SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); return (!Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java index 4902facb7b83..9fe8431af384 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java @@ -163,7 +163,8 @@ public static SnapshotInfo getNextSnapshot(OzoneManager ozoneManager, // SnapshotChainManager might throw NoSuchElementException as the snapshot // is removed in-memory but OMDoubleBuffer has not flushed yet. if (snapInfo == null) { - throw new OMException("Snapshot Info is null. Cannot get the next snapshot", INVALID_SNAPSHOT_ERROR); + throw new OMException("Provided Snapshot Info argument is null. Cannot get the next snapshot for a null value", + INVALID_SNAPSHOT_ERROR); } try { if (chainManager.hasNextPathSnapshot(snapInfo.getSnapshotPath(), @@ -180,7 +181,7 @@ public static SnapshotInfo getNextSnapshot(OzoneManager ozoneManager, } /** - * Get the previous in the snapshot chain. + * Get the previous snapshot in the snapshot chain. */ public static SnapshotInfo getPreviousSnapshot(OzoneManager ozoneManager, SnapshotChainManager chainManager, @@ -191,10 +192,9 @@ public static SnapshotInfo getPreviousSnapshot(OzoneManager ozoneManager, } /** - * Get the previous in the snapshot chain. + * Get the previous snapshot in the snapshot chain. */ - public static UUID getPreviousSnapshotId(SnapshotInfo snapInfo, - SnapshotChainManager chainManager) + private static UUID getPreviousSnapshotId(SnapshotInfo snapInfo, SnapshotChainManager chainManager) throws IOException { // If the snapshot is deleted in the previous run, then the in-memory // SnapshotChainManager might throw NoSuchElementException as the snapshot @@ -208,10 +208,8 @@ public static UUID getPreviousSnapshotId(SnapshotInfo snapInfo, return chainManager.previousPathSnapshot(snapInfo.getSnapshotPath(), snapInfo.getSnapshotId()); } - } catch (NoSuchElementException ex) { - LOG.error("The snapshot {} is not longer in snapshot chain, It " + - "maybe removed in the previous Snapshot purge request.", - snapInfo.getTableKey()); + } catch (NoSuchElementException ignored) { + } return null; } @@ -321,13 +319,6 @@ private static boolean isSameAsLatestOmKeyInfo(List omKeyInfos, return false; } - public static SnapshotInfo getLatestGlobalSnapshotInfo(OzoneManager ozoneManager, - SnapshotChainManager snapshotChainManager) throws IOException { - Optional latestGlobalSnapshot = Optional.ofNullable(snapshotChainManager.getLatestGlobalSnapshotId()); - return latestGlobalSnapshot.isPresent() ? getSnapshotInfo(ozoneManager, snapshotChainManager, - latestGlobalSnapshot.get()) : null; - } - public static SnapshotInfo getLatestSnapshotInfo(String volumeName, String bucketName, OzoneManager ozoneManager, SnapshotChainManager snapshotChainManager) throws IOException { @@ -349,18 +340,12 @@ public static UUID getLatestSnapshotId(String volumeName, String bucketName, public static void validatePreviousSnapshotId(SnapshotInfo snapshotInfo, SnapshotChainManager snapshotChainManager, UUID expectedPreviousSnapshotId) throws IOException { - try { - UUID previousSnapshotId = snapshotInfo == null ? snapshotChainManager.getLatestGlobalSnapshotId() : - SnapshotUtils.getPreviousSnapshotId(snapshotInfo, snapshotChainManager); - if (!Objects.equals(expectedPreviousSnapshotId, previousSnapshotId)) { - throw new OMException("Snapshot validation failed. Expected previous snapshotId : " + - expectedPreviousSnapshotId + " but was " + previousSnapshotId, - OMException.ResultCodes.INVALID_REQUEST); - } - } catch (IOException e) { - LOG.error("Error while validating previous snapshot for snapshot: {}", - snapshotInfo == null ? null : snapshotInfo.getName(), e); - throw e; + UUID previousSnapshotId = snapshotInfo == null ? snapshotChainManager.getLatestGlobalSnapshotId() : + SnapshotUtils.getPreviousSnapshotId(snapshotInfo, snapshotChainManager); + if (!Objects.equals(expectedPreviousSnapshotId, previousSnapshotId)) { + throw new OMException("Snapshot validation failed. Expected previous snapshotId : " + + expectedPreviousSnapshotId + " but was " + previousSnapshotId, + OMException.ResultCodes.INVALID_REQUEST); } } } From bf40940de704080a979b61c4d6bd2cfb0ef29f6e Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 23 Sep 2024 20:58:36 -0700 Subject: [PATCH 06/14] HDDS-11411. FIx deep clean snapshot code Change-Id: I6f4673d71887e07563ad1304d047bacb54cddc4e --- .../hadoop/ozone/om/service/KeyDeletingService.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index f314293a5ff8..ebee07f99a72 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -265,7 +265,6 @@ private void processSnapshotDeepClean(int delCount) List keysToPurge = new ArrayList<>(); HashMap keysToModify = new HashMap<>(); SnapshotInfo currSnapInfo = snapshotInfoTable.get(iterator.next().getKey()); - // Deep clean only on active snapshot. Deleted Snapshots will be // cleaned up by SnapshotDeletingService. if (currSnapInfo == null || currSnapInfo.getSnapshotStatus() != SNAPSHOT_ACTIVE || @@ -273,6 +272,15 @@ private void processSnapshotDeepClean(int delCount) continue; } + SnapshotInfo prevSnapInfo = SnapshotUtils.getPreviousSnapshot(getOzoneManager(), snapChainManager, + currSnapInfo); + if (prevSnapInfo != null && + (prevSnapInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || + !OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), + prevSnapInfo))) { + continue; + } + try (ReferenceCounted rcCurrOmSnapshot = omSnapshotManager.getSnapshot( currSnapInfo.getVolumeName(), From 7b570d53b466e39d80f4883df9154374816ccf52 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Mon, 23 Sep 2024 22:56:57 -0700 Subject: [PATCH 07/14] HDDS-11411. FIx deep clean snapshot code Change-Id: I0f4944a14280ef8c941fb0a4b54e184bc78b676d --- .../fs/ozone/TestDirectoryDeletingServiceWithFSO.java | 8 +------- .../hadoop/ozone/om/service/TestKeyDeletingService.java | 2 -- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java index af9366ce6f71..8d161dedeb33 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestDirectoryDeletingServiceWithFSO.java @@ -42,8 +42,6 @@ import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; -import org.apache.hadoop.ozone.om.KeyManager; -import org.apache.hadoop.ozone.om.KeyManagerImpl; import org.apache.hadoop.ozone.om.OMMetadataManager; import org.apache.hadoop.ozone.om.OmSnapshotManager; import org.apache.hadoop.ozone.om.OzoneManager; @@ -528,11 +526,7 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() OzoneManager ozoneManager = Mockito.spy(cluster.getOzoneManager()); OmSnapshotManager omSnapshotManager = Mockito.spy(ozoneManager.getOmSnapshotManager()); - KeyManager km = Mockito.spy(new KeyManagerImpl(ozoneManager, ozoneManager.getScmClient(), cluster.getConf(), - ozoneManager.getPerfMetrics())); - when(ozoneManager.getOmSnapshotManager()).thenAnswer(i -> { - return omSnapshotManager; - }); + when(ozoneManager.getOmSnapshotManager()).thenAnswer(i -> omSnapshotManager); DirectoryDeletingService service = Mockito.spy(new DirectoryDeletingService(1000, TimeUnit.MILLISECONDS, 1000, ozoneManager, cluster.getConf())); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java index 3323978f7c13..ff6506da0347 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java @@ -314,8 +314,6 @@ public void testAOSKeyDeletingWithSnapshotCreateParallelExecution() om.getMetadataManager().getSnapshotInfoTable(); Table deletedTable = om.getMetadataManager().getDeletedTable(); - Table keyTable = - om.getMetadataManager().getKeyTable(BucketLayout.DEFAULT); Table renameTable = om.getMetadataManager().getSnapshotRenamedTable(); // Suspend KeyDeletingService From 89ca0f354493e139b7025b276a525a62cf7708ae Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 24 Sep 2024 09:09:04 -0700 Subject: [PATCH 08/14] HDDS-11411. Handle bucket delete case Change-Id: Id05d9e2d1b286eb90a859418e92c6080faa94558 --- .../org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java index 41cb60f3d162..de201fd5d4b5 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java @@ -1610,7 +1610,9 @@ public PendingKeysDeletion getPendingDeletionKeys(final int keyCount, String[] keySplit = kv.getKey().split(OM_KEY_PREFIX); String bucketKey = getBucketKey(keySplit[1], keySplit[2]); OmBucketInfo bucketInfo = getBucketTable().get(bucketKey); - SnapshotInfo previousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(bucketInfo.getVolumeName(), + // If Bucket deleted bucketInfo would be null, thus making previous snapshot also null. + SnapshotInfo previousSnapshotInfo = bucketInfo == null ? null : + SnapshotUtils.getLatestSnapshotInfo(bucketInfo.getVolumeName(), bucketInfo.getBucketName(), ozoneManager, snapshotChainManager); // previous snapshot is not active or it has not been flushed to disk then don't process the key in this // iteration. @@ -1700,7 +1702,9 @@ public PendingKeysDeletion getPendingDeletionKeys(final int keyCount, List notReclaimableKeyInfoList = notReclaimableKeyInfo.getOmKeyInfoList(); - SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(bucketInfo.getVolumeName(), + // If Bucket deleted bucketInfo would be null, thus making previous snapshot also null. + SnapshotInfo newPreviousSnapshotInfo = bucketInfo == null ? null : + SnapshotUtils.getLatestSnapshotInfo(bucketInfo.getVolumeName(), bucketInfo.getBucketName(), ozoneManager, snapshotChainManager); // Check if the previous snapshot in the chain hasn't changed. if (Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), From 2a707fa325ee9b1f9807a76a70801c4e61b6496a Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 24 Sep 2024 09:18:01 -0700 Subject: [PATCH 09/14] HDDS-11411. Fix nullable uuid issue Change-Id: Iee84d72891478a94d31ad2804c11601590a025d1 --- .../hadoop/ozone/om/service/AbstractKeyDeletingService.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java index 74ce5541f8f8..0d3a05c9e471 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/AbstractKeyDeletingService.java @@ -207,10 +207,7 @@ private int submitPurgeKeysRequest(List results, if (expectedPreviousSnapshotId != null) { expectedPreviousSnapshotNullableUUID.setUuid(HddsUtils.toProtobuf(expectedPreviousSnapshotId)); } - - if (expectedPreviousSnapshotId != null) { - purgeKeysRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID); - } + purgeKeysRequest.setExpectedPreviousSnapshotID(expectedPreviousSnapshotNullableUUID.build()); // Add keys to PurgeKeysRequest bucket wise. for (Map.Entry, List> entry : From 792db8e6d2ab4fdb838f0aa2d12215b56434d6c7 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 24 Sep 2024 09:29:45 -0700 Subject: [PATCH 10/14] HDDS-11411. Address review comments Change-Id: I5990881537cacdf1bbaf26b15607e9c398825d25 --- .../apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java index 468d473214b6..a5e8cb145255 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyPurgeRequest.java @@ -87,7 +87,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn expectedPreviousSnapshotId); } } catch (IOException e) { - LOG.error("Error occured while performing OMDirectoriesPurge. ", e); + LOG.error("Error occurred while performing OmKeyPurge. ", e); return new OMKeyPurgeResponse(createErrorOMResponse(omResponse, e)); } From 36411a6bcb6a7a58f6e095926050a6b61a3a509f Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 24 Sep 2024 12:22:46 -0700 Subject: [PATCH 11/14] HDDS-11411. Fix deep cleaning snapshot Change-Id: I36be1adaf56b11ce1049813ae90c7f6a2817aa9d --- .../hadoop/ozone/om/service/KeyDeletingService.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index ebee07f99a72..a81c23cbbb21 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -23,6 +23,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; @@ -272,15 +273,6 @@ private void processSnapshotDeepClean(int delCount) continue; } - SnapshotInfo prevSnapInfo = SnapshotUtils.getPreviousSnapshot(getOzoneManager(), snapChainManager, - currSnapInfo); - if (prevSnapInfo != null && - (prevSnapInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || - !OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), - prevSnapInfo))) { - continue; - } - try (ReferenceCounted rcCurrOmSnapshot = omSnapshotManager.getSnapshot( currSnapInfo.getVolumeName(), @@ -444,7 +436,8 @@ private void processSnapshotDeepClean(int delCount) if (!keysToPurge.isEmpty()) { processKeyDeletes(keysToPurge, currOmSnapshot.getKeyManager(), - keysToModify, currSnapInfo.getTableKey(), null); + keysToModify, currSnapInfo.getTableKey(), + Optional.ofNullable(previousSnapshot).map(SnapshotInfo::getSnapshotId).orElse(null)); } } finally { IOUtils.closeQuietly(rcPrevOmSnapshot, rcPrevToPrevOmSnapshot); From 723c52b082918a3ce7ecdd0fe5f6e17e3ae1cd18 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 24 Sep 2024 14:29:56 -0700 Subject: [PATCH 12/14] HDDS-11411. Fix deep cleaning snapshot Change-Id: Ib110db810fae238e13039b15c0e36944081900a4 --- .../TestSnapshotDeletingServiceIntegrationTest.java | 4 +++- .../snapshot/TestSnapshotDirectoryCleaningService.java | 2 ++ .../hadoop/ozone/om/service/KeyDeletingService.java | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java index 53a7f9c893c6..254de072e05b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDeletingServiceIntegrationTest.java @@ -87,6 +87,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_TIMEOUT; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.ArgumentMatchers.anyBoolean; @@ -130,6 +131,7 @@ public void setup() throws Exception { 1, StorageUnit.MB); conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 500, TimeUnit.MILLISECONDS); + conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_TIMEOUT, 10000, TimeUnit.MILLISECONDS); conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 500); @@ -447,7 +449,7 @@ public void testSnapshotWithFSO() throws Exception { while (iterator.hasNext()) { Table.KeyValue next = iterator.next(); String activeDBDeletedKey = next.getKey(); - if (activeDBDeletedKey.matches(".*/key1.*")) { + if (activeDBDeletedKey.matches(".*/key1/.*")) { RepeatedOmKeyInfo activeDBDeleted = next.getValue(); OMMetadataManager metadataManager = cluster.getOzoneManager().getMetadataManager(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java index 03df331087b8..3be0725a0093 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDirectoryCleaningService.java @@ -57,6 +57,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_FS_ITERATE_BATCH_SIZE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -79,6 +80,7 @@ public class TestSnapshotDirectoryCleaningService { public static void init() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); conf.setInt(OMConfigKeys.OZONE_SNAPSHOT_DIRECTORY_SERVICE_INTERVAL, 2500); + conf.setBoolean(OZONE_SNAPSHOT_DEEP_CLEANING_ENABLED, true); conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 2500, TimeUnit.MILLISECONDS); conf.setBoolean(OZONE_ACL_ENABLED, true); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java index a81c23cbbb21..9a4f74eba59c 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java @@ -273,6 +273,15 @@ private void processSnapshotDeepClean(int delCount) continue; } + SnapshotInfo prevSnapInfo = SnapshotUtils.getPreviousSnapshot(getOzoneManager(), snapChainManager, + currSnapInfo); + if (prevSnapInfo != null && + (prevSnapInfo.getSnapshotStatus() != SnapshotInfo.SnapshotStatus.SNAPSHOT_ACTIVE || + !OmSnapshotManager.areSnapshotChangesFlushedToDB(getOzoneManager().getMetadataManager(), + prevSnapInfo))) { + continue; + } + try (ReferenceCounted rcCurrOmSnapshot = omSnapshotManager.getSnapshot( currSnapInfo.getVolumeName(), From 3f568db094e9f6a862856669e6707291a8e27694 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 24 Sep 2024 14:39:30 -0700 Subject: [PATCH 13/14] HDDS-11411. Address review comments Change-Id: I0b2187ccff966d9b35cdcc0c87ea72dea4324c12 --- .../ozone/om/service/DirectoryDeletingService.java | 5 +++-- .../apache/hadoop/ozone/om/snapshot/SnapshotUtils.java | 9 +++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java index 0938ff27557d..e638ce45a4a9 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/DirectoryDeletingService.java @@ -287,8 +287,9 @@ private boolean previousSnapshotHasDir( String prevDbKey = prevDirTableDBKey == null ? metadataManager.getOzoneDeletePathDirKey(key) : prevDirTableDBKey; OmDirectoryInfo prevDirInfo = prevDirTable.get(prevDbKey); - //Check the snapshot chain hasn't changed while the checking the previous snapshot for the presence of the - // deleted directory in the previous snapshot. + //Checking if the previous snapshot in the chain hasn't changed while checking if the deleted directory is + // present in the previous snapshot. If the chain has changed, the deleted directory could have been moved + // to the newly created snapshot. SnapshotInfo newPreviousSnapshotInfo = SnapshotUtils.getLatestSnapshotInfo(deletedDirInfo.getVolumeName(), deletedDirInfo.getBucketName(), getOzoneManager(), metadataManager.getSnapshotChainManager()); return (!Objects.equals(Optional.ofNullable(newPreviousSnapshotInfo).map(SnapshotInfo::getSnapshotId), diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java index 9fe8431af384..1b9688856ca6 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java @@ -200,7 +200,8 @@ private static UUID getPreviousSnapshotId(SnapshotInfo snapInfo, SnapshotChainMa // SnapshotChainManager might throw NoSuchElementException as the snapshot // is removed in-memory but OMDoubleBuffer has not flushed yet. if (snapInfo == null) { - throw new OMException("Snapshot Info is null. Cannot get the next snapshot", INVALID_SNAPSHOT_ERROR); + throw new OMException("Provided Snapshot Info argument is null. Cannot get the previous snapshot for a null " + + "value", INVALID_SNAPSHOT_ERROR); } try { if (chainManager.hasPreviousPathSnapshot(snapInfo.getSnapshotPath(), @@ -334,9 +335,9 @@ public static UUID getLatestSnapshotId(String volumeName, String bucketName, return snapshotChainManager.getLatestPathSnapshotId(snapshotPath); } - // Validates previous snapshotId given a snapshotInfo or volumeName & bucketName. Incase snapshotInfo is null, this - // would be considered as AOS and previous snapshot becomes the latest snapshot in the global snapshot chain. - // Would throw OMException if validation fails otherwise function would pass. + // Validates the previous path snapshotId for given a snapshotInfo. In case snapshotInfo is + // null, the snapshotInfo would be considered as AOS and previous snapshot becomes the latest snapshot in the global + // snapshot chain. Would throw OMException if validation fails otherwise function would pass. public static void validatePreviousSnapshotId(SnapshotInfo snapshotInfo, SnapshotChainManager snapshotChainManager, UUID expectedPreviousSnapshotId) throws IOException { From 7cb49b71df62e55ceb3747d73d017366ee9c51e3 Mon Sep 17 00:00:00 2001 From: Swaminathan Balachandran Date: Tue, 24 Sep 2024 14:42:11 -0700 Subject: [PATCH 14/14] HDDS-11411. Address review comments Change-Id: I4871b9f913244135bc8af486262d6c6ac4490821 --- .../org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java index 1b9688856ca6..87983b072679 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotUtils.java @@ -324,13 +324,13 @@ public static SnapshotInfo getLatestSnapshotInfo(String volumeName, String bucke OzoneManager ozoneManager, SnapshotChainManager snapshotChainManager) throws IOException { Optional latestPathSnapshot = Optional.ofNullable( - getLatestSnapshotId(volumeName, bucketName, snapshotChainManager)); + getLatestPathSnapshotId(volumeName, bucketName, snapshotChainManager)); return latestPathSnapshot.isPresent() ? getSnapshotInfo(ozoneManager, snapshotChainManager, latestPathSnapshot.get()) : null; } - public static UUID getLatestSnapshotId(String volumeName, String bucketName, - SnapshotChainManager snapshotChainManager) throws IOException { + public static UUID getLatestPathSnapshotId(String volumeName, String bucketName, + SnapshotChainManager snapshotChainManager) throws IOException { String snapshotPath = volumeName + OM_KEY_PREFIX + bucketName; return snapshotChainManager.getLatestPathSnapshotId(snapshotPath); }