From bf0dd6e8717442cb22ff8b9025f21b34ccea34a6 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 5 Jun 2024 21:48:42 -0700 Subject: [PATCH 01/24] Initial outline of checksum manager and merkle tree with proto --- .../checksum/ContainerChecksumManager.java | 135 +++++++++++++++++ .../checksum/ContainerMerkleTree.java | 140 ++++++++++++++++++ .../container/checksum/package-info.java | 21 +++ .../main/proto/DatanodeClientProtocol.proto | 17 +++ 4 files changed, 313 insertions(+) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java new file mode 100644 index 000000000000..b709a85a55a7 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.container.checksum;
+
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.SortedSet;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.Lock;
+
+import com.google.common.util.concurrent.Striped;
+import org.apache.hadoop.hdds.utils.SimpleStriped;
+
+/**
+ * This class coordinates reading and writing Container checksum information for all containers.
+ */
+public class ContainerChecksumManager {
+
+  private Striped<ReadWriteLock> fileLock;
+
+  /**
+   * Creates one instance that should be used to coordinate all container checksum info within a datanode.
+   */
+  public ContainerChecksumManager() {
+    // TODO add config key for size.
+    fileLock = SimpleStriped.readWriteLock(127, true);
+  }
+
+  /**
+   * Writes the specified container merkle tree to the specified container's checksum file.
+   * The data merkle tree within the file is replaced with the {@code tree} parameter, but all other content of the
+   * file remains unchanged.
+   * Concurrent writes to the same file are coordinated internally.
+   */
+  public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkleTree tree) throws IOException {
+    Lock writeLock = getWriteLock(data.getContainerID());
+    writeLock.lock();
+    try {
+      ContainerChecksumInfo newChecksumInfo = read(data).toBuilder()
+          .setDataMerkleTree(tree.toProto())
+          .build();
+      write(data, newChecksumInfo);
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  /**
+   * Adds the specified blocks to the list of deleted blocks specified in the container's checksum file.
+   * All other content of the file remains unchanged.
+   * Concurrent writes to the same file are coordinated internally.
+   */
+  public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet<Long> deletedBlockIDs) throws IOException {
+    Lock writeLock = getWriteLock(data.getContainerID());
+    writeLock.lock();
+    try {
+      ContainerChecksumInfo newChecksumInfo = read(data).toBuilder()
+          // TODO actually need to merge here to keep blocks in sorted order.
+          .addAllDeletedBlocks(deletedBlockIDs)
+          .build();
+      write(data, newChecksumInfo);
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerChecksumInfo otherInfo) throws IOException {
+    // TODO HDDS-10928 compare the checksum info of the two containers and return a summary.
+    // Callers can act on this summary to repair their container replica using the peer's replica.
+ return new ContainerDiff(); + } + + private Lock getReadLock(long containerID) { + return fileLock.get(containerID).readLock(); + } + + private Lock getWriteLock(long containerID) { + return fileLock.get(containerID).writeLock(); + } + + private ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { + Lock readLock = getReadLock(data.getContainerID()); + readLock.lock(); + try (FileInputStream inStream = new FileInputStream(getContainerChecksumFile(data))) { + return ContainerChecksumInfo.parseFrom(inStream); + } finally { + readLock.unlock(); + } + } + + private void write(KeyValueContainerData data, ContainerChecksumInfo checksumInfo) throws IOException { + Lock writeLock = getWriteLock(data.getContainerID()); + writeLock.lock(); + try (FileOutputStream outStream = new FileOutputStream(getContainerChecksumFile(data))) { + checksumInfo.writeTo(outStream); + } finally { + writeLock.unlock(); + } + } + + private File getContainerChecksumFile(KeyValueContainerData data) { + return new File(data.getMetadataPath(), data.getContainerID() + ".checksum"); + } + + /** + * This class represents the different between our replica of a container, and a peer's replica of a container. + * It summarizes the operations we need to do to reconcile our replica the peer replica it was compared to. + * + * TODO HDDS-10928 + */ + public static class ContainerDiff { + public ContainerDiff() { + + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java new file mode 100644 index 000000000000..c400401318b2 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.container.checksum;
+
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockMerkleTreeProto;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto;
+import org.apache.hadoop.ozone.common.ChecksumByteBuffer;
+import org.apache.hadoop.ozone.common.ChecksumByteBufferFactory;
+import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * This class represents a Merkle tree that provides one checksum for all data within a container.
+ *
+ * The leaves of the tree are the checksums of every chunk. Each chunk checksum in a block is further checksumed
+ * together to generate the block level checksum. Finally, The checksums of all blocks are checksumed together to
+ * create a container level checksum. Note that checksums are order dependent. Chunk checksums are sorted by their
+ * offset within a block, and blocks are sorted by their ID.
+ *
+ * This class can be used to construct a consistent and completely filled {@link ContainerMerkleTreeProto} object.
+ * It allows building a container merkle tree from scratch by incrementally adding chunks.
+ * The final checksums at higher levels of the tree are not calculated until
+ * {@link ContainerMerkleTree#toProto} is called.
+ */
+public class ContainerMerkleTree {
+
+  private final SortedMap<Long, BlockMerkleTree> blocks;
+
+  /**
+   * Constructs an empty Container merkle tree object.
+   */
+  public ContainerMerkleTree() {
+    blocks = new TreeMap<>();
+  }
+
+  /**
+   * Adds chunks to a block in the tree. The block entry will be created if it is the first time adding chunks to it.
+   * If the block entry already exists, the chunks will be added to the existing chunks for that block.
+   *
+   * @param blockID The ID of the block that these chunks belong to.
+   * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the
+   *                      existing sorted list of chunks stored for this block.
+   */
+  public void addChunks(long blockID, SortedMap<Long, ChunkInfo> offset2Chunks) {
+    blocks.getOrDefault(blockID, new BlockMerkleTree(blockID)).addChunks(offset2Chunks);
+  }
+
+  /**
+   * Uses chunk hashes to compute all remaining hashes in the tree, and returns it as a protobuf object. No checksum
+   * computation happens outside of this method.
+   *
+   * @return A complete protobuf object representation of this tree.
+   */
+  public ContainerMerkleTreeProto toProto() {
+    // Compute checksums and return the result.
+    ContainerMerkleTreeProto.Builder treeProto = ContainerMerkleTreeProto.newBuilder();
+    // TODO configurable checksum implementation
+    ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl();
+    ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * blocks.size());
+    for (Map.Entry<Long, BlockMerkleTree> entry: blocks.entrySet()) {
+      BlockMerkleTreeProto blockTreeProto = entry.getValue().toProto();
+      treeProto.addBlockMerkleTree(blockTreeProto);
+      containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum());
+    }
+    treeProto.setDataChecksum(checksumImpl.getValue());
+    return treeProto.build();
+  }
+
+  /**
+   * Represents a merkle tree for a single block within a container.
+   */
+  private static class BlockMerkleTree {
+    // Map of each offset within the block to its chunk info.
+    // Chunk order in the checksum is determined by their offset.
+    private final SortedMap<Long, ChunkInfo> chunks;
+    private final long blockID;
+
+    BlockMerkleTree(long blockID) {
+      this.blockID = blockID;
+      this.chunks = new TreeMap<>();
+    }
+
+    /**
+     * Adds the specified chunks to this block. This should run in linear time since the {@link SortedMap} parameter
+     * is added to an existing {@link TreeMap} internally.
+     *
+     * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the
+     *                      existing sorted list of chunks stored for this block.
+     */
+    public void addChunks(SortedMap<Long, ChunkInfo> offset2Chunks) {
+      chunks.putAll(offset2Chunks);
+    }
+
+    /**
+     * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. No checksum
+     * computation happens outside of this method.
+     *
+     * @return A complete protobuf object representation of this tree.
+     */
+    public BlockMerkleTreeProto toProto() {
+      BlockMerkleTreeProto.Builder blockTreeBuilder = BlockMerkleTreeProto.newBuilder();
+      // TODO configurable checksum implementation
+      ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl();
+
+      for (ChunkInfo chunk: chunks.values()) {
+        // TODO can we depend on this ordering to be consistent?
+        List<ByteString> chunkChecksums = chunk.getChecksumData().getChecksums();
+        blockTreeBuilder.addChunks(chunk.getProtoBufMessage());
+        for (ByteString checksum: chunkChecksums) {
+          checksumImpl.update(checksum.asReadOnlyByteBuffer());
+        }
+      }
+
+      return blockTreeBuilder
+          .setBlockID(blockID)
+          .setBlockChecksum(checksumImpl.getValue())
+          .build();
+    }
+  }
+}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java
new file mode 100644
index 000000000000..9dfdc88bf1ec
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.checksum; +/** + * This package contains classes handling container level checksums. + */ diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index 7755b993caea..7af47a7119de 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -527,6 +527,23 @@ message SendContainerRequest { message SendContainerResponse { } +message BlockMerkleTreeProto { + optional int64 blockID = 1; + optional int64 blockChecksum = 2; + repeated ChunkInfo chunks = 3; +} + +message ContainerMerkleTreeProto { + optional int64 dataChecksum = 1; + repeated BlockMerkleTreeProto blockMerkleTree = 2; +} + +message ContainerChecksumInfo { + optional int64 containerID = 1; + optional ContainerMerkleTreeProto dataMerkleTree = 2; + repeated int64 deletedBlocks = 3; +} + service XceiverClientProtocolService { // A client-to-datanode RPC to send container commands rpc send(stream ContainerCommandRequestProto) returns From 3adbe5a9ffbd08bccf7b61273b783d0bb7641a5f Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 18 Jun 2024 17:08:36 -0400 Subject: [PATCH 02/24] Add config key for lock stripes --- .../checksum/ContainerChecksumManager.java | 6 ++--- .../statemachine/DatanodeConfiguration.java | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java index b709a85a55a7..eb7740f9b8b3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.container.checksum; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import java.io.File; @@ -40,9 +41,8 @@ public class ContainerChecksumManager { /** * Creates one instance that should be used to coordinate all container checksum info within a datanode. */ - public ContainerChecksumManager() { - // TODO add config key for size. 
-    fileLock = SimpleStriped.readWriteLock(127, true);
+  public ContainerChecksumManager(DatanodeConfiguration dnConf) {
+    fileLock = SimpleStriped.readWriteLock(dnConf.getContainerChecksumLockStripes(), true);
   }
 
   /**
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
index a8b0d8cfa4bc..28bbb17aa8f1 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java
@@ -74,6 +74,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
       "hdds.datanode.wait.on.all.followers";
   public static final String CONTAINER_SCHEMA_V3_ENABLED =
       "hdds.datanode.container.schema.v3.enabled";
+  public static final String CONTAINER_CHECKSUM_LOCK_STRIPES_KEY = "hdds.datanode.container.checksum.lock.stripes";
 
   static final boolean CHUNK_DATA_VALIDATION_CHECK_DEFAULT = false;
 
@@ -109,6 +110,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
       "hdds.datanode.rocksdb.delete_obsolete_files_period";
   public static final Boolean
       OZONE_DATANODE_CHECK_EMPTY_CONTAINER_DIR_ON_DELETE_DEFAULT = false;
+  public static final int CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT = 127;
 
   /**
    * Number of threads per volume that Datanode will use for chunk read.
@@ -550,6 +552,21 @@ public void setWaitOnAllFollowers(boolean val) {
   private boolean bCheckEmptyContainerDir =
       OZONE_DATANODE_CHECK_EMPTY_CONTAINER_DIR_ON_DELETE_DEFAULT;
 
+  /**
+   * The number of lock stripes used to coordinate modifications to
+   * container checksum information.
+   */
+  @Config(key = "container.checksum.lock.stripes",
+      type = ConfigType.INT,
+      defaultValue = "127",
+      tags = { DATANODE },
+      description = "The number of lock stripes used to coordinate modifications to container checksum information. " +
+          "This information is only updated after a container is closed and does not affect the data read or write" +
+          " path. Each container in the datanode will be mapped to one lock which will only be held while its " +
+          "checksum information is updated."
+  )
+  private int containerChecksumLockStripes = CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT;
+
   @PostConstruct
   public void validate() {
     if (containerDeleteThreads < 1) {
@@ -683,6 +700,12 @@ public void validate() {
       rocksdbDeleteObsoleteFilesPeriod =
           ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_DEFAULT;
     }
+
+    if (containerChecksumLockStripes < 1) {
+      LOG.warn("{} must be at least 1. Defaulting to {}", CONTAINER_CHECKSUM_LOCK_STRIPES_KEY,
+          CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT);
+      containerChecksumLockStripes = CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT;
+    }
   }
 
   public void setContainerDeleteThreads(int containerDeleteThreads) {
@@ -910,4 +933,8 @@ public int getAutoCompactionSmallSstFileNum() {
   public void setAutoCompactionSmallSstFileNum(int num) {
     this.autoCompactionSmallSstFileNum = num;
   }
+
+  public int getContainerChecksumLockStripes() {
+    return containerChecksumLockStripes;
+  }
 }

From e02a6096f45b6238768f4c78c1f48ae550cf799c Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Tue, 18 Jun 2024 17:27:53 -0400
Subject: [PATCH 03/24] Keep deleted block list sorted

---
 .../checksum/ContainerChecksumManager.java    | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
index eb7740f9b8b3..fdd8c5b279e4 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
@@ -25,6 +25,7 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.SortedSet;
+import java.util.TreeSet;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.Lock;
 
@@ -36,7 +37,7 @@
  */
 public class ContainerChecksumManager {
 
-  private Striped<ReadWriteLock> fileLock;
+  private final Striped<ReadWriteLock> fileLock;
 
   /**
    * Creates one instance that should be used to coordinate all container checksum info within a datanode.
@@ -73,11 +74,20 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet<Long> dele
     Lock writeLock = getWriteLock(data.getContainerID());
     writeLock.lock();
     try {
-      ContainerChecksumInfo newChecksumInfo = read(data).toBuilder()
-          // TODO actually need to merge here to keep blocks in sorted order.
-          .addAllDeletedBlocks(deletedBlockIDs)
+      ContainerChecksumInfo.Builder newChecksumInfoBuilder = read(data).toBuilder();
+
+      // Although the persisted block list should already be sorted, we will sort it here to make sure.
+      // This will automatically fix any bugs in the persisted order that may show up.
+      SortedSet<Long> sortedDeletedBlockIDs =
+          new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList());
+      // Since the provided list of block IDs is already sorted, this is a linear time addition.
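+      // Any IDs already present in the set are simply deduplicated by the TreeSet.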
+      sortedDeletedBlockIDs.addAll(deletedBlockIDs);
+
+      newChecksumInfoBuilder
+          .clearDeletedBlocks()
+          .addAllDeletedBlocks(sortedDeletedBlockIDs)
           .build();
-      write(data, newChecksumInfo);
+      write(data, newChecksumInfoBuilder.build());
     } finally {
       writeLock.unlock();
     }

From 3680a2ed8d90d829d96d89bda812e6c8dbefdb00 Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Thu, 20 Jun 2024 18:03:33 -0400
Subject: [PATCH 04/24] Change chunk checksum proto, add the first passing test

---
 .../checksum/ContainerMerkleTree.java         | 121 +++++++----
 .../checksum/TestContainerMerkleTree.java     | 199 ++++++++++++++++++
 .../main/proto/DatanodeClientProtocol.proto   |   9 +-
 3 files changed, 289 insertions(+), 40 deletions(-)
 create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
index c400401318b2..95ce6f76d662 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
@@ -16,6 +16,7 @@
  */
 package org.apache.hadoop.ozone.container.checksum;
 
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockMerkleTreeProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto;
 import org.apache.hadoop.ozone.common.ChecksumByteBuffer;
@@ -25,17 +26,17 @@
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 
 import java.nio.ByteBuffer;
 import java.util.List;
-import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
 /**
  * This class represents a Merkle tree that provides one checksum for all data within a container.
  *
- * The leaves of the tree are the checksums of every chunk. Each chunk checksum in a block is further checksumed
- * together to generate the block level checksum. Finally, The checksums of all blocks are checksumed together to
- * create a container level checksum. Note that checksums are order dependent. Chunk checksums are sorted by their
- * offset within a block, and blocks are sorted by their ID.
+ * As the leaves of the tree, a checksum for each chunk is computed by taking a checksum of all checksums within that
+ * chunk. Each chunk checksum in a block is further checksummed together to generate the block level checksum. Finally,
+ * The checksums of all blocks are checksummed together to create a container level checksum.
+ * Note that checksums are order dependent. Chunk checksums are sorted by their
+ * offset within a block, and block checksums are sorted by their block ID.
  *
  * This class can be used to construct a consistent and completely filled {@link ContainerMerkleTreeProto} object.
  * It allows building a container merkle tree from scratch by incrementally adding chunks.
  * The final checksums at higher levels of the tree are not calculated until
  * {@link ContainerMerkleTree#toProto} is called.
  */
 public class ContainerMerkleTree {
 
-  private final SortedMap<Long, BlockMerkleTree> blocks;
+  private final SortedMap<Long, BlockMerkleTree> id2Block;
 
   /**
    * Constructs an empty Container merkle tree object.
    */
   public ContainerMerkleTree() {
-    blocks = new TreeMap<>();
+    id2Block = new TreeMap<>();
   }
 
   /**
@@ -58,32 +59,37 @@ public ContainerMerkleTree() {
    * Adds chunks to a block in the tree. The block entry will be created if it is the first time adding chunks to it.
    * If the block entry already exists, the chunks will be added to the existing chunks for that block.
    *
    * @param blockID The ID of the block that these chunks belong to.
-   * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the
-   *                      existing sorted list of chunks stored for this block.
+   * @param chunks A list of chunks to add to this block. The chunks will be sorted internally by their offset.
    */
-  public void addChunks(long blockID, SortedMap<Long, ChunkInfo> offset2Chunks) {
-    blocks.getOrDefault(blockID, new BlockMerkleTree(blockID)).addChunks(offset2Chunks);
+  public void addChunks(long blockID, List<ChunkInfo> chunks) {
+    id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(chunks);
   }
 
   /**
    * Uses chunk hashes to compute all remaining hashes in the tree, and returns it as a protobuf object. No checksum
-   * computation happens outside of this method.
+   * computation for the tree happens outside of this method.
    *
    * @return A complete protobuf object representation of this tree.
    */
   public ContainerMerkleTreeProto toProto() {
     // Compute checksums and return the result.
-    ContainerMerkleTreeProto.Builder treeProto = ContainerMerkleTreeProto.newBuilder();
-    // TODO configurable checksum implementation
+    ContainerMerkleTreeProto.Builder containerTreeBuilder = ContainerMerkleTreeProto.newBuilder();
     ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl();
-    ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * blocks.size());
-    for (Map.Entry<Long, BlockMerkleTree> entry: blocks.entrySet()) {
-      BlockMerkleTreeProto blockTreeProto = entry.getValue().toProto();
-      treeProto.addBlockMerkleTree(blockTreeProto);
+    ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * id2Block.size());
+
+    for (BlockMerkleTree blockTree: id2Block.values()) {
+      // Add block's checksum tree to the proto.
+      BlockMerkleTreeProto blockTreeProto = blockTree.toProto();
+      containerTreeBuilder.addBlockMerkleTree(blockTreeProto);
+      // Add the block's checksum to the buffer to calculate the container checksum.
       containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum());
     }
-    treeProto.setDataChecksum(checksumImpl.getValue());
-    return treeProto.build();
+    containerChecksumBuffer.flip();
+    checksumImpl.update(containerChecksumBuffer);
+
+    return containerTreeBuilder
+        .setDataChecksum(checksumImpl.getValue())
+        .build();
   }
 
   /**
@@ -92,44 +98,49 @@ public ContainerMerkleTreeProto toProto() {
   private static class BlockMerkleTree {
     // Map of each offset within the block to its chunk info.
     // Chunk order in the checksum is determined by their offset.
-    private final SortedMap<Long, ChunkInfo> chunks;
+    private final SortedMap<Long, ChunkMerkleTree> offset2Chunk;
     private final long blockID;
 
     BlockMerkleTree(long blockID) {
       this.blockID = blockID;
-      this.chunks = new TreeMap<>();
+      this.offset2Chunk = new TreeMap<>();
     }
 
     /**
-     * Adds the specified chunks to this block. This should run in linear time since the {@link SortedMap} parameter
-     * is added to an existing {@link TreeMap} internally.
+     * Adds the specified chunks to this block. The offset value of the chunk must be unique within the block,
+     * otherwise it will overwrite the previous value at that offset.
      *
-     * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the
-     * existing sorted list of chunks stored for this block.
+     * @param chunks A list of chunks to add to this block.
     */
-    public void addChunks(SortedMap<Long, ChunkInfo> offset2Chunks) {
-      chunks.putAll(offset2Chunks);
+    public void addChunks(List<ChunkInfo> chunks) {
+      for (ChunkInfo chunk: chunks) {
+        offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk));
+      }
     }
 
     /**
-     * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. No checksum
-     * computation happens outside of this method.
+     * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. All block checksum
+     * computation for the tree happens within this method.
      *
-     * @return A complete protobuf object representation of this tree.
+     * @return A complete protobuf object representation of this block tree.
      */
     public BlockMerkleTreeProto toProto() {
       BlockMerkleTreeProto.Builder blockTreeBuilder = BlockMerkleTreeProto.newBuilder();
-      // TODO configurable checksum implementation
       ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl();
+      ByteBuffer blockChecksumBuffer = ByteBuffer.allocate(Long.BYTES * offset2Chunk.size());
 
-      for (ChunkInfo chunk: chunks.values()) {
-        // TODO can we depend on this ordering to be consistent?
-        List<ByteString> chunkChecksums = chunk.getChecksumData().getChecksums();
-        blockTreeBuilder.addChunks(chunk.getProtoBufMessage());
-        for (ByteString checksum: chunkChecksums) {
-          checksumImpl.update(checksum.asReadOnlyByteBuffer());
-        }
+      for (ChunkMerkleTree chunkTree: offset2Chunk.values()) {
+        // Ordering of checksums within a chunk is assumed to be in the order they are written.
+        // This assumption is already built in to the code that reads and writes the values (see
+        // ChunkInputStream#validateChunk for an example on the client read path).
+        // There is no other value we can use to sort these checksums, so we assume the stored proto has them in the
+        // correct order.
+        ContainerProtos.ChunkMerkleTreeProto chunkTreeProto = chunkTree.toProto();
+        blockTreeBuilder.addChunkMerkleTree(chunkTreeProto);
+        blockChecksumBuffer.putLong(chunkTreeProto.getChunkChecksum());
       }
+      blockChecksumBuffer.flip();
+      checksumImpl.update(blockChecksumBuffer);
 
       return blockTreeBuilder
           .setBlockID(blockID)
          .setBlockChecksum(checksumImpl.getValue())
          .build();
     }
   }
+
+  /**
+   * Represents a merkle tree for a single chunk within a container.
+   * Each chunk has multiple checksums within it at each "bytesPerChecksum" interval.
+   * This class computes one checksum for the whole chunk by aggregating these.
+   */
+  private static class ChunkMerkleTree {
+    private final ChunkInfo chunk;
+
+    ChunkMerkleTree(ChunkInfo chunk) {
+      this.chunk = chunk;
+    }
+
+    /**
+     * Computes a single hash for this ChunkInfo object. All chunk level checksum computation happens within this
+     * method.
+     *
+     * @return A complete protobuf representation of this chunk as a leaf in the container merkle tree.
+ */ + public ContainerProtos.ChunkMerkleTreeProto toProto() { + ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); + for (ByteString checksum: chunk.getChecksumData().getChecksums()) { + checksumImpl.update(checksum.asReadOnlyByteBuffer()); + } + + return ContainerProtos.ChunkMerkleTreeProto.newBuilder() + .setOffset(chunk.getOffset()) + .setLength(chunk.getLen()) + .setChunkChecksum(checksumImpl.getValue()) + .build(); + } + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java new file mode 100644 index 000000000000..687b959a65f5 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -0,0 +1,199 @@ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.CRC32; + +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestContainerMerkleTree { + @Test + public void testBuildEmptyTree() { + ContainerMerkleTree tree = new ContainerMerkleTree(); + ContainerMerkleTreeProto treeProto = tree.toProto(); + assertEquals(0, treeProto.getDataChecksum()); + assertEquals(0, treeProto.getBlockMerkleTreeCount()); + } + + @Test + public void testBuildOneChunkTree() throws Exception { + // Seed the expected and actual trees with the same chunk. + final long blockID = 1; + ChunkInfo chunk = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + + // Build the expected tree proto using the test code. + ContainerProtos.ChunkMerkleTreeProto chunkTree = buildExpectedChunkTree(chunk); + ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID, + Collections.singletonList(chunkTree)); + ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + + // Use the ContainerMerkleTree to build the same tree. + ContainerMerkleTree actualTree = new ContainerMerkleTree(); + actualTree.addChunks(blockID, Collections.singletonList(chunk)); + + // Check the difference. + ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + assertTreesMatch(expectedTree, actualTreeProto); + + // Do some manual verification of the generated tree as well. 
+ assertNotEquals(0, actualTreeProto.getDataChecksum()); + assertEquals(1, actualTreeProto.getBlockMerkleTreeCount()); + + ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTreeProto.getBlockMerkleTree(0); + assertEquals(1, actualBlockTree.getBlockID()); + assertEquals(1, actualBlockTree.getChunkMerkleTreeCount()); + assertNotEquals(0, actualBlockTree.getBlockChecksum()); + + ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(0); + assertEquals(0, actualChunkTree.getOffset()); + // TODO use existing config for this value. + assertEquals(5 * 1024, actualChunkTree.getLength()); + assertNotEquals(0, actualChunkTree.getChunkChecksum()); + } + +// @Test +// public void testBuildTreeWithMissingChunks() { +// +// } +// +// @Test +// public void testBuildTreeWithMissingBlocks() { +// +// } +// +// @Test +// public void testBuildTreeAtOnce() { +// +// } +// +// @Test +// public void testBuildTreeIncrementally() { +// +// } + + private void assertTreesMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) { + assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); + assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount()); + + long prevBlockID = -1; + for (int blockIndex = 0; blockIndex < expectedTree.getBlockMerkleTreeCount(); blockIndex++) { + ContainerProtos.BlockMerkleTreeProto expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); + ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTree.getBlockMerkleTree(blockIndex); + + // Blocks should be sorted by block ID. + long currentBlockID = actualBlockTree.getBlockID(); + assertTrue(prevBlockID < currentBlockID); + prevBlockID = currentBlockID; + + assertEquals(expectedBlockTree.getBlockID(), actualBlockTree.getBlockID()); + assertEquals(expectedBlockTree.getBlockChecksum(), actualBlockTree.getBlockChecksum()); + + long prevChunkOffset = -1; + for (int chunkIndex = 0; chunkIndex < expectedBlockTree.getChunkMerkleTreeCount(); chunkIndex++) { + ContainerProtos.ChunkMerkleTreeProto expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex); + ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex); + + // Chunks should be sorted by offset. 
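+        // Offsets must strictly increase; an equal offset would indicate a duplicate chunk entry.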
+        long currentChunkOffset = actualChunkTree.getOffset();
+        assertTrue(prevChunkOffset < currentChunkOffset);
+        prevChunkOffset = currentChunkOffset;
+
+        assertEquals(expectedChunkTree.getOffset(), actualChunkTree.getOffset());
+        assertEquals(expectedChunkTree.getLength(), actualChunkTree.getLength());
+        assertEquals(expectedChunkTree.getChunkChecksum(), actualChunkTree.getChunkChecksum());
+      }
+    }
+  }
+
+  private ContainerMerkleTreeProto buildExpectedContainerTree(List<ContainerProtos.BlockMerkleTreeProto> blocks) {
+    return ContainerMerkleTreeProto.newBuilder()
+        .addAllBlockMerkleTree(blocks)
+        .setDataChecksum(computeExpectedChecksum(
+            blocks.stream()
+                .map(ContainerProtos.BlockMerkleTreeProto::getBlockChecksum)
+                .collect(Collectors.toList())))
+        .build();
+  }
+
+  private ContainerProtos.BlockMerkleTreeProto buildExpectedBlockTree(long blockID,
+      List<ContainerProtos.ChunkMerkleTreeProto> chunks) {
+    return ContainerProtos.BlockMerkleTreeProto.newBuilder()
+        .setBlockID(blockID)
+        .setBlockChecksum(computeExpectedChecksum(
+            chunks.stream()
+                .map(ContainerProtos.ChunkMerkleTreeProto::getChunkChecksum)
+                .collect(Collectors.toList())))
+        .addAllChunkMerkleTree(chunks)
+        .build();
+  }
+
+  private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo chunk) {
+    return ContainerProtos.ChunkMerkleTreeProto.newBuilder()
+        .setOffset(chunk.getOffset())
+        .setLength(chunk.getLen())
+        .setChunkChecksum(computeExpectedChunkChecksum(chunk.getChecksumData().getChecksums()))
+        .build();
+  }
+
+  /**
+   * Builds a ChunkInfo object using the provided information. No new checksums are calculated, so this can be used
+   * as either the leaves of pre-computed merkle trees that serve as expected values, or as building blocks to pass
+   * to ContainerMerkleTree to have it build the whole tree from this information.
+   *
+   * @param indexInBlock Which chunk number within a block this is. The chunk's offset is automatically calculated
+   *                     from this based on a fixed length.
+   * @param chunkChecksums The checksums within the chunk. Each is assumed to apply to a fixed value
+   *                       "bytesPerChecksum" amount of data and are assumed to be contiguous.
+   * @return The ChunkInfo proto object built from this information.
+   */
+  private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException {
+    // Arbitrary sizes chosen for testing.
+    final int bytesPerChecksum = 1024;
+    final long chunkLength = 1024 * 5;
+
+    // Each chunk checksum is added under the same ChecksumData object.
+    ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder()
+        .setType(ContainerProtos.ChecksumType.CRC32)
+        .setBytesPerChecksum(bytesPerChecksum)
+        .addAllChecksums(Arrays.stream(chunkChecksums)
+            .map(ByteString::copyFrom)
+            .collect(Collectors.toList()))
+        .build();
+
+    return ChunkInfo.getFromProtoBuf(
+        ContainerProtos.ChunkInfo.newBuilder()
+            .setChecksumData(checksumData)
+            .setChunkName("chunk")
+            .setOffset(indexInBlock * chunkLength)
+            .setLen(chunkLength)
+            .build());
+  }
+
+  private long computeExpectedChecksum(List<Long> checksums) {
+    CRC32 crc32 = new CRC32();
+    ByteBuffer longBuffer = ByteBuffer.allocate(Long.BYTES * checksums.size());
+    checksums.forEach(longBuffer::putLong);
+    longBuffer.flip();
+    crc32.update(longBuffer);
+    return crc32.getValue();
+  }
+
+  private long computeExpectedChunkChecksum(List<ByteString> checksums) {
+    CRC32 crc32 = new CRC32();
+    checksums.forEach(b -> crc32.update(b.asReadOnlyByteBuffer()));
+    return crc32.getValue();
+  }
+}
diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
index 7af47a7119de..d4aa92937f6d 100644
--- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
@@ -527,10 +527,17 @@ message SendContainerRequest {
 message SendContainerResponse {
 }
 
+// Each chunk contains multiple checksums. This message aggregates them into one checksum for the whole chunk.
+message ChunkMerkleTreeProto {
+  optional int64 offset = 1;
+  optional int64 length = 2;
+  optional int64 chunkChecksum = 3;
+}
+
 message BlockMerkleTreeProto {
   optional int64 blockID = 1;
   optional int64 blockChecksum = 2;
-  repeated ChunkInfo chunks = 3;
+  repeated ChunkMerkleTreeProto chunkMerkleTree = 3;
 }
 
 message ContainerMerkleTreeProto {

From 0d5800c7256ddc41766eb736b20701d99c0a31e4 Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Fri, 21 Jun 2024 12:10:00 -0400
Subject: [PATCH 05/24] Use config defaults for test setup

---
 .../checksum/TestContainerMerkleTree.java     | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
index 687b959a65f5..1dbae5253840 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
@@ -1,5 +1,7 @@
 package org.apache.hadoop.ozone.container.checksum;
 
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto;
 
@@ -11,6 +13,8 @@
 import java.util.stream.Collectors;
 import java.util.zip.CRC32;
 
+import org.apache.hadoop.hdds.scm.OzoneClientConfig;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 import org.junit.jupiter.api.Test;
@@ -20,6 +24,10 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 class TestContainerMerkleTree {
+  private static final long CHUNK_SIZE = (long) new OzoneConfiguration().getStorageSize(
+      ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES);
+  private static final int BYTES_PER_CHECKSUM = new OzoneClientConfig().getBytesPerChecksum();
+
   @Test
   public void testBuildEmptyTree() {
@@ -59,8 +67,7 @@ public void testBuildOneChunkTree() throws Exception {
 
     ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(0);
     assertEquals(0, actualChunkTree.getOffset());
-    // TODO use existing config for this value.
-    assertEquals(5 * 1024, actualChunkTree.getLength());
+    assertEquals(CHUNK_SIZE, actualChunkTree.getLength());
     assertNotEquals(0, actualChunkTree.getChunkChecksum());
   }
 
@@ -160,14 +167,10 @@ private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo ch
    * @return The ChunkInfo proto object built from this information.
    */
   private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException {
-    // Arbitrary sizes chosen for testing.
-    final int bytesPerChecksum = 1024;
-    final long chunkLength = 1024 * 5;
-
     // Each chunk checksum is added under the same ChecksumData object.
     ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder()
         .setType(ContainerProtos.ChecksumType.CRC32)
-        .setBytesPerChecksum(bytesPerChecksum)
+        .setBytesPerChecksum(BYTES_PER_CHECKSUM)
         .addAllChecksums(Arrays.stream(chunkChecksums)
             .map(ByteString::copyFrom)
             .collect(Collectors.toList()))
         .build();
@@ -177,8 +180,8 @@ private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) thr
         ContainerProtos.ChunkInfo.newBuilder()
             .setChecksumData(checksumData)
             .setChunkName("chunk")
-            .setOffset(indexInBlock * chunkLength)
-            .setLen(chunkLength)
+            .setOffset(indexInBlock * CHUNK_SIZE)
+            .setLen(CHUNK_SIZE)
             .build());
   }

From eb01da4b2bdcb2f2af504644e390cba06fa4fa1e Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Fri, 21 Jun 2024 13:06:35 -0400
Subject: [PATCH 06/24] Finish tests for ContainerMerkleTree

---
 .../checksum/ContainerMerkleTree.java         |   6 +-
 .../checksum/TestContainerMerkleTree.java     | 120 ++++++++++++++----
 2 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
index 95ce6f76d662..f82ac358d1cf 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
@@ -25,7 +25,7 @@
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 
 import java.nio.ByteBuffer;
-import java.util.List;
+import java.util.Collection;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
@@ -61,7 +61,7 @@ public ContainerMerkleTree() {
    * @param blockID The ID of the block that these chunks belong to.
    * @param chunks A list of chunks to add to this block. The chunks will be sorted internally by their offset.
    */
-  public void addChunks(long blockID, List<ChunkInfo> chunks) {
+  public void addChunks(long blockID, Collection<ChunkInfo> chunks) {
     id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(chunks);
   }
 
@@ -112,7 +112,7 @@ private static class BlockMerkleTree {
    *
    * @param chunks A list of chunks to add to this block.
    */
-    public void addChunks(List<ChunkInfo> chunks) {
+    public void addChunks(Collection<ChunkInfo> chunks) {
       for (ChunkInfo chunk: chunks) {
         offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk));
       }
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
index 1dbae5253840..0106b8d57ae1 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
@@ -52,9 +52,9 @@ public void testBuildOneChunkTree() throws Exception {
     ContainerMerkleTree actualTree = new ContainerMerkleTree();
     actualTree.addChunks(blockID, Collections.singletonList(chunk));
 
-    // Check the difference.
+    // Ensure the trees match.
     ContainerMerkleTreeProto actualTreeProto = actualTree.toProto();
-    assertTreesMatch(expectedTree, actualTreeProto);
+    assertTreesSortedAndMatch(expectedTree, actualTreeProto);
 
     // Do some manual verification of the generated tree as well.
     // With only one chunk present, every level of the tree should still have a nonzero checksum.
     assertNotEquals(0, actualTreeProto.getDataChecksum());
@@ -71,27 +71,101 @@ public void testBuildOneChunkTree() throws Exception {
     assertNotEquals(0, actualChunkTree.getChunkChecksum());
   }
 
-//  @Test
-//  public void testBuildTreeWithMissingChunks() {
-//
-//  }
-//
-//  @Test
-//  public void testBuildTreeWithMissingBlocks() {
-//
-//  }
-//
-//  @Test
-//  public void testBuildTreeAtOnce() {
-//
-//  }
-//
-//  @Test
-//  public void testBuildTreeIncrementally() {
-//
-//  }
-
-  private void assertTreesMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) {
+  @Test
+  public void testBuildTreeWithMissingChunks() throws Exception {
+    // These chunks will be used to seed both the expected and actual trees.
+    final long blockID = 1;
+    ChunkInfo chunk1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    // Chunk 2 is missing.
+    ChunkInfo chunk3 = buildChunk(2, ByteBuffer.wrap(new byte[]{4, 5, 6}));
+
+    // Build the expected tree proto using the test code.
+    ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID,
+        Arrays.asList(buildExpectedChunkTree(chunk1), buildExpectedChunkTree(chunk3)));
+    ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree));
+
+    // Use the ContainerMerkleTree to build the same tree.
+    ContainerMerkleTree actualTree = new ContainerMerkleTree();
+    actualTree.addChunks(blockID, Arrays.asList(chunk1, chunk3));
+
+    // Ensure the trees match.
+    ContainerMerkleTreeProto actualTreeProto = actualTree.toProto();
+    assertTreesSortedAndMatch(expectedTree, actualTreeProto);
+  }
+
+  /**
+   * A container is a set of blocks. Make sure the tree implementation is not dependent on continuity of block IDs.
+   */
+  @Test
+  public void testBuildTreeWithNonContiguousBlockIDs() throws Exception {
+    // Seed the expected and actual trees with the same chunks.
+    final long blockID1 = 1;
+    final long blockID3 = 3;
+    ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+
+    // Build the expected tree proto using the test code.
+    ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1,
+        Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2)));
+    ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3,
+        Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2)));
+    ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3));
+
+    // Use the ContainerMerkleTree to build the same tree.
+    // Add blocks and chunks out of order to test sorting.
+    ContainerMerkleTree actualTree = new ContainerMerkleTree();
+    actualTree.addChunks(blockID3, Arrays.asList(b3c2, b3c1));
+    actualTree.addChunks(blockID1, Arrays.asList(b1c1, b1c2));
+
+    // Ensure the trees match.
+    ContainerMerkleTreeProto actualTreeProto = actualTree.toProto();
+    assertTreesSortedAndMatch(expectedTree, actualTreeProto);
+  }
+
+  @Test
+  public void testAppendToBlocksWhileBuilding() throws Exception {
+    // Seed the expected and actual trees with the same chunks.
+    final long blockID1 = 1;
+    final long blockID2 = 2;
+    final long blockID3 = 3;
+    ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b1c3 = buildChunk(2, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+
+    // Build the expected tree proto using the test code.
+    ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1,
+        Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2), buildExpectedChunkTree(b1c3)));
+    ContainerProtos.BlockMerkleTreeProto blockTree2 = buildExpectedBlockTree(blockID2,
+        Arrays.asList(buildExpectedChunkTree(b2c1), buildExpectedChunkTree(b2c2)));
+    ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3,
+        Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2)));
+    ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(
+        Arrays.asList(blockTree1, blockTree2, blockTree3));
+
+    // Use the ContainerMerkleTree to build the same tree.
+    // Test building by adding chunks to the blocks individually and out of order.
+    ContainerMerkleTree actualTree = new ContainerMerkleTree();
+    // Add all of block 2 first.
+    actualTree.addChunks(blockID2, Arrays.asList(b2c1, b2c2));
+    // Then add block 1 in multiple steps with chunks out of order.
+    actualTree.addChunks(blockID1, Collections.singletonList(b1c2));
+    actualTree.addChunks(blockID1, Arrays.asList(b1c3, b1c1));
+    // Add a duplicate chunk to block 3. It should overwrite the existing one.
+    actualTree.addChunks(blockID3, Arrays.asList(b3c1, b3c2));
+    actualTree.addChunks(blockID3, Collections.singletonList(b3c2));
+
+    // Ensure the trees match.
+    ContainerMerkleTreeProto actualTreeProto = actualTree.toProto();
+    assertTreesSortedAndMatch(expectedTree, actualTreeProto);
+  }
+
+  private void assertTreesSortedAndMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) {
     assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum());
     assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount());

From f35effd5080eff949bdb57b6b4e6578a2951fc93 Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Fri, 21 Jun 2024 16:52:00 -0400
Subject: [PATCH 07/24] Add tests for checksum manager and standardize proto names

---
 .../checksum/ContainerChecksumManager.java    |  34 +++-
 .../checksum/ContainerMerkleTree.java         |  22 ++-
 .../TestContainerChecksumManager.java         | 149 ++++++++++++++++++
 .../checksum/TestContainerMerkleTree.java     |  76 ++++-----
 .../main/proto/DatanodeClientProtocol.proto   |  12 +-
 5 files changed, 230 insertions(+), 63 deletions(-)
 create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
index fdd8c5b279e4..9dd2ad605c83 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
@@ -31,12 +31,17 @@
 
 import com.google.common.util.concurrent.Striped;
 import org.apache.hadoop.hdds.utils.SimpleStriped;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * This class coordinates reading and writing Container checksum information for all containers.
  */
 public class ContainerChecksumManager {
 
+  private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumManager.class);
+
+  // Used to coordinate reads and writes to each container's checksum file.
   private final Striped<ReadWriteLock> fileLock;
 
   /**
@@ -60,6 +65,7 @@ public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkle
         .setDataMerkleTree(tree.toProto())
         .build();
       write(data, newChecksumInfo);
+      LOG.debug("Data merkle tree for container {} updated", data.getContainerID());
     } finally {
       writeLock.unlock();
     }
@@ -78,8 +84,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet<Long> dele
 
       // Although the persisted block list should already be sorted, we will sort it here to make sure.
       // This will automatically fix any bugs in the persisted order that may show up.
-      SortedSet<Long> sortedDeletedBlockIDs =
-          new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList());
+      SortedSet<Long> sortedDeletedBlockIDs = new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList());
       // Since the provided list of block IDs is already sorted, this is a linear time addition.
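       // Any IDs already present in the set are simply deduplicated by the TreeSet.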
sortedDeletedBlockIDs.addAll(deletedBlockIDs); @@ -88,6 +93,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele .addAllDeletedBlocks(sortedDeletedBlockIDs) .build(); write(data, newChecksumInfoBuilder.build()); + LOG.debug("Deleted block list for container {} updated", data.getContainerID()); } finally { writeLock.unlock(); } @@ -96,6 +102,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerChecksumInfo otherInfo) throws IOException { // TODO HDDS-10928 compare the checksum info of the two containers and return a summary. // Callers can act on this summary to repair their container replica using the peer's replica. + // This method will use the read lock, which is unused in the current implementation. return new ContainerDiff(); } @@ -108,10 +115,23 @@ private Lock getWriteLock(long containerID) { } private ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { - Lock readLock = getReadLock(data.getContainerID()); + long containerID = data.getContainerID(); + Lock readLock = getReadLock(containerID); readLock.lock(); - try (FileInputStream inStream = new FileInputStream(getContainerChecksumFile(data))) { - return ContainerChecksumInfo.parseFrom(inStream); + try { + File checksumFile = getContainerChecksumFile(data); + // If the checksum file has not been created yet, return an empty instance. + // Since all writes happen as part of an atomic read-modify-write cycle that requires a write lock, two empty + // instances for the same container obtained only under the read lock will not conflict. + if (!checksumFile.exists()) { + LOG.debug("Creating initial checksum file for container {} at {}", containerID, checksumFile); + return ContainerChecksumInfo.newBuilder() + .setContainerID(containerID) + .build(); + } + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + return ContainerChecksumInfo.parseFrom(inStream); + } } finally { readLock.unlock(); } @@ -132,8 +152,8 @@ private File getContainerChecksumFile(KeyValueContainerData data) { } /** - * This class represents the different between our replica of a container, and a peer's replica of a container. - * It summarizes the operations we need to do to reconcile our replica the peer replica it was compared to. + * This class represents the difference between our replica of a container and a peer's replica of a container. + * It summarizes the operations we need to do to reconcile our replica with the peer replica it was compared to. 
* * TODO HDDS-10928 */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index f82ac358d1cf..274ff7645adb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -17,8 +17,6 @@ package org.apache.hadoop.ozone.container.checksum; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockMerkleTreeProto; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; import org.apache.hadoop.ozone.common.ChecksumByteBuffer; import org.apache.hadoop.ozone.common.ChecksumByteBufferFactory; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; @@ -38,8 +36,8 @@ * Note that checksums are order dependent. Chunk checksums are sorted by their * offset within a block, and block checksums are sorted by their block ID. * - * This class can be used to construct a consistent and completely filled {@link ContainerMerkleTreeProto} object. - * It allows building a container merkle tree from scratch by incrementally adding chunks. + * This class can be used to construct a consistent and completely filled {@link ContainerProtos.ContainerMerkleTree} + * object. It allows building a container merkle tree from scratch by incrementally adding chunks. * The final checksums at higher levels of the tree are not calculated until * {@link ContainerMerkleTree#toProto} is called. */ @@ -71,15 +69,15 @@ public void addChunks(long blockID, Collection chunks) { * * @return A complete protobuf object representation of this tree. */ - public ContainerMerkleTreeProto toProto() { + public ContainerProtos.ContainerMerkleTree toProto() { // Compute checksums and return the result. - ContainerMerkleTreeProto.Builder containerTreeBuilder = ContainerMerkleTreeProto.newBuilder(); + ContainerProtos.ContainerMerkleTree.Builder containerTreeBuilder = ContainerProtos.ContainerMerkleTree.newBuilder(); ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * id2Block.size()); for (BlockMerkleTree blockTree: id2Block.values()) { // Add block's checksum tree to the proto. - BlockMerkleTreeProto blockTreeProto = blockTree.toProto(); + ContainerProtos.BlockMerkleTree blockTreeProto = blockTree.toProto(); containerTreeBuilder.addBlockMerkleTree(blockTreeProto); // Add the block's checksum to the buffer to calculate the container checksum. containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum()); @@ -124,8 +122,8 @@ public void addChunks(Collection chunks) { * * @return A complete protobuf object representation of this block tree. 
*/ - public BlockMerkleTreeProto toProto() { - BlockMerkleTreeProto.Builder blockTreeBuilder = BlockMerkleTreeProto.newBuilder(); + public ContainerProtos.BlockMerkleTree toProto() { + ContainerProtos.BlockMerkleTree.Builder blockTreeBuilder = ContainerProtos.BlockMerkleTree.newBuilder(); ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); ByteBuffer blockChecksumBuffer = ByteBuffer.allocate(Long.BYTES * offset2Chunk.size()); @@ -135,7 +133,7 @@ public BlockMerkleTreeProto toProto() { // ChunkInputStream#validateChunk for an example on the client read path). // There is no other value we can use to sort these checksums, so we assume the stored proto has them in the // correct order. - ContainerProtos.ChunkMerkleTreeProto chunkTreeProto = chunkTree.toProto(); + ContainerProtos.ChunkMerkleTree chunkTreeProto = chunkTree.toProto(); blockTreeBuilder.addChunkMerkleTree(chunkTreeProto); blockChecksumBuffer.putLong(chunkTreeProto.getChunkChecksum()); } @@ -167,13 +165,13 @@ private static class ChunkMerkleTree { * * @return A complete protobuf representation of this chunk as a leaf in the container merkle tree. */ - public ContainerProtos.ChunkMerkleTreeProto toProto() { + public ContainerProtos.ChunkMerkleTree toProto() { ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); for (ByteString checksum: chunk.getChecksumData().getChecksums()) { checksumImpl.update(checksum.asReadOnlyByteBuffer()); } - return ContainerProtos.ChunkMerkleTreeProto.newBuilder() + return ContainerProtos.ChunkMerkleTree.newBuilder() .setOffset(chunk.getOffset()) .setLength(chunk.getLen()) .setChunkChecksum(checksumImpl.getValue()) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java new file mode 100644 index 000000000000..53dc9f13e693 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java @@ -0,0 +1,149 @@ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.TreeSet; + +import static org.apache.hadoop.ozone.container.checksum.TestContainerMerkleTree.assertTreesSortedAndMatch; +import static org.apache.hadoop.ozone.container.checksum.TestContainerMerkleTree.buildChunk; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class TestContainerChecksumManager { + + private KeyValueContainerData container; + private final long containerID = 1L; + @TempDir + private File testDir; + private File checksumFile; + private ContainerChecksumManager checksumManager; + + @BeforeEach + public void init() { + container = mock(KeyValueContainerData.class); + 
when(container.getContainerID()).thenReturn(containerID); + when(container.getMetadataPath()).thenReturn(testDir.getAbsolutePath()); + checksumFile = new File(testDir, containerID + ".checksum"); + checksumManager = new ContainerChecksumManager(new DatanodeConfiguration()); + } + + @Test + public void testWriteEmptyTreeToFile() throws Exception { + checksumManager.writeContainerMerkleTree(container, new ContainerMerkleTree()); + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); + ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + assertEquals(0, treeProto.getDataChecksum()); + assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); + } + + @Test + public void testWriteEmptyBlockListToFile() throws Exception { + checksumManager.markBlocksAsDeleted(container, new TreeSet<>()); + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); + ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + assertEquals(0, treeProto.getDataChecksum()); + assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); + } + + @Test + public void testWriteToFileTreeOnly() throws Exception { + ContainerMerkleTree tree = buildTestTree(); + checksumManager.writeContainerMerkleTree(container, tree); + + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); + // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. + // Therefore, we can use the proto version of our expected tree to check what was written to the file. 
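+    // (assertTreesSortedAndMatch is imported statically from TestContainerMerkleTree for this purpose.)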
+    assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree());
+  }
+
+  @Test
+  public void testWriteToFileDeletedBlocksOnly() throws Exception {
+    List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
+    checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
+
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+
+    assertEquals(containerID, checksumInfo.getContainerID());
+    assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
+    ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree();
+    assertEquals(0, treeProto.getDataChecksum());
+    assertTrue(treeProto.getBlockMerkleTreeList().isEmpty());
+  }
+
+  @Test
+  public void testDeletedBlocksPreservedOnTreeWrite() throws Exception {
+    List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
+    checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
+    ContainerMerkleTree tree = buildTestTree();
+    checksumManager.writeContainerMerkleTree(container, tree);
+
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+
+    assertEquals(containerID, checksumInfo.getContainerID());
+    assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
+    assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree());
+  }
+
+  @Test
+  public void testTreePreservedWithDeletedBlocks() throws Exception {
+    ContainerMerkleTree tree = buildTestTree();
+    checksumManager.writeContainerMerkleTree(container, tree);
+    List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
+    checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
+
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+
+    assertEquals(containerID, checksumInfo.getContainerID());
+    assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
+    assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree());
+  }
+
+  private ContainerMerkleTree buildTestTree() throws Exception {
+    // Seed the expected and actual trees with the same chunks.
+ final long blockID1 = 1; + final long blockID2 = 2; + final long blockID3 = 3; + ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{4, 5, 6})); + ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{7, 8, 9})); + ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{12, 11, 10})); + ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{13, 14, 15})); + ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{16, 17, 18})); + + ContainerMerkleTree tree = new ContainerMerkleTree(); + tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + tree.addChunks(blockID2, Arrays.asList(b2c1, b2c2)); + tree.addChunks(blockID3, Arrays.asList(b3c1, b3c2)); + + return tree; + } + + private ContainerProtos.ContainerChecksumInfo readFile() throws IOException { + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); + } + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 0106b8d57ae1..766736eb7ecc 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -3,7 +3,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; import java.io.IOException; import java.nio.ByteBuffer; @@ -31,7 +30,7 @@ class TestContainerMerkleTree { @Test public void testBuildEmptyTree() { ContainerMerkleTree tree = new ContainerMerkleTree(); - ContainerMerkleTreeProto treeProto = tree.toProto(); + ContainerProtos.ContainerMerkleTree treeProto = tree.toProto(); assertEquals(0, treeProto.getDataChecksum()); assertEquals(0, treeProto.getBlockMerkleTreeCount()); } @@ -43,29 +42,29 @@ public void testBuildOneChunkTree() throws Exception { ChunkInfo chunk = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Build the expected tree proto using the test code. - ContainerProtos.ChunkMerkleTreeProto chunkTree = buildExpectedChunkTree(chunk); - ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID, + ContainerProtos.ChunkMerkleTree chunkTree = buildExpectedChunkTree(chunk); + ContainerProtos.BlockMerkleTree blockTree = buildExpectedBlockTree(blockID, Collections.singletonList(chunkTree)); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); // Use the ContainerMerkleTree to build the same tree. ContainerMerkleTree actualTree = new ContainerMerkleTree(); actualTree.addChunks(blockID, Collections.singletonList(chunk)); // Ensure the trees match. - ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); // Do some manual verification of the generated tree as well. 
assertNotEquals(0, actualTreeProto.getDataChecksum()); assertEquals(1, actualTreeProto.getBlockMerkleTreeCount()); - ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTreeProto.getBlockMerkleTree(0); + ContainerProtos.BlockMerkleTree actualBlockTree = actualTreeProto.getBlockMerkleTree(0); assertEquals(1, actualBlockTree.getBlockID()); assertEquals(1, actualBlockTree.getChunkMerkleTreeCount()); assertNotEquals(0, actualBlockTree.getBlockChecksum()); - ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(0); + ContainerProtos.ChunkMerkleTree actualChunkTree = actualBlockTree.getChunkMerkleTree(0); assertEquals(0, actualChunkTree.getOffset()); assertEquals(CHUNK_SIZE, actualChunkTree.getLength()); assertNotEquals(0, actualChunkTree.getChunkChecksum()); @@ -80,16 +79,16 @@ public void testBuildTreeWithMissingChunks() throws Exception { ChunkInfo chunk3 = buildChunk(2, ByteBuffer.wrap(new byte[]{4, 5, 6})); // Build the expected tree proto using the test code. - ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID, + ContainerProtos.BlockMerkleTree blockTree = buildExpectedBlockTree(blockID, Arrays.asList(buildExpectedChunkTree(chunk1), buildExpectedChunkTree(chunk3))); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); // Use the ContainerMerkleTree to build the same tree. ContainerMerkleTree actualTree = new ContainerMerkleTree(); actualTree.addChunks(blockID, Arrays.asList(chunk1, chunk3)); // Ensure the trees match. - ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); } @@ -107,11 +106,11 @@ public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Build the expected tree proto using the test code. - ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1, + ContainerProtos.BlockMerkleTree blockTree1 = buildExpectedBlockTree(blockID1, Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2))); - ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3, + ContainerProtos.BlockMerkleTree blockTree3 = buildExpectedBlockTree(blockID3, Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3)); // Use the ContainerMerkleTree to build the same tree. // Add blocks and chunks out of order to test sorting. @@ -120,7 +119,7 @@ public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { actualTree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); // Ensure the trees match. 
- ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); } @@ -131,21 +130,21 @@ public void testAppendToBlocksWhileBuilding() throws Exception { final long blockID2 = 2; final long blockID3 = 3; ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2})); ChunkInfo b1c3 = buildChunk(2, ByteBuffer.wrap(new byte[]{1, 2, 3})); ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1})); + ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{2, 3, 4})); // Build the expected tree proto using the test code. - ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1, + ContainerProtos.BlockMerkleTree blockTree1 = buildExpectedBlockTree(blockID1, Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2), buildExpectedChunkTree(b1c3))); - ContainerProtos.BlockMerkleTreeProto blockTree2 = buildExpectedBlockTree(blockID2, + ContainerProtos.BlockMerkleTree blockTree2 = buildExpectedBlockTree(blockID2, Arrays.asList(buildExpectedChunkTree(b2c1), buildExpectedChunkTree(b2c2))); - ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3, + ContainerProtos.BlockMerkleTree blockTree3 = buildExpectedBlockTree(blockID3, Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree( + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree( Arrays.asList(blockTree1, blockTree2, blockTree3)); // Use the ContainerMerkleTree to build the same tree. @@ -161,18 +160,19 @@ public void testAppendToBlocksWhileBuilding() throws Exception { actualTree.addChunks(blockID3, Collections.singletonList(b3c2)); // Ensure the trees match. - ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); } - private void assertTreesSortedAndMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) { + public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree expectedTree, + ContainerProtos.ContainerMerkleTree actualTree) { assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount()); long prevBlockID = -1; for (int blockIndex = 0; blockIndex < expectedTree.getBlockMerkleTreeCount(); blockIndex++) { - ContainerProtos.BlockMerkleTreeProto expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); - ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTree.getBlockMerkleTree(blockIndex); + ContainerProtos.BlockMerkleTree expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); + ContainerProtos.BlockMerkleTree actualBlockTree = actualTree.getBlockMerkleTree(blockIndex); // Blocks should be sorted by block ID. 
        long currentBlockID = actualBlockTree.getBlockID();
@@ -184,8 +184,8 @@ private void assertTreesSortedAndMatch(ContainerMerkleTreeProto expectedTree, Co
 
       long prevChunkOffset = -1;
       for (int chunkIndex = 0; chunkIndex < expectedBlockTree.getChunkMerkleTreeCount(); chunkIndex++) {
-        ContainerProtos.ChunkMerkleTreeProto expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex);
-        ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex);
+        ContainerProtos.ChunkMerkleTree expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex);
+        ContainerProtos.ChunkMerkleTree actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex);
 
         // Chunks should be sorted by offset.
         long currentChunkOffset = actualChunkTree.getOffset();
@@ -199,30 +199,30 @@ private ContainerMerkleTreeProto buildExpectedContainerTree(List
     }
   }
 
-  private ContainerMerkleTreeProto buildExpectedContainerTree(List<ContainerProtos.BlockMerkleTreeProto> blocks) {
-    return ContainerMerkleTreeProto.newBuilder()
+  private ContainerProtos.ContainerMerkleTree buildExpectedContainerTree(List<ContainerProtos.BlockMerkleTree> blocks) {
+    return ContainerProtos.ContainerMerkleTree.newBuilder()
        .addAllBlockMerkleTree(blocks)
        .setDataChecksum(computeExpectedChecksum(
            blocks.stream()
-              .map(ContainerProtos.BlockMerkleTreeProto::getBlockChecksum)
+              .map(ContainerProtos.BlockMerkleTree::getBlockChecksum)
              .collect(Collectors.toList())))
        .build();
   }
 
-  private ContainerProtos.BlockMerkleTreeProto buildExpectedBlockTree(long blockID,
-      List<ContainerProtos.ChunkMerkleTreeProto> chunks) {
-    return ContainerProtos.BlockMerkleTreeProto.newBuilder()
+  private ContainerProtos.BlockMerkleTree buildExpectedBlockTree(long blockID,
+      List<ContainerProtos.ChunkMerkleTree> chunks) {
+    return ContainerProtos.BlockMerkleTree.newBuilder()
        .setBlockID(blockID)
        .setBlockChecksum(computeExpectedChecksum(
            chunks.stream()
-              .map(ContainerProtos.ChunkMerkleTreeProto::getChunkChecksum)
+              .map(ContainerProtos.ChunkMerkleTree::getChunkChecksum)
              .collect(Collectors.toList())))
        .addAllChunkMerkleTree(chunks)
        .build();
   }
 
-  private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo chunk) {
-    return ContainerProtos.ChunkMerkleTreeProto.newBuilder()
+  private ContainerProtos.ChunkMerkleTree buildExpectedChunkTree(ChunkInfo chunk) {
+    return ContainerProtos.ChunkMerkleTree.newBuilder()
        .setOffset(chunk.getOffset())
        .setLength(chunk.getLen())
        .setChunkChecksum(computeExpectedChunkChecksum(chunk.getChecksumData().getChecksums()))
@@ -240,7 +240,7 @@ private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo ch
    * "bytesPerChecksum" amount of data and are assumed to be contiguous.
    * @return The ChunkInfo proto object built from this information.
    */
-  private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException {
+  public static ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException {
     // Each chunk checksum is added under the same ChecksumData object.
     ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder()
         .setType(ContainerProtos.ChecksumType.CRC32)
diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
index d4aa92937f6d..aa3666fb50c6 100644
--- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
@@ -528,26 +528,26 @@ message SendContainerResponse {
 }
 
 // Each chunk contains multiple checksums. This message aggregates them into one checksum for the whole chunk.
-message ChunkMerkleTreeProto {
+message ChunkMerkleTree {
   optional int64 offset = 1;
   optional int64 length = 2;
   optional int64 chunkChecksum = 3;
 }
 
-message BlockMerkleTreeProto {
+message BlockMerkleTree {
   optional int64 blockID = 1;
   optional int64 blockChecksum = 2;
-  repeated ChunkMerkleTreeProto chunkMerkleTree = 3;
+  repeated ChunkMerkleTree chunkMerkleTree = 3;
 }
 
-message ContainerMerkleTreeProto {
+message ContainerMerkleTree {
   optional int64 dataChecksum = 1;
-  repeated BlockMerkleTreeProto blockMerkleTree = 2;
+  repeated BlockMerkleTree blockMerkleTree = 2;
 }
 
 message ContainerChecksumInfo {
   optional int64 containerID = 1;
-  optional ContainerMerkleTreeProto dataMerkleTree = 2;
+  optional ContainerMerkleTree dataMerkleTree = 2;
   repeated int64 deletedBlocks = 3;
 }

From 21dd87016d461a55471c69de82c6462fb439a1ea Mon Sep 17 00:00:00 2001
From: Ethan Rose
Date: Fri, 21 Jun 2024 17:33:53 -0400
Subject: [PATCH 08/24] Updates after reviewing diff

---
 .../checksum/ContainerChecksumManager.java    | 29 ++++++++++---------
 .../checksum/ContainerMerkleTree.java         |  3 +-
 .../TestContainerChecksumManager.java         |  7 ++---
 .../checksum/TestContainerMerkleTree.java     |  3 +-
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
index 9dd2ad605c83..0f02b04422a2 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java
@@ -16,7 +16,7 @@
  */
 package org.apache.hadoop.ozone.container.checksum;
 
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo;
+import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 
@@ -42,6 +42,7 @@ public class ContainerChecksumManager {
   private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumManager.class);
 
   // Used to coordinate reads and writes to each container's checksum file.
+  // Each container ID is mapped to a stripe.
   private final Striped<ReadWriteLock> fileLock;
 
   /**
@@ -61,7 +62,7 @@ public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkle
     Lock writeLock = getWriteLock(data.getContainerID());
     writeLock.lock();
     try {
-      ContainerChecksumInfo newChecksumInfo = read(data).toBuilder()
+      ContainerProtos.ContainerChecksumInfo newChecksumInfo = read(data).toBuilder()
        .setDataMerkleTree(tree.toProto())
        .build();
       write(data, newChecksumInfo);
@@ -80,26 +81,26 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet<Long> dele
     Lock writeLock = getWriteLock(data.getContainerID());
     writeLock.lock();
     try {
-      ContainerChecksumInfo.Builder newChecksumInfoBuilder = read(data).toBuilder();
-
+      ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = read(data).toBuilder();
       // Although the persisted block list should already be sorted, we will sort it here to make sure.
       // This will automatically fix any bugs in the persisted order that may show up.
-      SortedSet<Long> sortedDeletedBlockIDs = new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList());
+      SortedSet<Long> sortedDeletedBlockIDs = new TreeSet<>(checksumInfoBuilder.getDeletedBlocksList());
       // Since the provided list of block IDs is already sorted, this is a linear time addition.
       sortedDeletedBlockIDs.addAll(deletedBlockIDs);
 
-      newChecksumInfoBuilder
+      checksumInfoBuilder
          .clearDeletedBlocks()
          .addAllDeletedBlocks(sortedDeletedBlockIDs)
          .build();
-      write(data, newChecksumInfoBuilder.build());
+      write(data, checksumInfoBuilder.build());
       LOG.debug("Deleted block list for container {} updated", data.getContainerID());
     } finally {
       writeLock.unlock();
     }
   }
 
-  public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerChecksumInfo otherInfo)
+  public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo otherInfo)
+      throws IOException {
     // TODO HDDS-10928 compare the checksum info of the two containers and return a summary.
     // Callers can act on this summary to repair their container replica using the peer's replica.
     // This method will use the read lock, which is unused in the current implementation.
@@ -114,7 +115,7 @@ private Lock getWriteLock(long containerID) {
     return fileLock.get(containerID).writeLock();
   }
 
-  private ContainerChecksumInfo read(KeyValueContainerData data) throws IOException {
+  private ContainerProtos.ContainerChecksumInfo read(KeyValueContainerData data) throws IOException {
     long containerID = data.getContainerID();
     Lock readLock = getReadLock(containerID);
     readLock.lock();
@@ -124,20 +125,22 @@ private ContainerChecksumInfo read(KeyValueContainerData data) throws IOExceptio
       // Since all writes happen as part of an atomic read-modify-write cycle that requires a write lock, two empty
      // instances for the same container obtained only under the read lock will not conflict.
      if (!checksumFile.exists()) {
-        LOG.debug("Creating initial checksum file for container {} at {}", containerID, checksumFile);
-        return ContainerChecksumInfo.newBuilder()
+        LOG.debug("No checksum file currently exists for container {} at the path {}. Returning an empty instance.",
+            containerID, checksumFile);
+        return ContainerProtos.ContainerChecksumInfo.newBuilder()
            .setContainerID(containerID)
            .build();
      }
      try (FileInputStream inStream = new FileInputStream(checksumFile)) {
-        return ContainerChecksumInfo.parseFrom(inStream);
+        return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream);
      }
    } finally {
      readLock.unlock();
    }
  }
 
-  private void write(KeyValueContainerData data, ContainerChecksumInfo checksumInfo) throws IOException {
+  private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksumInfo checksumInfo)
+      throws IOException {
    Lock writeLock = getWriteLock(data.getContainerID());
    writeLock.lock();
    try (FileOutputStream outStream = new FileOutputStream(getContainerChecksumFile(data))) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
index 274ff7645adb..9eeb50b6498c 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java
@@ -76,10 +76,9 @@ public ContainerProtos.ContainerMerkleTree toProto() {
     ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * id2Block.size());
 
     for (BlockMerkleTree blockTree: id2Block.values()) {
-      // Add block's checksum tree to the proto.
       ContainerProtos.BlockMerkleTree blockTreeProto = blockTree.toProto();
       containerTreeBuilder.addBlockMerkleTree(blockTreeProto);
-      // Add the block's checksum to the buffer to calculate the container checksum.
+      // Add the block's checksum to the buffer that will be used to calculate the container checksum.
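+      // Each block checksum occupies one big-endian long in this buffer; the
+      // container-level checksum is a CRC32 computed over exactly these bytes.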
containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum()); } containerChecksumBuffer.flip(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java index 53dc9f13e693..ddad744872b6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java @@ -66,7 +66,7 @@ public void testWriteEmptyBlockListToFile() throws Exception { } @Test - public void testWriteToFileTreeOnly() throws Exception { + public void testWriteOnlyTreeToFile() throws Exception { ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerMerkleTree(container, tree); @@ -80,7 +80,7 @@ public void testWriteToFileTreeOnly() throws Exception { } @Test - public void testWriteToFileDeletedBlocksOnly() throws Exception { + public void testWriteOnlyDeletedBlocksToFile() throws Exception { List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); @@ -108,7 +108,7 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { } @Test - public void testTreePreservedWithDeletedBlocks() throws Exception { + public void testTreePreservedOnDeletedBlocksWrite() throws Exception { ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerMerkleTree(container, tree); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); @@ -122,7 +122,6 @@ public void testTreePreservedWithDeletedBlocks() throws Exception { } private ContainerMerkleTree buildTestTree() throws Exception { - // Seed the expected and actual trees with the same chunks. final long blockID1 = 1; final long blockID2 = 2; final long blockID3 = 3; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 766736eb7ecc..efcb3843d7d2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -110,7 +110,8 @@ public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2))); ContainerProtos.BlockMerkleTree blockTree3 = buildExpectedBlockTree(blockID3, Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); - ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree( + Arrays.asList(blockTree1, blockTree3)); // Use the ContainerMerkleTree to build the same tree. // Add blocks and chunks out of order to test sorting. From 47cd213474920689122b6582c3ca9153b8905a57 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 24 Jun 2024 13:49:10 -0400 Subject: [PATCH 09/24] Rename checksum manager and file. Fix findbugs and Rat --- ...java => ContainerChecksumTreeManager.java} | 10 ++-- ... 
=> TestContainerChecksumTreeManager.java} | 50 ++++++++++++------- .../checksum/TestContainerMerkleTree.java | 16 ++++++ 3 files changed, 54 insertions(+), 22 deletions(-) rename hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/{ContainerChecksumManager.java => ContainerChecksumTreeManager.java} (96%) rename hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/{TestContainerChecksumManager.java => TestContainerChecksumTreeManager.java} (75%) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java similarity index 96% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 0f02b04422a2..71448097393b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -37,9 +37,9 @@ /** * This class coordinates reading and writing Container checksum information for all containers. */ -public class ContainerChecksumManager { +public class ContainerChecksumTreeManager { - private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumManager.class); + private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumTreeManager.class); // Used to coordinate reads and writes to each container's checksum file. // Each container ID is mapped to a stripe. @@ -48,7 +48,7 @@ public class ContainerChecksumManager { /** * Creates one instance that should be used to coordinate all container checksum info within a datanode. */ - public ContainerChecksumManager(DatanodeConfiguration dnConf) { + public ContainerChecksumTreeManager(DatanodeConfiguration dnConf) { fileLock = SimpleStriped.readWriteLock(dnConf.getContainerChecksumLockStripes(), true); } @@ -58,7 +58,7 @@ public ContainerChecksumManager(DatanodeConfiguration dnConf) { * file remains unchanged. * Concurrent writes to the same file are coordinated internally. 
*/ - public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkleTree tree) throws IOException { + public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTree tree) throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { @@ -151,7 +151,7 @@ private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksum } private File getContainerChecksumFile(KeyValueContainerData data) { - return new File(data.getMetadataPath(), data.getContainerID() + ".checksum"); + return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java similarity index 75% rename from hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java rename to hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index ddad744872b6..b964a0922c13 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
<p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ package org.apache.hadoop.ozone.container.checksum; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -23,30 +39,30 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -class TestContainerChecksumManager { +class TestContainerChecksumTreeManager { - private KeyValueContainerData container; - private final long containerID = 1L; + private static final long CONTAINER_ID = 1L; @TempDir private File testDir; + private KeyValueContainerData container; private File checksumFile; - private ContainerChecksumManager checksumManager; + private ContainerChecksumTreeManager checksumManager; @BeforeEach public void init() { container = mock(KeyValueContainerData.class); - when(container.getContainerID()).thenReturn(containerID); + when(container.getContainerID()).thenReturn(CONTAINER_ID); when(container.getMetadataPath()).thenReturn(testDir.getAbsolutePath()); - checksumFile = new File(testDir, containerID + ".checksum"); - checksumManager = new ContainerChecksumManager(new DatanodeConfiguration()); + checksumFile = new File(testDir, CONTAINER_ID + ".tree"); + checksumManager = new ContainerChecksumTreeManager(new DatanodeConfiguration()); } @Test public void testWriteEmptyTreeToFile() throws Exception { - checksumManager.writeContainerMerkleTree(container, new ContainerMerkleTree()); + checksumManager.writeContainerDataTree(container, new ContainerMerkleTree()); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); @@ -58,7 +74,7 @@ public void testWriteEmptyBlockListToFile() throws Exception { checksumManager.markBlocksAsDeleted(container, new TreeSet<>()); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); @@ -68,11 +84,11 @@ public void testWriteEmptyBlockListToFile() throws Exception { @Test public void testWriteOnlyTreeToFile() throws Exception { ContainerMerkleTree tree = buildTestTree(); - checksumManager.writeContainerMerkleTree(container, tree); + checksumManager.writeContainerDataTree(container, tree); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. // Therefore, we can use the proto version of our expected tree to check what was written to the file. 
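Taken together, these tests pin down the manager's read-modify-write contract: every update deserializes the existing ContainerChecksumInfo, replaces one section, and rewrites the file. A condensed sketch of that round trip, illustrative only — it reuses this test class's own fixtures (the mocked container and checksumFile from init(), plus the statically imported helpers) and is not part of the patch:

```java
// Sketch only: condenses the write/read cycle the surrounding tests exercise.
@Test
public void sketchOfRoundTrip() throws Exception {
  ContainerMerkleTree tree = new ContainerMerkleTree();
  tree.addChunks(1L, Arrays.asList(buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}))));
  checksumManager.writeContainerDataTree(container, tree);
  checksumManager.markBlocksAsDeleted(container, new TreeSet<>(Arrays.asList(4L, 5L)));

  // Both updates are read-modify-write cycles against the same serialized
  // ContainerChecksumInfo file, so the tree written first must survive the
  // later deleted-blocks update.
  try (FileInputStream in = new FileInputStream(checksumFile)) {
    ContainerProtos.ContainerChecksumInfo info = ContainerProtos.ContainerChecksumInfo.parseFrom(in);
    assertTreesSortedAndMatch(tree.toProto(), info.getDataMerkleTree());
    assertEquals(Arrays.asList(4L, 5L), info.getDeletedBlocksList());
  }
}
```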
@@ -86,7 +102,7 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception {
 
     ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
 
-    assertEquals(containerID, checksumInfo.getContainerID());
+    assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
     assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
     ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree();
     assertEquals(0, treeProto.getDataChecksum());
     assertTrue(treeProto.getBlockMerkleTreeList().isEmpty());
   }
@@ -98,11 +114,11 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception {
     List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
     checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
     ContainerMerkleTree tree = buildTestTree();
-    checksumManager.writeContainerMerkleTree(container, tree);
+    checksumManager.writeContainerDataTree(container, tree);
 
     ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
 
-    assertEquals(containerID, checksumInfo.getContainerID());
+    assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
     assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
     assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree());
   }
@@ -110,13 +126,13 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception {
 
   @Test
   public void testTreePreservedOnDeletedBlocksWrite() throws Exception {
     ContainerMerkleTree tree = buildTestTree();
-    checksumManager.writeContainerMerkleTree(container, tree);
+    checksumManager.writeContainerDataTree(container, tree);
     List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
     checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
 
     ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
 
-    assertEquals(containerID, checksumInfo.getContainerID());
+    assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
     assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
     assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree());
   }
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
index efcb3843d7d2..a93c4f170236 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ package org.apache.hadoop.ozone.container.checksum; import org.apache.hadoop.hdds.conf.OzoneConfiguration; From 6460fe71b248996c96e54c778ba77fa02a6ddca4 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 25 Jun 2024 13:52:15 -0400 Subject: [PATCH 10/24] Make checksum file read/write for the API based on files, not protos --- .../checksum/ContainerChecksumTreeManager.java | 13 ++++++++----- .../checksum/TestContainerChecksumTreeManager.java | 5 +++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 71448097393b..18cdbfa38d45 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -99,7 +99,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele } } - public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo otherInfo) + public ContainerDiff diff(KeyValueContainerData thisContainer, File otherContainerTree) throws IOException { // TODO HDDS-10928 compare the checksum info of the two containers and return a summary. // Callers can act on this summary to repair their container replica using the peer's replica. @@ -107,6 +107,13 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.C return new ContainerDiff(); } + /** + * Returns the container checksum tree file for the specified container without deserializing it. + */ + public File getContainerChecksumFile(KeyValueContainerData data) { + return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); + } + private Lock getReadLock(long containerID) { return fileLock.get(containerID).readLock(); } @@ -150,10 +157,6 @@ private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksum } } - private File getContainerChecksumFile(KeyValueContainerData data) { - return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); - } - /** * This class represents the difference between our replica of a container and a peer's replica of a container. * It summarizes the operations we need to do to reconcile our replica with the peer replica it was compared to. 
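The file-based signature introduced here hints at the eventual reconciliation flow. A rough sketch of a hypothetical caller follows; fetchPeerChecksumTree is a stand-in for a peer transport that does not exist anywhere in this series, and only getContainerChecksumFile and the still-stubbed diff come from this patch:

```java
// Hypothetical caller-side flow once HDDS-10928 implements diff(); the peer
// RPC below is assumed, not part of this patch series.
File myTree = checksumTreeManager.getContainerChecksumFile(containerData); // can be served to peers as-is
File peerTree = fetchPeerChecksumTree(peerDatanode, containerData.getContainerID());
ContainerDiff diff = checksumTreeManager.diff(containerData, peerTree);
// Apply the diff summary to repair this replica from the peer's replica.
```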
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index b964a0922c13..c8a2fbb38669 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -137,6 +137,11 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); } + @Test + public void testChecksumTreeFilePath() { + assertEquals(checksumFile.getAbsolutePath(), checksumManager.getContainerChecksumFile(container).getAbsolutePath()); + } + private ContainerMerkleTree buildTestTree() throws Exception { final long blockID1 = 1; final long blockID2 = 2; From ff55f27983ff8f9f29b53b0c16a6b04178ef4a47 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 25 Jun 2024 17:30:06 -0400 Subject: [PATCH 11/24] Initially mark where blocks will be added --- .../ContainerChecksumTreeManager.java | 6 ++++-- .../common/impl/BlockDeletingService.java | 20 +++++++++++++++---- .../background/BlockDeletingTask.java | 10 ++++++++++ .../container/ozoneimpl/OzoneContainer.java | 5 +++++ .../TestContainerChecksumTreeManager.java | 3 ++- 5 files changed, 37 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 18cdbfa38d45..e908300fdbd9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.ozone.container.checksum; +import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -48,8 +49,9 @@ public class ContainerChecksumTreeManager { /** * Creates one instance that should be used to coordinate all container checksum info within a datanode. 
*/ - public ContainerChecksumTreeManager(DatanodeConfiguration dnConf) { - fileLock = SimpleStriped.readWriteLock(dnConf.getContainerChecksumLockStripes(), true); + public ContainerChecksumTreeManager(ConfigurationSource conf) { + fileLock = SimpleStriped.readWriteLock( + conf.getObject(DatanodeConfiguration.class).getContainerChecksumLockStripes(), true); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java index 8c090713de8f..c4ffa44dcf22 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java @@ -26,8 +26,10 @@ import org.apache.hadoop.hdds.utils.BackgroundService; import org.apache.hadoop.hdds.utils.BackgroundTask; import org.apache.hadoop.hdds.utils.BackgroundTaskQueue; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.helpers.BlockDeletingServiceMetrics; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDeletionChoosingPolicy; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis; @@ -65,24 +67,28 @@ public class BlockDeletingService extends BackgroundService { private final Duration blockDeletingMaxLockHoldingTime; + private final ContainerChecksumTreeManager checksumTreeManager; + @VisibleForTesting public BlockDeletingService( OzoneContainer ozoneContainer, long serviceInterval, long serviceTimeout, TimeUnit timeUnit, int workerSize, ConfigurationSource conf ) { this(ozoneContainer, serviceInterval, serviceTimeout, timeUnit, workerSize, - conf, "", null); + conf, "", new ContainerChecksumTreeManager(conf), null); } @SuppressWarnings("checkstyle:parameternumber") public BlockDeletingService( OzoneContainer ozoneContainer, long serviceInterval, long serviceTimeout, TimeUnit timeUnit, int workerSize, ConfigurationSource conf, - String threadNamePrefix, ReconfigurationHandler reconfigurationHandler + String threadNamePrefix, ContainerChecksumTreeManager checksumTreeManager, + ReconfigurationHandler reconfigurationHandler ) { super("BlockDeletingService", serviceInterval, timeUnit, workerSize, serviceTimeout, threadNamePrefix); this.ozoneContainer = ozoneContainer; + this.checksumTreeManager = checksumTreeManager; try { containerDeletionPolicy = conf.getClass( ScmConfigKeys.OZONE_SCM_KEY_VALUE_CONTAINER_DELETION_CHOOSING_POLICY, @@ -145,6 +151,7 @@ public BackgroundTaskQueue getTasks() { new BlockDeletingTaskBuilder(); builder.setBlockDeletingService(this) .setContainerBlockInfo(containerBlockInfo) + .setChecksumTreeManager(checksumTreeManager) .setPriority(TASK_PRIORITY_DEFAULT); containerBlockInfos = builder.build(); queue.add(containerBlockInfos); @@ -279,6 +286,7 @@ private static class BlockDeletingTaskBuilder { private BlockDeletingService blockDeletingService; private BlockDeletingService.ContainerBlockInfo containerBlockInfo; private int priority; + private ContainerChecksumTreeManager checksumTreeManager; public BlockDeletingTaskBuilder 
setBlockDeletingService( BlockDeletingService blockDeletingService) { @@ -292,6 +300,11 @@ public BlockDeletingTaskBuilder setContainerBlockInfo( return this; } + public BlockDeletingTaskBuilder setChecksumTreeManager(ContainerChecksumTreeManager treeManager) { + this.checksumTreeManager = treeManager; + return this; + } + public BlockDeletingTaskBuilder setPriority(int priority) { this.priority = priority; return this; @@ -303,8 +316,7 @@ public BackgroundTask build() { if (containerType .equals(ContainerProtos.ContainerType.KeyValueContainer)) { return - new BlockDeletingTask(blockDeletingService, containerBlockInfo, - priority); + new BlockDeletingTask(blockDeletingService, containerBlockInfo, checksumTreeManager, priority); } else { // If another ContainerType is available later, implement it throw new IllegalArgumentException( diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java index 60e5a583551e..82f2f8a8d024 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.BlockDeletingServiceMetrics; import org.apache.hadoop.ozone.container.common.impl.BlockDeletingService; @@ -73,10 +74,12 @@ public class BlockDeletingTask implements BackgroundTask { private final OzoneContainer ozoneContainer; private final ConfigurationSource conf; private Duration blockDeletingMaxLockHoldingTime; + private final ContainerChecksumTreeManager checksumTreeManager; public BlockDeletingTask( BlockDeletingService blockDeletingService, BlockDeletingService.ContainerBlockInfo containerBlockInfo, + ContainerChecksumTreeManager checksumTreeManager, int priority) { this.ozoneContainer = blockDeletingService.getOzoneContainer(); this.metrics = blockDeletingService.getMetrics(); @@ -87,6 +90,7 @@ public BlockDeletingTask( this.containerData = (KeyValueContainerData) containerBlockInfo.getContainerData(); this.blocksToDelete = containerBlockInfo.getNumBlocksToDelete(); + this.checksumTreeManager = checksumTreeManager; } private static class ContainerBackgroundTaskResult @@ -224,6 +228,9 @@ public ContainerBackgroundTaskResult deleteViaSchema1( } } + // TODO add succeededBlocks to deleted here. +// checksumTreeManager.markBlocksAsDeleted(succeedBlocks); + // Once chunks in the blocks are deleted... remove the blockID from // blockDataTable. try (BatchOperation batch = meta.getStore().getBatchHandler() @@ -367,6 +374,9 @@ private ContainerBackgroundTaskResult deleteViaTransactionStore( .map(String::valueOf).collect(Collectors.toList())) ); + // TODO write blocks deleted to the file here since txns are not yet removed from DB. + checksumTreeManager.markBlocksAsDeleted(crr.getDeletedBlocks()); + // Once blocks are deleted... 
remove the blockID from blockDataTable // and also remove the transactions from txnTable. try (BatchOperation batch = meta.getStore().getBatchHandler() diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index aef3965dcd49..29ac5b9f0b9e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdds.security.token.TokenVerifier; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.utils.HddsServerUtil; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.impl.BlockDeletingService; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; @@ -121,6 +122,7 @@ public class OzoneContainer { private final ReplicationServer replicationServer; private DatanodeDetails datanodeDetails; private StateContext context; + private final ContainerChecksumTreeManager checksumTreeManager; private final ContainerMetrics metrics; @@ -223,6 +225,8 @@ public OzoneContainer( Duration blockDeletingSvcInterval = conf.getObject( DatanodeConfiguration.class).getBlockDeletionInterval(); + checksumTreeManager = new ContainerChecksumTreeManager(config); + long blockDeletingServiceTimeout = config .getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_TIMEOUT, OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT, @@ -236,6 +240,7 @@ public OzoneContainer( blockDeletingServiceTimeout, TimeUnit.MILLISECONDS, blockDeletingServiceWorkerSize, config, datanodeDetails.threadNamePrefix(), + checksumTreeManager, context.getParent().getReconfigurationHandler()); Duration recoveringContainerScrubbingSvcInterval = conf.getObject( diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index c8a2fbb38669..939b9d8683e5 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.ozone.container.checksum; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; @@ -54,7 +55,7 @@ public void init() { when(container.getContainerID()).thenReturn(CONTAINER_ID); when(container.getMetadataPath()).thenReturn(testDir.getAbsolutePath()); checksumFile = new File(testDir, CONTAINER_ID + ".tree"); - checksumManager = new ContainerChecksumTreeManager(new DatanodeConfiguration()); + checksumManager = new ContainerChecksumTreeManager(new OzoneConfiguration()); } @Test From fffeba63ac1761a343e70cdf135950811574e88e Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 26 Jun 2024 12:13:44 
-0400 Subject: [PATCH 12/24] Block deleting task supports updating the file --- .../ContainerChecksumTreeManager.java | 4 +-- .../background/BlockDeletingTask.java | 34 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index e908300fdbd9..d563619bff64 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -25,6 +25,7 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.util.Collection; import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.locks.ReadWriteLock; @@ -79,7 +80,7 @@ public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTr * All other content of the file remains unchanged. * Concurrent writes to the same file are coordinated internally. */ - public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet<Long> deletedBlockIDs) throws IOException { + public void markBlocksAsDeleted(KeyValueContainerData data, Collection<Long> deletedBlockIDs) throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { @@ -87,7 +88,6 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele // Although the persisted block list should already be sorted, we will sort it here to make sure. // This will automatically fix any bugs in the persisted order that may show up. SortedSet<Long> sortedDeletedBlockIDs = new TreeSet<>(checksumInfoBuilder.getDeletedBlocksList()); - // Since the provided list of block IDs is already sorted, this is a linear time addition.
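// A minimal sketch of the merge semantics the hunk above relies on (variable names
// here are illustrative): every write re-sorts and de-duplicates through a TreeSet,
// so callers may now pass any Collection<Long>, and a retried delete transaction that
// re-marks the same block IDs merges idempotently.
SortedSet<Long> merged = new TreeSet<>(persistedDeletedBlockIDs); // restores sorted order even if the persisted order was ever wrong
merged.addAll(incomingDeletedBlockIDs); // duplicate IDs from retried transactions collapse silently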
sortedDeletedBlockIDs.addAll(deletedBlockIDs); checksumInfoBuilder diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java index 82f2f8a8d024..ee96abcd50c2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java @@ -26,7 +26,6 @@ import java.util.Objects; import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.utils.BackgroundTaskResult; @@ -95,21 +94,21 @@ public BlockDeletingTask( private static class ContainerBackgroundTaskResult implements BackgroundTaskResult { - private List<String> deletedBlockIds; + private final List<Long> deletedBlockIds; ContainerBackgroundTaskResult() { deletedBlockIds = new LinkedList<>(); } - public void addBlockId(String blockId) { + public void addBlockId(Long blockId) { deletedBlockIds.add(blockId); } - public void addAll(List<String> blockIds) { + public void addAll(List<Long> blockIds) { deletedBlockIds.addAll(blockIds); } - public List<String> getDeletedBlocks() { + public List<Long> getDeletedBlocks() { return deletedBlockIds; } @@ -199,7 +198,7 @@ public ContainerBackgroundTaskResult deleteViaSchema1( return crr; } - List<String> succeedBlocks = new LinkedList<>(); + List<Long> succeedBlocks = new LinkedList<>(); LOG.debug("Container : {}, To-Delete blocks : {}", containerData.getContainerID(), toDeleteBlocks.size()); @@ -220,7 +219,7 @@ public ContainerBackgroundTaskResult deleteViaSchema1( handler.deleteBlock(container, entry.getValue()); releasedBytes += KeyValueContainerUtil.getBlockLength( entry.getValue()); - succeedBlocks.add(blockName); + succeedBlocks.add(Long.parseLong(blockName)); } catch (InvalidProtocolBufferException e) { LOG.error("Failed to parse block info for block {}", blockName, e); } catch (IOException e) { @@ -228,15 +227,17 @@ } - // TODO add succeededBlocks to deleted here. -// checksumTreeManager.markBlocksAsDeleted(succeedBlocks); + // Mark blocks as deleted in the container checksum tree. + // Data for these blocks does not need to be copied if container replicas diverge. + // Do this before the delete transactions are removed from the database. + checksumTreeManager.markBlocksAsDeleted(containerData, succeedBlocks); // Once chunks in the blocks are deleted... remove the blockID from // blockDataTable.
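// Ordering note (a reading of the comments above, not new behavior): the checksum
// file is updated before the batch below removes the block metadata and the pending
// delete records from the DB. If the datanode fails between the two steps, the
// records survive, the task is retried, and the checksum file merge simply
// re-applies the same block IDs.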
try (BatchOperation batch = meta.getStore().getBatchHandler() .initBatchOperation()) { - for (String entry : succeedBlocks) { - blockDataTable.deleteWithBatch(batch, entry); + for (Long entry : succeedBlocks) { + blockDataTable.deleteWithBatch(batch, Long.toString(entry)); } // Handler.deleteBlock calls deleteChunk to delete all the chunks @@ -370,12 +371,13 @@ private ContainerBackgroundTaskResult deleteViaTransactionStore( List deletedBlocksTxs = deleteBlocksResult.deletedBlocksTxs(); deleteBlocksResult.deletedBlocksTxs().forEach( - tx -> crr.addAll(tx.getLocalIDList().stream() - .map(String::valueOf).collect(Collectors.toList())) - ); + tx -> crr.addAll(tx.getLocalIDList())); - // TODO write blocks deleted to the file here since txns are not yet removed from DB. - checksumTreeManager.markBlocksAsDeleted(crr.getDeletedBlocks()); + // TODO if block file was not found, will it still get added to CRR? + // Mark blocks as deleted in the container checksum tree. + // Data for these blocks does not need to be copied if container replicas diverge. + // Do this before the delete transactions are removed from the database. + checksumTreeManager.markBlocksAsDeleted(containerData, crr.getDeletedBlocks()); // Once blocks are deleted... remove the blockID from blockDataTable // and also remove the transactions from txnTable. From 48229411e9b5cc5cce3e283658491402cd232e95 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 26 Jun 2024 15:49:57 -0400 Subject: [PATCH 13/24] Basic block delete test passes --- .../ContainerChecksumTreeManager.java | 6 ++- .../background/BlockDeletingTask.java | 20 +++++----- .../common/TestBlockDeletingService.java | 40 ++++++++++++++++++- 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index d563619bff64..7883b2ec4659 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -18,6 +18,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -112,8 +113,9 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, File otherContain /** * Returns the container checksum tree file for the specified container without deserializing it. 
*/ - public File getContainerChecksumFile(KeyValueContainerData data) { - return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); + public static File getContainerChecksumFile(ContainerData data) { + // TODO change type, don't cast + return new File(((KeyValueContainerData) data).getMetadataPath(), data.getContainerID() + ".tree"); } private Lock getReadLock(long containerID) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java index ee96abcd50c2..f68612f360c0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java @@ -198,7 +198,8 @@ public ContainerBackgroundTaskResult deleteViaSchema1( return crr; } - List<Long> succeedBlocks = new LinkedList<>(); + List<Long> succeedBlockIDs = new LinkedList<>(); + List<String> succeedBlockDBKeys = new LinkedList<>(); LOG.debug("Container : {}, To-Delete blocks : {}", containerData.getContainerID(), toDeleteBlocks.size()); @@ -219,7 +220,8 @@ public ContainerBackgroundTaskResult deleteViaSchema1( handler.deleteBlock(container, entry.getValue()); releasedBytes += KeyValueContainerUtil.getBlockLength( entry.getValue()); - succeedBlocks.add(Long.parseLong(blockName)); + succeedBlockIDs.add(entry.getValue().getLocalID()); + succeedBlockDBKeys.add(entry.getKey()); } catch (InvalidProtocolBufferException e) { LOG.error("Failed to parse block info for block {}", blockName, e); } catch (IOException e) { @@ -230,14 +232,14 @@ // Mark blocks as deleted in the container checksum tree. // Data for these blocks does not need to be copied if container replicas diverge. // Do this before the delete transactions are removed from the database. - checksumTreeManager.markBlocksAsDeleted(containerData, succeedBlocks); + checksumTreeManager.markBlocksAsDeleted(containerData, succeedBlockIDs); // Once chunks in the blocks are deleted... remove the blockID from // blockDataTable. try (BatchOperation batch = meta.getStore().getBatchHandler() .initBatchOperation()) { - for (Long entry : succeedBlocks) { - blockDataTable.deleteWithBatch(batch, Long.toString(entry)); + for (String key: succeedBlockDBKeys) { + blockDataTable.deleteWithBatch(batch, key); } // Handler.deleteBlock calls deleteChunk to delete all the chunks @@ -245,7 +247,7 @@ // updated with decremented used bytes during deleteChunk. This is // done here so that all the DB update for block delete can be // batched together while committing to DB.
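// A note on the two parallel lists introduced above (a reading of the change, not
// new code): the checksum tree records blocks by their numeric localID, while the
// block metadata table is keyed by a DB key string that is not always a bare ID,
// so the task tracks both forms for each successfully deleted block rather than
// parsing one from the other.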
- int deletedBlocksCount = succeedBlocks.size(); + int deletedBlocksCount = succeedBlockDBKeys.size(); containerData.updateAndCommitDBCounters(meta, batch, deletedBlocksCount, releasedBytes); // Once DB update is persisted, check if there are any blocks @@ -265,13 +267,13 @@ public ContainerBackgroundTaskResult deleteViaSchema1( metrics.incrSuccessBytes(releasedBytes); } - if (!succeedBlocks.isEmpty()) { + if (!succeedBlockDBKeys.isEmpty()) { LOG.debug("Container: {}, deleted blocks: {}, space reclaimed: {}, " + "task elapsed time: {}ms", containerData.getContainerID(), - succeedBlocks.size(), releasedBytes, + succeedBlockDBKeys.size(), releasedBytes, Time.monotonicNow() - startTime); } - crr.addAll(succeedBlocks); + crr.addAll(succeedBlockIDs); return crr; } catch (IOException exception) { LOG.warn("Deletion operation was not successful for container: " + diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index bc56141fb080..34942789383e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -37,6 +37,7 @@ import org.apache.hadoop.ozone.common.Checksum; import org.apache.hadoop.ozone.common.ChunkBuffer; import org.apache.hadoop.ozone.container.ContainerTestHelper; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.BlockDeletingServiceMetrics; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; @@ -79,11 +80,13 @@ import org.slf4j.LoggerFactory; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.time.Duration; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -92,6 +95,7 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.commons.lang3.RandomStringUtils.randomAlphanumeric; @@ -110,7 +114,11 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNotSame; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.Mockito.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -591,6 +599,9 @@ public void testBlockDeletion(ContainerTestVersionInfo versionInfo) // An interval will delete 1 * 2 blocks deleteAndWait(svc, 1); + // Make sure that deletions for each container were recorded in the checksum tree file. 
+ containerData.forEach(c -> assertDeletionsInChecksumFile(c, 2)); + GenericTestUtils.waitFor(() -> containerData.get(0).getBytesUsed() == containerSpace / 3, 100, 3000); @@ -615,6 +626,8 @@ deleteAndWait(svc, 2); + containerData.forEach(c -> assertDeletionsInChecksumFile(c, 3)); + // After deletion of all 3 blocks, space used by the containers // should be zero. GenericTestUtils.waitFor(() -> @@ -839,7 +852,7 @@ public void testBlockDeletionTimeout(ContainerTestVersionInfo versionInfo) timeout = 0; svc = new BlockDeletingService(ozoneContainer, TimeUnit.MILLISECONDS.toNanos(1000), timeout, TimeUnit.MILLISECONDS, - 10, conf, "", mock(ReconfigurationHandler.class)); + 10, conf, "", mock(ContainerChecksumTreeManager.class), mock(ReconfigurationHandler.class)); svc.start(); // get container meta data @@ -1154,4 +1167,29 @@ private void setLayoutAndSchemaForTest(ContainerTestVersionInfo versionInfo) { this.schemaVersion = versionInfo.getSchemaVersion(); ContainerTestVersionInfo.setTestSchemaVersion(schemaVersion, conf); } + + private void assertDeletionsInChecksumFile(ContainerData data, int numBlocks) { + File checksumFile = ContainerChecksumTreeManager.getContainerChecksumFile(data); + ContainerProtos.ContainerChecksumInfo checksumInfo = null; + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + checksumInfo = ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); + } catch (IOException ex) { + fail("Failed to read container checksum tree file", ex); + } + + assertNotNull(checksumInfo); + + List<Long> deletedBlocks = checksumInfo.getDeletedBlocksList(); + + assertEquals(numBlocks, deletedBlocks.size()); + // Create a sorted copy of the list to check the order written to the file. + List<Long> sortedDeletedBlocks = checksumInfo.getDeletedBlocksList().stream() + .sorted() + .collect(Collectors.toList()); + assertNotSame(sortedDeletedBlocks, deletedBlocks); + assertEquals(sortedDeletedBlocks, deletedBlocks); + + // Block list should be unique.
+ assertEquals(new HashSet<>(deletedBlocks).size(), deletedBlocks.size()); + } } From 6087fa3d73b012f803d622d66b56c78562c8c761 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 27 Jun 2024 12:36:42 -0400 Subject: [PATCH 14/24] Add test for block delete commands retried --- .../common/TestBlockDeletingService.java | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index 34942789383e..96c36d6a6911 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -19,6 +19,7 @@ import com.google.common.collect.Lists; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.client.BlockID; @@ -84,6 +85,10 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.time.Duration; import java.util.ArrayList; import java.util.Collections; @@ -109,6 +114,7 @@ import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; import static org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion.FILE_PER_BLOCK; +import static org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion.FILE_PER_CHUNK; import static org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask.LOG; import static org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil.isSameSchemaVersion; import static org.assertj.core.api.Assertions.assertThat; @@ -1101,6 +1107,47 @@ public void testBlockThrottle(ContainerTestVersionInfo versionInfo) } } + /** + * The container checksum file is updated with blocks that have been deleted after the file is removed from disk, + * but before the transaction is removed from the DB. If there is a failure partway through, the checksum file + * should still get updated when the transaction is retried, even if the block file is not present. 
+ */ + @ContainerTestVersionInfo.ContainerTest + public void testChecksumFileUpdatedWhenDeleteRetried(ContainerTestVersionInfo versionInfo) throws Exception { + final int numBlocks = 4; + setLayoutAndSchemaForTest(versionInfo); + DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class); + dnConf.setBlockDeletionLimit(4); + this.blockLimitPerInterval = dnConf.getBlockDeletionLimit(); + conf.setFromObject(dnConf); + ContainerSet containerSet = new ContainerSet(1000); + KeyValueContainerData contData = createToDeleteBlocks(containerSet, numBlocks, 4); + KeyValueHandler keyValueHandler = + new KeyValueHandler(conf, datanodeUuid, containerSet, volumeSet, + ContainerMetrics.create(conf), c -> {}); + BlockDeletingServiceTestImpl svc = + getBlockDeletingService(containerSet, conf, keyValueHandler); + svc.start(); + GenericTestUtils.waitFor(svc::isStarted, 100, 3000); + + // Remove all the block files from the disk, as if they were deleted previously but the system failed before + // doing any metadata updates or removing the transaction of to-delete block IDs from the DB. + File blockDataDir = new File(contData.getChunksPath()); + try (DirectoryStream stream = Files.newDirectoryStream(blockDataDir.toPath())) { + for (Path entry : stream) { + assertTrue(entry.toFile().delete()); + } + } + + String[] blockFilesRemaining = blockDataDir.list(); + assertNotNull(blockFilesRemaining); + assertEquals(0, blockFilesRemaining.length); + + deleteAndWait(svc, 1); + + assertDeletionsInChecksumFile(contData, numBlocks); + } + /** * Check blockData record count of certain container (DBHandle not provided). * @@ -1176,11 +1223,9 @@ private void assertDeletionsInChecksumFile(ContainerData data, int numBlocks) { } catch (IOException ex) { fail("Failed to read container checksum tree file", ex); } - assertNotNull(checksumInfo); List deletedBlocks = checksumInfo.getDeletedBlocksList(); - assertEquals(numBlocks, deletedBlocks.size()); // Create a sorted copy of the list to check the order written to the file. List sortedDeletedBlocks = checksumInfo.getDeletedBlocksList().stream() @@ -1189,7 +1234,7 @@ private void assertDeletionsInChecksumFile(ContainerData data, int numBlocks) { assertNotSame(sortedDeletedBlocks, deletedBlocks); assertEquals(sortedDeletedBlocks, deletedBlocks); - // Block list should be unique. + // Each block in the list should be unique. 
assertEquals(new HashSet<>(deletedBlocks).size(), deletedBlocks.size()); } } From 9714259b1b0ea75daeb1380deb066f6578dfc6da Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 27 Jun 2024 12:57:58 -0400 Subject: [PATCH 15/24] Rename data tree to container tree --- .../checksum/ContainerChecksumTreeManager.java | 2 +- .../checksum/TestContainerChecksumTreeManager.java | 12 ++++++------ .../src/main/proto/DatanodeClientProtocol.proto | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 18cdbfa38d45..939c6d08b336 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -63,7 +63,7 @@ public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTr writeLock.lock(); try { ContainerProtos.ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() - .setDataMerkleTree(tree.toProto()) + .setContainerMerkleTree(tree.toProto()) .build(); write(data, newChecksumInfo); LOG.debug("Data merkle tree for container {} updated", data.getContainerID()); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index c8a2fbb38669..767eed8a73d3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -64,7 +64,7 @@ public void testWriteEmptyTreeToFile() throws Exception { assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); - ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getContainerMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); } @@ -76,7 +76,7 @@ public void testWriteEmptyBlockListToFile() throws Exception { assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); - ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getContainerMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); } @@ -92,7 +92,7 @@ public void testWriteOnlyTreeToFile() throws Exception { assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. // Therefore, we can use the proto version of our expected tree to check what was written to the file. 
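// For reference, a sketch of the serialized shape these assertions check, following
// the DatanodeClientProtocol.proto definitions in this series (illustrative textproto
// with made-up values; field names match the rename in this patch):
//   containerID: 1
//   containerMerkleTree {
//     dataChecksum: 12345
//     blockMerkleTree {
//       blockID: 1
//       blockChecksum: 678
//       chunkMerkleTree { offset: 0 length: 4096 chunkChecksum: 90 }
//     }
//   }
//   deletedBlocks: 2
//   deletedBlocks: 5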
- assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); + assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); } @Test @@ -104,7 +104,7 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception { assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); - ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getContainerMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); } @@ -120,7 +120,7 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); - assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); + assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); } @Test @@ -134,7 +134,7 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); - assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); + assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); } @Test diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index aa3666fb50c6..833159c84ec1 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -547,7 +547,7 @@ message ContainerMerkleTree { message ContainerChecksumInfo { optional int64 containerID = 1; - optional ContainerMerkleTree dataMerkleTree = 2; + optional ContainerMerkleTree containerMerkleTree = 2; repeated int64 deletedBlocks = 3; } From 3161ad87cad1022f4ae9b099e44270d686b4ba52 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 27 Jun 2024 13:47:50 -0400 Subject: [PATCH 16/24] Use generic container type in checksum manager --- .../container/checksum/ContainerChecksumTreeManager.java | 9 ++++----- .../ozone/container/common/impl/ContainerData.java | 6 ++++++ .../ozone/container/keyvalue/KeyValueContainerData.java | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index e739cb4ed4a9..7bd2306a83db 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -62,7 +62,7 @@ public ContainerChecksumTreeManager(ConfigurationSource conf) { * file remains unchanged. * Concurrent writes to the same file are coordinated internally. 
*/ - public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTree tree) throws IOException { + public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { @@ -114,8 +114,7 @@ public ContainerDiff diff(KeyValueContainerData thisContainer, File otherContain * Returns the container checksum tree file for the specified container without deserializing it. */ public static File getContainerChecksumFile(ContainerData data) { - // TODO change type, don't cast - return new File(((KeyValueContainerData) data).getMetadataPath(), data.getContainerID() + ".tree"); + return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); } private Lock getReadLock(long containerID) { @@ -126,7 +125,7 @@ private Lock getWriteLock(long containerID) { return fileLock.get(containerID).writeLock(); } - private ContainerProtos.ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { + private ContainerProtos.ContainerChecksumInfo read(ContainerData data) throws IOException { long containerID = data.getContainerID(); Lock readLock = getReadLock(containerID); readLock.lock(); @@ -150,7 +149,7 @@ private ContainerProtos.ContainerChecksumInfo read(KeyValueContainerData data) t } } - private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksumInfo checksumInfo) + private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo checksumInfo) throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java index 4e3f2a7d53be..cc1cbd42e498 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java @@ -187,6 +187,12 @@ public long getContainerID() { */ public abstract String getContainerPath(); + /** + * Returns container metadata path. + * @return - Physical path where container file and checksum are stored. + */ + public abstract String getMetadataPath(); + /** * Returns the type of the container. * @return ContainerType diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java index 7fce70f8e18e..2ee9fffd41fe 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerData.java @@ -181,6 +181,7 @@ public File getDbFile() { * Returns container metadata path. * @return - Physical path where container file and checksum is stored.
*/ + @Override public String getMetadataPath() { return metadataPath; } From 041c5c230cee8c7148f596db660720206b419b0c Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 27 Jun 2024 13:56:55 -0400 Subject: [PATCH 17/24] Checkstyle --- .../container/common/impl/BlockDeletingService.java | 1 - .../checksum/TestContainerChecksumTreeManager.java | 1 - .../container/common/TestBlockDeletingService.java | 10 ++-------- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java index c4ffa44dcf22..5392af1deb2b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/BlockDeletingService.java @@ -29,7 +29,6 @@ import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.helpers.BlockDeletingServiceMetrics; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDeletionChoosingPolicy; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 1910b28cdb97..d0c5b10bbb19 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -19,7 +19,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; -import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index 96c36d6a6911..b17302496cb6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -19,7 +19,6 @@ import com.google.common.collect.Lists; -import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.client.BlockID; @@ -88,10 +87,8 @@ import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.time.Duration; import 
java.util.ArrayList; -import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -114,13 +111,11 @@ import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; import static org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion.FILE_PER_BLOCK; -import static org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion.FILE_PER_CHUNK; import static org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask.LOG; import static org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil.isSameSchemaVersion; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotSame; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -1123,8 +1118,7 @@ public void testChecksumFileUpdatedWhenDeleteRetried(ContainerTestVersionInfo ve ContainerSet containerSet = new ContainerSet(1000); KeyValueContainerData contData = createToDeleteBlocks(containerSet, numBlocks, 4); KeyValueHandler keyValueHandler = - new KeyValueHandler(conf, datanodeUuid, containerSet, volumeSet, - ContainerMetrics.create(conf), c -> {}); + new KeyValueHandler(conf, datanodeUuid, containerSet, volumeSet, ContainerMetrics.create(conf), c -> { }); BlockDeletingServiceTestImpl svc = getBlockDeletingService(containerSet, conf, keyValueHandler); svc.start(); @@ -1219,7 +1213,7 @@ private void assertDeletionsInChecksumFile(ContainerData data, int numBlocks) { File checksumFile = ContainerChecksumTreeManager.getContainerChecksumFile(data); ContainerProtos.ContainerChecksumInfo checksumInfo = null; try (FileInputStream inStream = new FileInputStream(checksumFile)) { - checksumInfo = ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); + checksumInfo = ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); } catch (IOException ex) { fail("Failed to read container checksum tree file", ex); } From 245844c0c5b8248cdfba0a0e75f2f215d961f746 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 26 Jul 2024 15:07:22 -0400 Subject: [PATCH 18/24] Use block name in task --- .../statemachine/background/BlockDeletingTask.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java index f68612f360c0..38c2bfad2de7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingTask.java @@ -221,7 +221,7 @@ public ContainerBackgroundTaskResult deleteViaSchema1( releasedBytes += KeyValueContainerUtil.getBlockLength( entry.getValue()); succeedBlockIDs.add(entry.getValue().getLocalID()); - succeedBlockDBKeys.add(entry.getKey()); + succeedBlockDBKeys.add(blockName); } catch (InvalidProtocolBufferException e) { 
LOG.error("Failed to parse block info for block {}", blockName, e); } catch (IOException e) { @@ -230,7 +230,7 @@ public ContainerBackgroundTaskResult deleteViaSchema1( } // Mark blocks as deleted in the container checksum tree. - // Data for these blocks does not need to be copied if container replicas diverge. + // Data for these blocks does not need to be copied during container reconciliation if container replicas diverge. // Do this before the delete transactions are removed from the database. checksumTreeManager.markBlocksAsDeleted(containerData, succeedBlockIDs); @@ -375,9 +375,8 @@ private ContainerBackgroundTaskResult deleteViaTransactionStore( deleteBlocksResult.deletedBlocksTxs().forEach( tx -> crr.addAll(tx.getLocalIDList())); - // TODO if block file was not found, will it still get added to CRR? // Mark blocks as deleted in the container checksum tree. - // Data for these blocks does not need to be copied if container replicas diverge. + // Data for these blocks does not need to be copied if container replicas diverge during container reconciliation. // Do this before the delete transactions are removed from the database. checksumTreeManager.markBlocksAsDeleted(containerData, crr.getDeletedBlocks()); From 51e70208dcfbdc4d10714b7402539604ee3f08d6 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 30 Jul 2024 12:31:51 -0400 Subject: [PATCH 19/24] Update test description --- .../ozone/container/common/TestBlockDeletingService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java index b17302496cb6..646b89ad2c1b 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java @@ -1103,8 +1103,8 @@ public void testBlockThrottle(ContainerTestVersionInfo versionInfo) } /** - * The container checksum file is updated with blocks that have been deleted after the file is removed from disk, - * but before the transaction is removed from the DB. If there is a failure partway through, the checksum file + * The container checksum tree file is updated with the blocks that have been deleted after the on disk block files are removed from disk, + * but before the transaction is removed from the DB. If there is a failure partway through, the checksum tree file * should still get updated when the transaction is retried, even if the block file is not present. 
*/ @ContainerTestVersionInfo.ContainerTest From 1748205dbb48fbe35417ff1ca6cb5dc2ef9fec2f Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 30 Jul 2024 15:32:29 -0400 Subject: [PATCH 20/24] Move test helper methods to util class --- .../ContainerChecksumTreeManager.java | 4 - .../ContainerMerkleTreeTestUtils.java | 100 +++++++++++++++ .../TestContainerChecksumTreeManager.java | 43 +++---- .../checksum/TestContainerMerkleTree.java | 114 +++++------------- .../common/TestBlockDeletingService.java | 8 +- 5 files changed, 151 insertions(+), 118 deletions(-) create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 0befb77d0d42..f05d69cdcebf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -175,10 +175,6 @@ private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo che } } - public File getContainerChecksumFile(KeyValueContainerData data) { - return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); - } - @VisibleForTesting public ContainerMerkleTreeMetrics getMetrics() { return this.metrics; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java new file mode 100644 index 000000000000..0fea8a39963a --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -0,0 +1,100 @@ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class ContainerMerkleTreeTestUtils { + public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree expectedTree, + ContainerProtos.ContainerMerkleTree actualTree) { + assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); + assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount()); + + long prevBlockID = -1; + for (int blockIndex = 0; blockIndex < expectedTree.getBlockMerkleTreeCount(); blockIndex++) { + ContainerProtos.BlockMerkleTree expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); + ContainerProtos.BlockMerkleTree actualBlockTree = 
actualTree.getBlockMerkleTree(blockIndex); + + // Blocks should be sorted by block ID. + long currentBlockID = actualBlockTree.getBlockID(); + assertTrue(prevBlockID < currentBlockID); + prevBlockID = currentBlockID; + + assertEquals(expectedBlockTree.getBlockID(), actualBlockTree.getBlockID()); + assertEquals(expectedBlockTree.getBlockChecksum(), actualBlockTree.getBlockChecksum()); + + long prevChunkOffset = -1; + for (int chunkIndex = 0; chunkIndex < expectedBlockTree.getChunkMerkleTreeCount(); chunkIndex++) { + ContainerProtos.ChunkMerkleTree expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex); + ContainerProtos.ChunkMerkleTree actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex); + + // Chunks should be sorted by offset. + long currentChunkOffset = actualChunkTree.getOffset(); + assertTrue(prevChunkOffset < currentChunkOffset); + prevChunkOffset = currentChunkOffset; + + assertEquals(expectedChunkTree.getOffset(), actualChunkTree.getOffset()); + assertEquals(expectedChunkTree.getLength(), actualChunkTree.getLength()); + assertEquals(expectedChunkTree.getChunkChecksum(), actualChunkTree.getChunkChecksum()); + } + } + } + + /** + * Builds a ChunkInfo object using the provided information. No new checksums are calculated, so this can be used + * as either the leaves of pre-computed merkle trees that serve as expected values, or as building blocks to pass + * to ContainerMerkleTree to have it build the whole tree from this information. + * + * @param indexInBlock Which chunk number within a block this is. The chunk's offset is automatically calculated + * from this based on a fixed length. + * @param chunkChecksums The checksums within the chunk. Each is assumed to apply to a fixed value + * "bytesPerChecksum" amount of data and are assumed to be contiguous. + * @return The ChunkInfo proto object built from this information. + */ + public static ChunkInfo buildChunk(ConfigurationSource config, int indexInBlock, ByteBuffer... chunkChecksums) throws IOException { + final long chunkSize = (long) config.getStorageSize( + ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES); + final int bytesPerChecksum = config.getObject(OzoneClientConfig.class).getBytesPerChecksum(); + + // Each chunk checksum is added under the same ChecksumData object. + ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder() + .setType(ContainerProtos.ChecksumType.CRC32) + .setBytesPerChecksum(bytesPerChecksum) + .addAllChecksums(Arrays.stream(chunkChecksums) + .map(ByteString::copyFrom) + .collect(Collectors.toList())) + .build(); + + return ChunkInfo.getFromProtoBuf( + ContainerProtos.ChunkInfo.newBuilder() + .setChecksumData(checksumData) + .setChunkName("chunk") + .setOffset(indexInBlock * chunkSize) + .setLen(chunkSize) + .build()); + } + + /** + * This reads the checksum file for a container from the disk without synchronization/coordination between readers + * and writers within a datanode. 
+ */ + public static ContainerProtos.ContainerChecksumInfo readChecksumFile(ContainerData data) throws IOException { + try (FileInputStream inStream = new FileInputStream(ContainerChecksumTreeManager.getContainerChecksumFile(data))) { + return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); + } + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index e06fbf78ed99..35c741ed380b 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -16,25 +16,24 @@ */ package org.apache.hadoop.ozone.container.checksum; +import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; -import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; import java.util.TreeSet; -import static org.apache.hadoop.ozone.container.checksum.TestContainerMerkleTree.assertTreesSortedAndMatch; -import static org.apache.hadoop.ozone.container.checksum.TestContainerMerkleTree.buildChunk; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildChunk; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; @@ -49,6 +48,7 @@ class TestContainerChecksumTreeManager { private File checksumFile; private ContainerChecksumTreeManager checksumManager; private ContainerMerkleTreeMetrics metrics; + private ConfigurationSource config; @BeforeEach public void init() { @@ -58,6 +58,7 @@ public void init() { checksumFile = new File(testDir, CONTAINER_ID + ".tree"); checksumManager = new ContainerChecksumTreeManager(new OzoneConfiguration()); metrics = checksumManager.getMetrics(); + config = new OzoneConfiguration(); } @Test @@ -68,7 +69,7 @@ public void testWriteEmptyTreeToFile() throws Exception { assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); - ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); @@ -83,7 +84,7 @@ public void testWriteEmptyBlockListToFile() throws Exception { checksumManager.markBlocksAsDeleted(container, new TreeSet<>()); 
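// (A note on the shared helper these assertions now use: readChecksumFile parses the
// file directly and, per its javadoc, skips the manager's coordination; presumably
// this is safe here because each test runs single-threaded.)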
assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);

-    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);

     assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
     assertTrue(checksumInfo.getDeletedBlocksList().isEmpty());
@@ -99,7 +100,7 @@ public void testWriteOnlyTreeToFile() throws Exception {
     ContainerMerkleTree tree = buildTestTree();
     checksumManager.writeContainerDataTree(container, tree);
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);
-    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);
     assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0);

     assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
@@ -116,7 +117,7 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception {
     checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().changed());

-    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);

     assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
     assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
@@ -138,7 +139,7 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception {
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);
     assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0);

-    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);
     assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0);

     assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
@@ -159,7 +160,7 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception {
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);
     assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0);

-    ContainerProtos.ContainerChecksumInfo checksumInfo = readFile();
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);
     assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0);

     assertEquals(CONTAINER_ID, checksumInfo.getContainerID());
@@ -182,19 +183,19 @@ public void testReadContainerMerkleTreeMetric() throws Exception {
   @Test
   public void testChecksumTreeFilePath() {
     assertEquals(checksumFile.getAbsolutePath(),
-        checksumManager.getContainerChecksumFile(container).getAbsolutePath());
+        ContainerChecksumTreeManager.getContainerChecksumFile(container).getAbsolutePath());
   }

   private ContainerMerkleTree buildTestTree() throws Exception {
     final long blockID1 = 1;
     final long blockID2 = 2;
     final long blockID3 = 3;
-    ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
-    ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{4, 5, 6}));
-    ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{7, 8, 9}));
-    ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{12, 11, 10}));
-    ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{13, 14, 15}));
-    ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{16, 17, 18}));
+    ChunkInfo b1c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3}));
+    ChunkInfo b1c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{4, 5, 6}));
+    ChunkInfo b2c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{7, 8, 9}));
+    ChunkInfo b2c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{12, 11, 10}));
+    ChunkInfo b3c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{13, 14, 15}));
+    ChunkInfo b3c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{16, 17, 18}));

     ContainerMerkleTree tree = new ContainerMerkleTree();
     tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2));
@@ -203,10 +204,4 @@ private ContainerMerkleTree buildTestTree() throws Exception {
     return tree;
   }
-
-  private ContainerProtos.ContainerChecksumInfo readFile() throws IOException {
-    try (FileInputStream inStream = new FileInputStream(checksumFile)) {
-      return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream);
-    }
-  }
 }
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
index a93c4f170236..42e4413c4b8a 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java
@@ -16,11 +16,11 @@
  */
 package org.apache.hadoop.ozone.container.checksum;

+import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;

-import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.Collections;
@@ -28,20 +28,27 @@
 import java.util.stream.Collectors;
 import java.util.zip.CRC32;

-import org.apache.hadoop.hdds.scm.OzoneClientConfig;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;

+import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch;
+import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildChunk;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;

 class TestContainerMerkleTree {
-  private static final long CHUNK_SIZE = (long) new OzoneConfiguration().getStorageSize(
-      ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES);
-  private static final int BYTES_PER_CHECKSUM = new OzoneClientConfig().getBytesPerChecksum();
+  private ConfigurationSource config;
+  private long chunkSize;
+
+  @BeforeEach
+  public void init() {
+    config = new OzoneConfiguration();
+    chunkSize = (long) config.getStorageSize(
+        ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES);
+  }

   @Test
   public void testBuildEmptyTree() {
@@ -55,7 +62,7 @@ public void testBuildEmptyTree() {
   public void testBuildOneChunkTree() throws Exception {
     // Seed the expected and actual trees with the same chunk.
final long blockID = 1; - ChunkInfo chunk = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo chunk = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Build the expected tree proto using the test code. ContainerProtos.ChunkMerkleTree chunkTree = buildExpectedChunkTree(chunk); @@ -82,7 +89,7 @@ public void testBuildOneChunkTree() throws Exception { ContainerProtos.ChunkMerkleTree actualChunkTree = actualBlockTree.getChunkMerkleTree(0); assertEquals(0, actualChunkTree.getOffset()); - assertEquals(CHUNK_SIZE, actualChunkTree.getLength()); + assertEquals(chunkSize, actualChunkTree.getLength()); assertNotEquals(0, actualChunkTree.getChunkChecksum()); } @@ -90,9 +97,9 @@ public void testBuildOneChunkTree() throws Exception { public void testBuildTreeWithMissingChunks() throws Exception { // These chunks will be used to seed both the expected and actual trees. final long blockID = 1; - ChunkInfo chunk1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo chunk1 = buildChunk(config,0, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Chunk 2 is missing. - ChunkInfo chunk3 = buildChunk(2, ByteBuffer.wrap(new byte[]{4, 5, 6})); + ChunkInfo chunk3 = buildChunk(config, 2, ByteBuffer.wrap(new byte[]{4, 5, 6})); // Build the expected tree proto using the test code. ContainerProtos.BlockMerkleTree blockTree = buildExpectedBlockTree(blockID, @@ -116,10 +123,10 @@ public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { // Seed the expected and actual trees with the same chunks. final long blockID1 = 1; final long blockID3 = 3; - ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Build the expected tree proto using the test code. 
ContainerProtos.BlockMerkleTree blockTree1 = buildExpectedBlockTree(blockID1, @@ -146,13 +153,13 @@ public void testAppendToBlocksWhileBuilding() throws Exception { final long blockID1 = 1; final long blockID2 = 2; final long blockID3 = 3; - ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2})); - ChunkInfo b1c3 = buildChunk(2, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1})); - ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{2, 3, 4})); + ChunkInfo b1c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{1, 2})); + ChunkInfo b1c3 = buildChunk(config, 2, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b2c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b2c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1})); + ChunkInfo b3c2 = buildChunk(config, 1, ByteBuffer.wrap(new byte[]{2, 3, 4})); // Build the expected tree proto using the test code. ContainerProtos.BlockMerkleTree blockTree1 = buildExpectedBlockTree(blockID1, @@ -181,41 +188,6 @@ public void testAppendToBlocksWhileBuilding() throws Exception { assertTreesSortedAndMatch(expectedTree, actualTreeProto); } - public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree expectedTree, - ContainerProtos.ContainerMerkleTree actualTree) { - assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); - assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount()); - - long prevBlockID = -1; - for (int blockIndex = 0; blockIndex < expectedTree.getBlockMerkleTreeCount(); blockIndex++) { - ContainerProtos.BlockMerkleTree expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); - ContainerProtos.BlockMerkleTree actualBlockTree = actualTree.getBlockMerkleTree(blockIndex); - - // Blocks should be sorted by block ID. - long currentBlockID = actualBlockTree.getBlockID(); - assertTrue(prevBlockID < currentBlockID); - prevBlockID = currentBlockID; - - assertEquals(expectedBlockTree.getBlockID(), actualBlockTree.getBlockID()); - assertEquals(expectedBlockTree.getBlockChecksum(), actualBlockTree.getBlockChecksum()); - - long prevChunkOffset = -1; - for (int chunkIndex = 0; chunkIndex < expectedBlockTree.getChunkMerkleTreeCount(); chunkIndex++) { - ContainerProtos.ChunkMerkleTree expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex); - ContainerProtos.ChunkMerkleTree actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex); - - // Chunks should be sorted by offset. 
-        long currentChunkOffset = actualChunkTree.getOffset();
-        assertTrue(prevChunkOffset < currentChunkOffset);
-        prevChunkOffset = currentChunkOffset;
-
-        assertEquals(expectedChunkTree.getOffset(), actualChunkTree.getOffset());
-        assertEquals(expectedChunkTree.getLength(), actualChunkTree.getLength());
-        assertEquals(expectedChunkTree.getChunkChecksum(), actualChunkTree.getChunkChecksum());
-      }
-    }
-  }
-
   private ContainerProtos.ContainerMerkleTree buildExpectedContainerTree(List<ContainerProtos.BlockMerkleTree> blocks) {
     return ContainerProtos.ContainerMerkleTree.newBuilder()
         .addAllBlockMerkleTree(blocks)
@@ -246,36 +218,6 @@ private ContainerProtos.ChunkMerkleTree buildExpectedChunkTree(ChunkInfo chunk)
         .build();
   }

-  /**
-   * Builds a ChunkInfo object using the provided information. No new checksums are calculated, so this can be used
-   * as either the leaves of pre-computed merkle trees that serve as expected values, or as building blocks to pass
-   * to ContainerMerkleTree to have it build the whole tree from this information.
-   *
-   * @param indexInBlock Which chunk number within a block this is. The chunk's offset is automatically calculated
-   *                     from this based on a fixed length.
-   * @param chunkChecksums The checksums within the chunk. Each is assumed to apply to a fixed value
-   *                       "bytesPerChecksum" amount of data and are assumed to be contiguous.
-   * @return The ChunkInfo proto object built from this information.
-   */
-  public static ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException {
-    // Each chunk checksum is added under the same ChecksumData object.
-    ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder()
-        .setType(ContainerProtos.ChecksumType.CRC32)
-        .setBytesPerChecksum(BYTES_PER_CHECKSUM)
-        .addAllChecksums(Arrays.stream(chunkChecksums)
-            .map(ByteString::copyFrom)
-            .collect(Collectors.toList()))
-        .build();
-
-    return ChunkInfo.getFromProtoBuf(
-        ContainerProtos.ChunkInfo.newBuilder()
-            .setChecksumData(checksumData)
-            .setChunkName("chunk")
-            .setOffset(indexInBlock * CHUNK_SIZE)
-            .setLen(CHUNK_SIZE)
-            .build());
-  }
-
   private long computeExpectedChecksum(List<Long> checksums) {
     CRC32 crc32 = new CRC32();
     ByteBuffer longBuffer = ByteBuffer.allocate(Long.BYTES * checksums.size());
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
index 646b89ad2c1b..5b92531d5bcb 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
@@ -107,6 +107,7 @@
 import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V1;
 import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V2;
 import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V3;
+import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile;
 import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.COMMIT_STAGE;
 import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE;
 import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded;
@@ -1210,12 +1211,11 @@ private void setLayoutAndSchemaForTest(ContainerTestVersionInfo versionInfo) {
   }

   private void assertDeletionsInChecksumFile(ContainerData data,
      int numBlocks) {
-    File checksumFile = ContainerChecksumTreeManager.getContainerChecksumFile(data);
     ContainerProtos.ContainerChecksumInfo checksumInfo = null;
-    try (FileInputStream inStream = new FileInputStream(checksumFile)) {
-      checksumInfo = ContainerProtos.ContainerChecksumInfo.parseFrom(inStream);
+    try {
+      checksumInfo = readChecksumFile(data);
     } catch (IOException ex) {
-      fail("Failed to read container checksum tree file", ex);
+      fail("Failed to read container checksum tree file: " + ex.getMessage());
     }
     assertNotNull(checksumInfo);

From 5d247040f5e46a6fe42bc7e98a2b36218d4c8a4e Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Tue, 30 Jul 2024 15:48:26 -0400
Subject: [PATCH 21/24] Add more tests for blocks being written to the file

---
 .../TestContainerChecksumTreeManager.java | 36 ++++++++++++++++---
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
index 35c741ed380b..2074d3a12200 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
@@ -27,7 +27,9 @@

 import java.io.File;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.TreeSet;

@@ -81,7 +83,7 @@ public void testWriteEmptyTreeToFile() throws Exception {
   @Test
   public void testWriteEmptyBlockListToFile() throws Exception {
     assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0);
-    checksumManager.markBlocksAsDeleted(container, new TreeSet<>());
+    checksumManager.markBlocksAsDeleted(container, Collections.emptySet());
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);

     ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);
@@ -114,7 +116,7 @@ public void testWriteOnlyTreeToFile() throws Exception {
   public void testWriteOnlyDeletedBlocksToFile() throws Exception {
     assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
-    checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
+    checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete));
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().changed());

     ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);
@@ -126,13 +128,39 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception {
     assertTrue(treeProto.getBlockMerkleTreeList().isEmpty());
   }

+  @Test
+  public void testWriteDuplicateDeletedBlocks() throws Exception {
+    // Blocks are expected to appear in the file deduplicated, in this order.
+    List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
+    // Pass a duplicate block; it should be filtered out.
+    checksumManager.markBlocksAsDeleted(container, Arrays.asList(1L, 2L, 2L, 3L));
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);
+    assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
+
+    // Blocks are expected to appear in the file deduplicated, in this order.
+    expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L, 4L);
+    // Pass another set of blocks. This list and the previously passed list should be merged, deduplicated, and sorted.
+    checksumManager.markBlocksAsDeleted(container, Arrays.asList(2L, 2L, 3L, 4L));
+    checksumInfo = readChecksumFile(container);
+    assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
+  }
+
+  @Test
+  public void testWriteBlocksOutOfOrder() throws Exception {
+    // Blocks are expected to be written to the file in this order.
+    List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
+    checksumManager.markBlocksAsDeleted(container, Arrays.asList(3L, 1L, 2L));
+    ContainerProtos.ContainerChecksumInfo checksumInfo = readChecksumFile(container);
+    assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList());
+  }
+
   @Test
   public void testDeletedBlocksPreservedOnTreeWrite() throws Exception {
     assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0);
     assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
-    checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
+    checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete));
     assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     ContainerMerkleTree tree = buildTestTree();
     checksumManager.writeContainerDataTree(container, tree);
@@ -156,7 +184,7 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception {
     checksumManager.writeContainerDataTree(container, tree);
     assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0);
     List<Long> expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L);
-    checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete));
+    checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete));
     assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0);
     assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0);

From 014580936f3d531e9e0721faf15ebafcb7fee354 Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Tue, 30 Jul 2024 15:56:32 -0400
Subject: [PATCH 22/24] Checkstyle

---
 .../checksum/ContainerMerkleTreeTestUtils.java      | 10 ++++++++--
 .../checksum/TestContainerChecksumTreeManager.java  |  1 -
 .../container/checksum/TestContainerMerkleTree.java |  2 +-
 .../container/common/TestBlockDeletingService.java  |  8 ++++----
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
index 0fea8a39963a..af91a40c6aa3 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
@@ -18,7 +18,12 @@
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;

-public class ContainerMerkleTreeTestUtils {
+/**
+ * Helper methods for testing container checksum tree files and container reconciliation.
+ */ +public final class ContainerMerkleTreeTestUtils { + private ContainerMerkleTreeTestUtils() { } + public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree expectedTree, ContainerProtos.ContainerMerkleTree actualTree) { assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); @@ -65,7 +70,8 @@ public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree * "bytesPerChecksum" amount of data and are assumed to be contiguous. * @return The ChunkInfo proto object built from this information. */ - public static ChunkInfo buildChunk(ConfigurationSource config, int indexInBlock, ByteBuffer... chunkChecksums) throws IOException { + public static ChunkInfo buildChunk(ConfigurationSource config, int indexInBlock, ByteBuffer... chunkChecksums) + throws IOException { final long chunkSize = (long) config.getStorageSize( ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES); final int bytesPerChecksum = config.getObject(OzoneClientConfig.class).getBytesPerChecksum(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 2074d3a12200..9258f656e0d4 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -31,7 +31,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.TreeSet; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildChunk; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 42e4413c4b8a..536e9a1fa3f5 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -97,7 +97,7 @@ public void testBuildOneChunkTree() throws Exception { public void testBuildTreeWithMissingChunks() throws Exception { // These chunks will be used to seed both the expected and actual trees. final long blockID = 1; - ChunkInfo chunk1 = buildChunk(config,0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo chunk1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Chunk 2 is missing. 
ChunkInfo chunk3 = buildChunk(config, 2, ByteBuffer.wrap(new byte[]{4, 5, 6}));
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
index 5b92531d5bcb..ab313d0ce66a 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
@@ -80,7 +80,6 @@
 import org.slf4j.LoggerFactory;

 import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
@@ -1104,9 +1103,10 @@ public void testBlockThrottle(ContainerTestVersionInfo versionInfo)
   }

   /**
-   * The container checksum tree file is updated with the blocks that have been deleted after the on disk block files are removed from disk,
-   * but before the transaction is removed from the DB. If there is a failure partway through, the checksum tree file
-   * should still get updated when the transaction is retried, even if the block file is not present.
+   * The container checksum tree file is updated with the blocks that have been deleted after their block files are
+   * removed from disk, but before the transaction is removed from the DB. If there is a failure partway through,
+   * the checksum tree file should still get updated when the transaction is retried, even if the block file is not
+   * present.
    */
   @ContainerTestVersionInfo.ContainerTest
   public void testChecksumFileUpdatedWhenDeleteRetried(ContainerTestVersionInfo versionInfo) throws Exception {

From 52207ce2feef0e4ecfa32ff4bf9bd5eef5d5834a Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Tue, 30 Jul 2024 17:27:19 -0400
Subject: [PATCH 23/24] Fix rat

---
 .../checksum/ContainerMerkleTreeTestUtils.java | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
index af91a40c6aa3..27857546eb7a 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.ozone.container.checksum;

 import org.apache.hadoop.hdds.conf.ConfigurationSource;

From a82c86388ac9ff6b6dd6a3b7ab050992e031e302 Mon Sep 17 00:00:00 2001
From: Ethan Rose 
Date: Wed, 31 Jul 2024 13:01:27 -0400
Subject: [PATCH 24/24] Add metrics unregister workaround from WIP HDDS-10376

This will be handled properly once HDDS-11254 is complete.

---
 .../ozone/container/checksum/ContainerMerkleTreeMetrics.java | 2 +-
 .../hadoop/ozone/container/ozoneimpl/OzoneContainer.java     | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java
index a288e15f6bd2..5bcf2bc04e14 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java
@@ -35,7 +35,7 @@ public static ContainerMerkleTreeMetrics create() {
         new ContainerMerkleTreeMetrics());
   }

-  public void unregister() {
+  public static void unregister() {
     MetricsSystem ms = DefaultMetricsSystem.instance();
     ms.unregisterSource(METRICS_SOURCE_NAME);
   }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
index 29ac5b9f0b9e..17676664caee 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
@@ -34,6 +34,7 @@
 import org.apache.hadoop.hdds.security.token.TokenVerifier;
 import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
 import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager;
+import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeMetrics;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics;
 import org.apache.hadoop.ozone.container.common.impl.BlockDeletingService;
@@ -499,6 +500,8 @@ public void stop() {
     blockDeletingService.shutdown();
     recoveringContainerScrubbingService.shutdown();
     ContainerMetrics.remove();
+    // TODO: Unregister ContainerMerkleTreeMetrics properly once HDDS-11254 is complete.
+    ContainerMerkleTreeMetrics.unregister();
   }

   public void handleVolumeFailures() {
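
Note on the deleted-block merge semantics pinned down by PATCH 21: markBlocksAsDeleted accepts any
Collection of block IDs, and testWriteDuplicateDeletedBlocks / testWriteBlocksOutOfOrder assert that the
IDs from all calls end up in the checksum file unioned, deduplicated, and sorted. The following is a
minimal sketch of merge logic that would satisfy those assertions, assuming the previously persisted list
has already been read from the checksum file; the class and method names here are illustrative only, not
the actual implementation in ContainerChecksumTreeManager:

    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;
    import java.util.SortedSet;
    import java.util.TreeSet;

    final class DeletedBlockMergeSketch {
      private DeletedBlockMergeSketch() { }

      // Union the previously persisted block IDs with the newly deleted ones.
      // A TreeSet keeps the result unique and sorted in a single pass.
      static List<Long> merge(List<Long> existing, Collection<Long> newlyDeleted) {
        SortedSet<Long> merged = new TreeSet<>(existing);
        merged.addAll(newlyDeleted);      // duplicates collapse on insert
        return new ArrayList<>(merged);   // proto builders accept a List
      }
    }

For example, merging a persisted list (1, 2, 3) with a new call passing (2, 2, 3, 4) yields (1, 2, 3, 4),
which is what the second half of testWriteDuplicateDeletedBlocks asserts.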