From bf0dd6e8717442cb22ff8b9025f21b34ccea34a6 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 5 Jun 2024 21:48:42 -0700 Subject: [PATCH 01/13] Initial outline of checksum manager and merkle tree with proto --- .../checksum/ContainerChecksumManager.java | 135 +++++++++++++++++ .../checksum/ContainerMerkleTree.java | 140 ++++++++++++++++++ .../container/checksum/package-info.java | 21 +++ .../main/proto/DatanodeClientProtocol.proto | 17 +++ 4 files changed, 313 insertions(+) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java new file mode 100644 index 000000000000..b709a85a55a7 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.SortedSet; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.Lock; + +import com.google.common.util.concurrent.Striped; +import org.apache.hadoop.hdds.utils.SimpleStriped; + +/** + * This class coordinates reading and writing Container checksum information for all containers. + */ +public class ContainerChecksumManager { + + private Striped fileLock; + + /** + * Creates one instance that should be used to coordinate all container checksum info within a datanode. + */ + public ContainerChecksumManager() { + // TODO add config key for size. + fileLock = SimpleStriped.readWriteLock(127, true); + } + + /** + * Writes the specified container merkle tree to the specified container's checksum file. + * The data merkle tree within the file is replaced with the {@code tree} parameter, but all other content of the + * file remains unchanged. + * Concurrent writes to the same file are coordinated internally. 
+ */ + public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkleTree tree) throws IOException { + Lock writeLock = getWriteLock(data.getContainerID()); + writeLock.lock(); + try { + ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() + .setDataMerkleTree(tree.toProto()) + .build(); + write(data, newChecksumInfo); + } finally { + writeLock.unlock(); + } + } + + /** + * Adds the specified blocks to the list of deleted blocks specified in the container's checksum file. + * All other content of the file remains unchanged. + * Concurrent writes to the same file are coordinated internally. + */ + public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet deletedBlockIDs) throws IOException { + Lock writeLock = getWriteLock(data.getContainerID()); + writeLock.lock(); + try { + ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() + // TODO actually need to merge here to keep blocks in sorted order. + .addAllDeletedBlocks(deletedBlockIDs) + .build(); + write(data, newChecksumInfo); + } finally { + writeLock.unlock(); + } + } + + public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerChecksumInfo otherInfo) throws IOException { + // TODO HDDS-10928 compare the checksum info of the two containers and return a summary. + // Callers can act on this summary to repair their container replica using the peer's replica. 
+ return new ContainerDiff(); + } + + private Lock getReadLock(long containerID) { + return fileLock.get(containerID).readLock(); + } + + private Lock getWriteLock(long containerID) { + return fileLock.get(containerID).writeLock(); + } + + private ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { + Lock readLock = getReadLock(data.getContainerID()); + readLock.lock(); + try (FileInputStream inStream = new FileInputStream(getContainerChecksumFile(data))) { + return ContainerChecksumInfo.parseFrom(inStream); + } finally { + readLock.unlock(); + } + } + + private void write(KeyValueContainerData data, ContainerChecksumInfo checksumInfo) throws IOException { + Lock writeLock = getWriteLock(data.getContainerID()); + writeLock.lock(); + try (FileOutputStream outStream = new FileOutputStream(getContainerChecksumFile(data))) { + checksumInfo.writeTo(outStream); + } finally { + writeLock.unlock(); + } + } + + private File getContainerChecksumFile(KeyValueContainerData data) { + return new File(data.getMetadataPath(), data.getContainerID() + ".checksum"); + } + + /** + * This class represents the different between our replica of a container, and a peer's replica of a container. + * It summarizes the operations we need to do to reconcile our replica the peer replica it was compared to. + * + * TODO HDDS-10928 + */ + public static class ContainerDiff { + public ContainerDiff() { + + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java new file mode 100644 index 000000000000..c400401318b2 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockMerkleTreeProto; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; +import org.apache.hadoop.ozone.common.ChecksumByteBuffer; +import org.apache.hadoop.ozone.common.ChecksumByteBufferFactory; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * This class represents a Merkle tree that provides one checksum for all data within a container. + * + * The leaves of the tree are the checksums of every chunk. Each chunk checksum in a block is further checksumed + * together to generate the block level checksum. Finally, The checksums of all blocks are checksumed together to + * create a container level checksum. Note that checksums are order dependent. Chunk checksums are sorted by their + * offset within a block, and blocks are sorted by their ID. + * + * This class can be used to construct a consistent and completely filled {@link ContainerMerkleTreeProto} object. + * It allows building a container merkle tree from scratch by incrementally adding chunks. + * The final checksums at higher levels of the tree are not calculated until + * {@link ContainerMerkleTree#toProto} is called. + */ +public class ContainerMerkleTree { + + private final SortedMap blocks; + + /** + * Constructs an empty Container merkle tree object. 
+ */ + public ContainerMerkleTree() { + blocks = new TreeMap<>(); + } + + /** + * Adds chunks to a block in the tree. The block entry will be created if it is the first time adding chunks to it. + * If the block entry already exists, the chunks will be added to the existing chunks for that block. + * + * @param blockID The ID of the block that these chunks belong to. + * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the + * existing sorted list of chunks stored for this block. + */ + public void addChunks(long blockID, SortedMap offset2Chunks) { + blocks.getOrDefault(blockID, new BlockMerkleTree(blockID)).addChunks(offset2Chunks); + } + + /** + * Uses chunk hashes to compute all remaining hashes in the tree, and returns it as a protobuf object. No checksum + * computation happens outside of this method. + * + * @return A complete protobuf object representation of this tree. + */ + public ContainerMerkleTreeProto toProto() { + // Compute checksums and return the result. + ContainerMerkleTreeProto.Builder treeProto = ContainerMerkleTreeProto.newBuilder(); + // TODO configurable checksum implementation + ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); + ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * blocks.size()); + for (Map.Entry entry: blocks.entrySet()) { + BlockMerkleTreeProto blockTreeProto = entry.getValue().toProto(); + treeProto.addBlockMerkleTree(blockTreeProto); + containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum()); + } + treeProto.setDataChecksum(checksumImpl.getValue()); + return treeProto.build(); + } + + /** + * Represents a merkle tree for a single block within a container. + */ + private static class BlockMerkleTree { + // Map of each offset within the block to its chunk info. + // Chunk order in the checksum is determined by their offset. 
+ private final SortedMap chunks; + private final long blockID; + + BlockMerkleTree(long blockID) { + this.blockID = blockID; + this.chunks = new TreeMap<>(); + } + + /** + * Adds the specified chunks to this block. This should run in linear time since the {@link SortedMap} parameter + * is added to an existing {@link TreeMap} internally. + * + * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the + * existing sorted list of chunks stored for this block. + */ + public void addChunks(SortedMap offset2Chunks) { + chunks.putAll(offset2Chunks); + } + + /** + * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. No checksum + * computation happens outside of this method. + * + * @return A complete protobuf object representation of this tree. + */ + public BlockMerkleTreeProto toProto() { + BlockMerkleTreeProto.Builder blockTreeBuilder = BlockMerkleTreeProto.newBuilder(); + // TODO configurable checksum implementation + ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); + + for (ChunkInfo chunk: chunks.values()) { + // TODO can we depend on this ordering to be consistent? 
+ List chunkChecksums = chunk.getChecksumData().getChecksums(); + blockTreeBuilder.addChunks(chunk.getProtoBufMessage()); + for (ByteString checksum: chunkChecksums) { + checksumImpl.update(checksum.asReadOnlyByteBuffer()); + } + } + + return blockTreeBuilder + .setBlockID(blockID) + .setBlockChecksum(checksumImpl.getValue()) + .build(); + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java new file mode 100644 index 000000000000..9dfdc88bf1ec --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/package-info.java @@ -0,0 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.checksum; +/** + * This package contains classes handling container level checksums. 
+ */ diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index 7755b993caea..7af47a7119de 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -527,6 +527,23 @@ message SendContainerRequest { message SendContainerResponse { } +message BlockMerkleTreeProto { + optional int64 blockID = 1; + optional int64 blockChecksum = 2; + repeated ChunkInfo chunks = 3; +} + +message ContainerMerkleTreeProto { + optional int64 dataChecksum = 1; + repeated BlockMerkleTreeProto blockMerkleTree = 2; +} + +message ContainerChecksumInfo { + optional int64 containerID = 1; + optional ContainerMerkleTreeProto dataMerkleTree = 2; + repeated int64 deletedBlocks = 3; +} + service XceiverClientProtocolService { // A client-to-datanode RPC to send container commands rpc send(stream ContainerCommandRequestProto) returns From 3adbe5a9ffbd08bccf7b61273b783d0bb7641a5f Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 18 Jun 2024 17:08:36 -0400 Subject: [PATCH 02/13] Add config key for lock stripes --- .../checksum/ContainerChecksumManager.java | 6 ++--- .../statemachine/DatanodeConfiguration.java | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java index b709a85a55a7..eb7740f9b8b3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.container.checksum; import 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import java.io.File; @@ -40,9 +41,8 @@ public class ContainerChecksumManager { /** * Creates one instance that should be used to coordinate all container checksum info within a datanode. */ - public ContainerChecksumManager() { - // TODO add config key for size. - fileLock = SimpleStriped.readWriteLock(127, true); + public ContainerChecksumManager(DatanodeConfiguration dnConf) { + fileLock = SimpleStriped.readWriteLock(dnConf.getContainerChecksumLockStripes(), true); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index a8b0d8cfa4bc..28bbb17aa8f1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -74,6 +74,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig { "hdds.datanode.wait.on.all.followers"; public static final String CONTAINER_SCHEMA_V3_ENABLED = "hdds.datanode.container.schema.v3.enabled"; + public static final String CONTAINER_CHECKSUM_LOCK_STRIPES_KEY = "hdds.datanode.container.checksum.lock.stripes"; static final boolean CHUNK_DATA_VALIDATION_CHECK_DEFAULT = false; @@ -109,6 +110,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig { "hdds.datanode.rocksdb.delete_obsolete_files_period"; public static final Boolean OZONE_DATANODE_CHECK_EMPTY_CONTAINER_DIR_ON_DELETE_DEFAULT = false; + public static final int CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT = 
127; /** * Number of threads per volume that Datanode will use for chunk read. @@ -550,6 +552,21 @@ public void setWaitOnAllFollowers(boolean val) { private boolean bCheckEmptyContainerDir = OZONE_DATANODE_CHECK_EMPTY_CONTAINER_DIR_ON_DELETE_DEFAULT; + /** + * Whether to check container directory or not to determine + * container is empty. + */ + @Config(key = "container.checksum.lock.stripes", + type = ConfigType.INT, + defaultValue = "127", + tags = { DATANODE }, + description = "The number of lock stripes used to coordinate modifications to container checksum information. " + + "This information is only updated after a container is closed and does not affect the data read or write" + + " path. Each container in the datanode will be mapped to one lock which will only be held while its " + + "checksum information is updated." + ) + private int containerChecksumLockStripes = CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT; + @PostConstruct public void validate() { if (containerDeleteThreads < 1) { @@ -683,6 +700,12 @@ public void validate() { rocksdbDeleteObsoleteFilesPeriod = ROCKSDB_DELETE_OBSOLETE_FILES_PERIOD_MICRO_SECONDS_DEFAULT; } + + if (containerChecksumLockStripes < 1) { + LOG.warn("{} must be at least 1. 
Defaulting to {}", CONTAINER_CHECKSUM_LOCK_STRIPES_KEY, + CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT); + containerChecksumLockStripes = CONTAINER_CHECKSUM_LOCK_STRIPES_DEFAULT; + } } public void setContainerDeleteThreads(int containerDeleteThreads) { @@ -910,4 +933,8 @@ public int getAutoCompactionSmallSstFileNum() { public void setAutoCompactionSmallSstFileNum(int num) { this.autoCompactionSmallSstFileNum = num; } + + public int getContainerChecksumLockStripes() { + return containerChecksumLockStripes; + } } From e02a6096f45b6238768f4c78c1f48ae550cf799c Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 18 Jun 2024 17:27:53 -0400 Subject: [PATCH 03/13] Keep deleted block list sorted --- .../checksum/ContainerChecksumManager.java | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java index eb7740f9b8b3..fdd8c5b279e4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java @@ -25,6 +25,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.util.SortedSet; +import java.util.TreeSet; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.Lock; @@ -36,7 +37,7 @@ */ public class ContainerChecksumManager { - private Striped fileLock; + private final Striped fileLock; /** * Creates one instance that should be used to coordinate all container checksum info within a datanode. 
@@ -73,11 +74,20 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { - ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() - // TODO actually need to merge here to keep blocks in sorted order. - .addAllDeletedBlocks(deletedBlockIDs) + ContainerChecksumInfo.Builder newChecksumInfoBuilder = read(data).toBuilder(); + + // Although the persisted block list should already be sorted, we will sort it here to make sure. + // This will automatically fix any bugs in the persisted order that may show up. + SortedSet sortedDeletedBlockIDs = + new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList()); + // Since the provided list of block IDs is already sorted, this is a linear time addition. + sortedDeletedBlockIDs.addAll(deletedBlockIDs); + + newChecksumInfoBuilder + .clearDeletedBlocks() + .addAllDeletedBlocks(sortedDeletedBlockIDs) .build(); - write(data, newChecksumInfo); + write(data, newChecksumInfoBuilder.build()); } finally { writeLock.unlock(); } From 3680a2ed8d90d829d96d89bda812e6c8dbefdb00 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 20 Jun 2024 18:03:33 -0400 Subject: [PATCH 04/13] Change chunk checksum proto, add the first passing test --- .../checksum/ContainerMerkleTree.java | 121 +++++++---- .../checksum/TestContainerMerkleTree.java | 199 ++++++++++++++++++ .../main/proto/DatanodeClientProtocol.proto | 9 +- 3 files changed, 289 insertions(+), 40 deletions(-) create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index c400401318b2..95ce6f76d662 100644 --- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.ozone.container.checksum; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockMerkleTreeProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; import org.apache.hadoop.ozone.common.ChecksumByteBuffer; @@ -25,17 +26,17 @@ import java.nio.ByteBuffer; import java.util.List; -import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; /** * This class represents a Merkle tree that provides one checksum for all data within a container. * - * The leaves of the tree are the checksums of every chunk. Each chunk checksum in a block is further checksumed - * together to generate the block level checksum. Finally, The checksums of all blocks are checksumed together to - * create a container level checksum. Note that checksums are order dependent. Chunk checksums are sorted by their - * offset within a block, and blocks are sorted by their ID. + * As the leaves of the tree, a checksum for each chunk is computed by taking a checksum of all checksums within that + * chunk. Each chunk checksum in a block is further checksummed together to generate the block level checksum. Finally, + * The checksums of all blocks are checksummed together to create a container level checksum. + * Note that checksums are order dependent. Chunk checksums are sorted by their + * offset within a block, and block checksums are sorted by their block ID. * * This class can be used to construct a consistent and completely filled {@link ContainerMerkleTreeProto} object. * It allows building a container merkle tree from scratch by incrementally adding chunks. 
@@ -44,13 +45,13 @@ */ public class ContainerMerkleTree { - private final SortedMap blocks; + private final SortedMap id2Block; /** * Constructs an empty Container merkle tree object. */ public ContainerMerkleTree() { - blocks = new TreeMap<>(); + id2Block = new TreeMap<>(); } /** @@ -58,32 +59,37 @@ public ContainerMerkleTree() { * If the block entry already exists, the chunks will be added to the existing chunks for that block. * * @param blockID The ID of the block that these chunks belong to. - * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the - * existing sorted list of chunks stored for this block. + * @param chunks A list of chunks to add to this block. The chunks will be sorted internally by their offset. */ - public void addChunks(long blockID, SortedMap offset2Chunks) { - blocks.getOrDefault(blockID, new BlockMerkleTree(blockID)).addChunks(offset2Chunks); + public void addChunks(long blockID, List chunks) { + id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(chunks); } /** * Uses chunk hashes to compute all remaining hashes in the tree, and returns it as a protobuf object. No checksum - * computation happens outside of this method. + * computation for the tree happens outside of this method. * * @return A complete protobuf object representation of this tree. */ public ContainerMerkleTreeProto toProto() { // Compute checksums and return the result. 
- ContainerMerkleTreeProto.Builder treeProto = ContainerMerkleTreeProto.newBuilder(); - // TODO configurable checksum implementation + ContainerMerkleTreeProto.Builder containerTreeBuilder = ContainerMerkleTreeProto.newBuilder(); ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); - ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * blocks.size()); - for (Map.Entry entry: blocks.entrySet()) { - BlockMerkleTreeProto blockTreeProto = entry.getValue().toProto(); - treeProto.addBlockMerkleTree(blockTreeProto); + ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * id2Block.size()); + + for (BlockMerkleTree blockTree: id2Block.values()) { + // Add block's checksum tree to the proto. + BlockMerkleTreeProto blockTreeProto = blockTree.toProto(); + containerTreeBuilder.addBlockMerkleTree(blockTreeProto); + // Add the block's checksum to the buffer to calculate the container checksum. containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum()); } - treeProto.setDataChecksum(checksumImpl.getValue()); - return treeProto.build(); + containerChecksumBuffer.flip(); + checksumImpl.update(containerChecksumBuffer); + + return containerTreeBuilder + .setDataChecksum(checksumImpl.getValue()) + .build(); } /** @@ -92,44 +98,49 @@ public ContainerMerkleTreeProto toProto() { private static class BlockMerkleTree { // Map of each offset within the block to its chunk info. // Chunk order in the checksum is determined by their offset. - private final SortedMap chunks; + private final SortedMap offset2Chunk; private final long blockID; BlockMerkleTree(long blockID) { this.blockID = blockID; - this.chunks = new TreeMap<>(); + this.offset2Chunk = new TreeMap<>(); } /** - * Adds the specified chunks to this block. This should run in linear time since the {@link SortedMap} parameter - * is added to an existing {@link TreeMap} internally. + * Adds the specified chunks to this block. 
The offset value of the chunk must be unique within the block, + * otherwise it will overwrite the previous value at that offset. * - * @param offset2Chunks A map of chunk offset to chunk info, sorted by chunk offset. This will be merged with the - * existing sorted list of chunks stored for this block. + * @param chunks A list of chunks to add to this block. */ - public void addChunks(SortedMap offset2Chunks) { - chunks.putAll(offset2Chunks); + public void addChunks(List chunks) { + for (ChunkInfo chunk: chunks) { + offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk)); + } } /** - * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. No checksum - * computation happens outside of this method. + * Uses chunk hashes to compute a block hash for this tree, and returns it as a protobuf object. All block checksum + * computation for the tree happens within this method. * - * @return A complete protobuf object representation of this tree. + * @return A complete protobuf object representation of this block tree. */ public BlockMerkleTreeProto toProto() { BlockMerkleTreeProto.Builder blockTreeBuilder = BlockMerkleTreeProto.newBuilder(); - // TODO configurable checksum implementation ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); + ByteBuffer blockChecksumBuffer = ByteBuffer.allocate(Long.BYTES * offset2Chunk.size()); - for (ChunkInfo chunk: chunks.values()) { - // TODO can we depend on this ordering to be consistent? - List chunkChecksums = chunk.getChecksumData().getChecksums(); - blockTreeBuilder.addChunks(chunk.getProtoBufMessage()); - for (ByteString checksum: chunkChecksums) { - checksumImpl.update(checksum.asReadOnlyByteBuffer()); - } + for (ChunkMerkleTree chunkTree: offset2Chunk.values()) { + // Ordering of checksums within a chunk is assumed to be in the order they are written. 
+ // This assumption is already built in to the code that reads and writes the values (see + // ChunkInputStream#validateChunk for an example on the client read path). + // There is no other value we can use to sort these checksums, so we assume the stored proto has them in the + // correct order. + ContainerProtos.ChunkMerkleTreeProto chunkTreeProto = chunkTree.toProto(); + blockTreeBuilder.addChunkMerkleTree(chunkTreeProto); + blockChecksumBuffer.putLong(chunkTreeProto.getChunkChecksum()); } + blockChecksumBuffer.flip(); + checksumImpl.update(blockChecksumBuffer); return blockTreeBuilder .setBlockID(blockID) @@ -137,4 +148,36 @@ public BlockMerkleTreeProto toProto() { .build(); } } + + /** + * Represents a merkle tree for a single chunk within a container. + * Each chunk has multiple checksums within it at each "bytesPerChecksum" interval. + * This class computes one checksum for the whole chunk by aggregating these. + */ + private static class ChunkMerkleTree { + private final ChunkInfo chunk; + + ChunkMerkleTree(ChunkInfo chunk) { + this.chunk = chunk; + } + + /** + * Computes a single hash for this ChunkInfo object. All chunk level checksum computation happens within this + * method. + * + * @return A complete protobuf representation of this chunk as a leaf in the container merkle tree. 
+ */ + public ContainerProtos.ChunkMerkleTreeProto toProto() { + ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); + for (ByteString checksum: chunk.getChecksumData().getChecksums()) { + checksumImpl.update(checksum.asReadOnlyByteBuffer()); + } + + return ContainerProtos.ChunkMerkleTreeProto.newBuilder() + .setOffset(chunk.getOffset()) + .setLength(chunk.getLen()) + .setChunkChecksum(checksumImpl.getValue()) + .build(); + } + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java new file mode 100644 index 000000000000..687b959a65f5 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -0,0 +1,199 @@ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.CRC32; + +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestContainerMerkleTree { + @Test + public void testBuildEmptyTree() { + ContainerMerkleTree tree = new ContainerMerkleTree(); + ContainerMerkleTreeProto treeProto = tree.toProto(); + assertEquals(0, treeProto.getDataChecksum()); + assertEquals(0, 
treeProto.getBlockMerkleTreeCount()); + } + + @Test + public void testBuildOneChunkTree() throws Exception { + // Seed the expected and actual trees with the same chunk. + final long blockID = 1; + ChunkInfo chunk = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + + // Build the expected tree proto using the test code. + ContainerProtos.ChunkMerkleTreeProto chunkTree = buildExpectedChunkTree(chunk); + ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID, + Collections.singletonList(chunkTree)); + ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + + // Use the ContainerMerkleTree to build the same tree. + ContainerMerkleTree actualTree = new ContainerMerkleTree(); + actualTree.addChunks(blockID, Collections.singletonList(chunk)); + + // Check the difference. + ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + assertTreesMatch(expectedTree, actualTreeProto); + + // Do some manual verification of the generated tree as well. + assertNotEquals(0, actualTreeProto.getDataChecksum()); + assertEquals(1, actualTreeProto.getBlockMerkleTreeCount()); + + ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTreeProto.getBlockMerkleTree(0); + assertEquals(1, actualBlockTree.getBlockID()); + assertEquals(1, actualBlockTree.getChunkMerkleTreeCount()); + assertNotEquals(0, actualBlockTree.getBlockChecksum()); + + ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(0); + assertEquals(0, actualChunkTree.getOffset()); + // TODO use existing config for this value. 
+ assertEquals(5 * 1024, actualChunkTree.getLength()); + assertNotEquals(0, actualChunkTree.getChunkChecksum()); + } + +// @Test +// public void testBuildTreeWithMissingChunks() { +// +// } +// +// @Test +// public void testBuildTreeWithMissingBlocks() { +// +// } +// +// @Test +// public void testBuildTreeAtOnce() { +// +// } +// +// @Test +// public void testBuildTreeIncrementally() { +// +// } + + private void assertTreesMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) { + assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); + assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount()); + + long prevBlockID = -1; + for (int blockIndex = 0; blockIndex < expectedTree.getBlockMerkleTreeCount(); blockIndex++) { + ContainerProtos.BlockMerkleTreeProto expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); + ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTree.getBlockMerkleTree(blockIndex); + + // Blocks should be sorted by block ID. + long currentBlockID = actualBlockTree.getBlockID(); + assertTrue(prevBlockID < currentBlockID); + prevBlockID = currentBlockID; + + assertEquals(expectedBlockTree.getBlockID(), actualBlockTree.getBlockID()); + assertEquals(expectedBlockTree.getBlockChecksum(), actualBlockTree.getBlockChecksum()); + + long prevChunkOffset = -1; + for (int chunkIndex = 0; chunkIndex < expectedBlockTree.getChunkMerkleTreeCount(); chunkIndex++) { + ContainerProtos.ChunkMerkleTreeProto expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex); + ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex); + + // Chunks should be sorted by offset. 
+ long currentChunkOffset = actualChunkTree.getOffset(); + assertTrue(prevChunkOffset < currentChunkOffset); + prevChunkOffset = currentChunkOffset; + + assertEquals(expectedChunkTree.getOffset(), actualChunkTree.getOffset()); + assertEquals(expectedChunkTree.getLength(), actualChunkTree.getLength()); + assertEquals(expectedChunkTree.getChunkChecksum(), actualChunkTree.getChunkChecksum()); + } + } + } + + private ContainerMerkleTreeProto buildExpectedContainerTree(List blocks) { + return ContainerMerkleTreeProto.newBuilder() + .addAllBlockMerkleTree(blocks) + .setDataChecksum(computeExpectedChecksum( + blocks.stream() + .map(ContainerProtos.BlockMerkleTreeProto::getBlockChecksum) + .collect(Collectors.toList()))) + .build(); + } + + private ContainerProtos.BlockMerkleTreeProto buildExpectedBlockTree(long blockID, + List chunks) { + return ContainerProtos.BlockMerkleTreeProto.newBuilder() + .setBlockID(blockID) + .setBlockChecksum(computeExpectedChecksum( + chunks.stream() + .map(ContainerProtos.ChunkMerkleTreeProto::getChunkChecksum) + .collect(Collectors.toList()))) + .addAllChunkMerkleTree(chunks) + .build(); + } + + private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo chunk) { + return ContainerProtos.ChunkMerkleTreeProto.newBuilder() + .setOffset(chunk.getOffset()) + .setLength(chunk.getLen()) + .setChunkChecksum(computeExpectedChunkChecksum(chunk.getChecksumData().getChecksums())) + .build(); + } + + /** + * Builds a ChunkInfo object using the provided information. No new checksums are calculated, so this can be used + * as either the leaves of pre-computed merkle trees that serve as expected values, or as building blocks to pass + * to ContainerMerkleTree to have it build the whole tree from this information. + * + * @param indexInBlock Which chunk number within a block this is. The chunk's offset is automatically calculated + * from this based on a fixed length. + * @param chunkChecksums The checksums within the chunk. 
Each is assumed to apply to a fixed value + * "bytesPerChecksum" amount of data and are assumed to be contiguous. + * @return The ChunkInfo proto object built from this information. + */ + private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException { + // Arbitrary sizes chosen for testing. + final int bytesPerChecksum = 1024; + final long chunkLength = 1024 * 5; + + // Each chunk checksum is added under the same ChecksumData object. + ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder() + .setType(ContainerProtos.ChecksumType.CRC32) + .setBytesPerChecksum(bytesPerChecksum) + .addAllChecksums(Arrays.stream(chunkChecksums) + .map(ByteString::copyFrom) + .collect(Collectors.toList())) + .build(); + + return ChunkInfo.getFromProtoBuf( + ContainerProtos.ChunkInfo.newBuilder() + .setChecksumData(checksumData) + .setChunkName("chunk") + .setOffset(indexInBlock * chunkLength) + .setLen(chunkLength) + .build()); + } + + private long computeExpectedChecksum(List checksums) { + CRC32 crc32 = new CRC32(); + ByteBuffer longBuffer = ByteBuffer.allocate(Long.BYTES * checksums.size()); + checksums.forEach(longBuffer::putLong); + longBuffer.flip(); + crc32.update(longBuffer); + return crc32.getValue(); + } + + private long computeExpectedChunkChecksum(List checksums) { + CRC32 crc32 = new CRC32(); + checksums.forEach(b -> crc32.update(b.asReadOnlyByteBuffer())); + return crc32.getValue(); + } +} diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index 7af47a7119de..d4aa92937f6d 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -527,10 +527,17 @@ message SendContainerRequest { message SendContainerResponse { } +// Each chunk contains multiple checksums. 
This message aggregates them into one checksum for the whole chunk. +message ChunkMerkleTreeProto { + optional int64 offset = 1; + optional int64 length = 2; + optional int64 chunkChecksum = 3; +} + message BlockMerkleTreeProto { optional int64 blockID = 1; optional int64 blockChecksum = 2; - repeated ChunkInfo chunks = 3; + repeated ChunkMerkleTreeProto chunkMerkleTree = 3; } message ContainerMerkleTreeProto { From 0d5800c7256ddc41766eb736b20701d99c0a31e4 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 21 Jun 2024 12:10:00 -0400 Subject: [PATCH 05/13] Use config defaults for test setup --- .../checksum/TestContainerMerkleTree.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 687b959a65f5..1dbae5253840 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -1,5 +1,7 @@ package org.apache.hadoop.ozone.container.checksum; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; @@ -11,6 +13,8 @@ import java.util.stream.Collectors; import java.util.zip.CRC32; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.junit.jupiter.api.Test; @@ -20,6 +24,10 @@ import static 
org.junit.jupiter.api.Assertions.assertTrue; class TestContainerMerkleTree { + private static final long CHUNK_SIZE = (long) new OzoneConfiguration().getStorageSize( + ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY, ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES); + private static final int BYTES_PER_CHECKSUM = new OzoneClientConfig().getBytesPerChecksum(); + @Test public void testBuildEmptyTree() { ContainerMerkleTree tree = new ContainerMerkleTree(); @@ -59,8 +67,7 @@ public void testBuildOneChunkTree() throws Exception { ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(0); assertEquals(0, actualChunkTree.getOffset()); - // TODO use existing config for this value. - assertEquals(5 * 1024, actualChunkTree.getLength()); + assertEquals(CHUNK_SIZE, actualChunkTree.getLength()); assertNotEquals(0, actualChunkTree.getChunkChecksum()); } @@ -160,14 +167,10 @@ private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo ch * @return The ChunkInfo proto object built from this information. */ private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException { - // Arbitrary sizes chosen for testing. - final int bytesPerChecksum = 1024; - final long chunkLength = 1024 * 5; - // Each chunk checksum is added under the same ChecksumData object. ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder() .setType(ContainerProtos.ChecksumType.CRC32) - .setBytesPerChecksum(bytesPerChecksum) + .setBytesPerChecksum(BYTES_PER_CHECKSUM) .addAllChecksums(Arrays.stream(chunkChecksums) .map(ByteString::copyFrom) .collect(Collectors.toList())) @@ -177,8 +180,8 @@ private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... 
chunkChecksums) thr ContainerProtos.ChunkInfo.newBuilder() .setChecksumData(checksumData) .setChunkName("chunk") - .setOffset(indexInBlock * chunkLength) - .setLen(chunkLength) + .setOffset(indexInBlock * CHUNK_SIZE) + .setLen(CHUNK_SIZE) .build()); } From eb01da4b2bdcb2f2af504644e390cba06fa4fa1e Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 21 Jun 2024 13:06:35 -0400 Subject: [PATCH 06/13] Finish tests for ContainerMerkleTree --- .../checksum/ContainerMerkleTree.java | 6 +- .../checksum/TestContainerMerkleTree.java | 120 ++++++++++++++---- 2 files changed, 100 insertions(+), 26 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index 95ce6f76d662..f82ac358d1cf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -25,7 +25,7 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import java.nio.ByteBuffer; -import java.util.List; +import java.util.Collection; import java.util.SortedMap; import java.util.TreeMap; @@ -61,7 +61,7 @@ public ContainerMerkleTree() { * @param blockID The ID of the block that these chunks belong to. * @param chunks A list of chunks to add to this block. The chunks will be sorted internally by their offset. */ - public void addChunks(long blockID, List chunks) { + public void addChunks(long blockID, Collection chunks) { id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(chunks); } @@ -112,7 +112,7 @@ private static class BlockMerkleTree { * * @param chunks A list of chunks to add to this block. 
*/ - public void addChunks(List chunks) { + public void addChunks(Collection chunks) { for (ChunkInfo chunk: chunks) { offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk)); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 1dbae5253840..0106b8d57ae1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -52,9 +52,9 @@ public void testBuildOneChunkTree() throws Exception { ContainerMerkleTree actualTree = new ContainerMerkleTree(); actualTree.addChunks(blockID, Collections.singletonList(chunk)); - // Check the difference. + // Ensure the trees match. ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); - assertTreesMatch(expectedTree, actualTreeProto); + assertTreesSortedAndMatch(expectedTree, actualTreeProto); // Do some manual verification of the generated tree as well. assertNotEquals(0, actualTreeProto.getDataChecksum()); @@ -71,27 +71,101 @@ public void testBuildOneChunkTree() throws Exception { assertNotEquals(0, actualChunkTree.getChunkChecksum()); } -// @Test -// public void testBuildTreeWithMissingChunks() { -// -// } -// -// @Test -// public void testBuildTreeWithMissingBlocks() { -// -// } -// -// @Test -// public void testBuildTreeAtOnce() { -// -// } -// -// @Test -// public void testBuildTreeIncrementally() { -// -// } - - private void assertTreesMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) { + @Test + public void testBuildTreeWithMissingChunks() throws Exception { + // These chunks will be used to seed both the expected and actual trees. 
+ final long blockID = 1; + ChunkInfo chunk1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + // Chunk 2 is missing. + ChunkInfo chunk3 = buildChunk(2, ByteBuffer.wrap(new byte[]{4, 5, 6})); + + // Build the expected tree proto using the test code. + ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID, + Arrays.asList(buildExpectedChunkTree(chunk1), buildExpectedChunkTree(chunk3))); + ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + + // Use the ContainerMerkleTree to build the same tree. + ContainerMerkleTree actualTree = new ContainerMerkleTree(); + actualTree.addChunks(blockID, Arrays.asList(chunk1, chunk3)); + + // Ensure the trees match. + ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + assertTreesSortedAndMatch(expectedTree, actualTreeProto); + } + + /** + * A container is a set of blocks. Make sure the tree implementation is not dependent on continuity of block IDs. + */ + @Test + public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { + // Seed the expected and actual trees with the same chunks. + final long blockID1 = 1; + final long blockID3 = 3; + ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + + // Build the expected tree proto using the test code. 
+ ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1, + Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2))); + ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3, + Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); + ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3)); + + // Use the ContainerMerkleTree to build the same tree. + // Add blocks and chunks out of order to test sorting. + ContainerMerkleTree actualTree = new ContainerMerkleTree(); + actualTree.addChunks(blockID3, Arrays.asList(b3c2, b3c1)); + actualTree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + + // Ensure the trees match. + ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + assertTreesSortedAndMatch(expectedTree, actualTreeProto); + } + + @Test + public void testAppendToBlocksWhileBuilding() throws Exception { + // Seed the expected and actual trees with the same chunks. + final long blockID1 = 1; + final long blockID2 = 2; + final long blockID3 = 3; + ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c3 = buildChunk(2, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + + // Build the expected tree proto using the test code. 
+ ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1, + Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2), buildExpectedChunkTree(b1c3))); + ContainerProtos.BlockMerkleTreeProto blockTree2 = buildExpectedBlockTree(blockID2, + Arrays.asList(buildExpectedChunkTree(b2c1), buildExpectedChunkTree(b2c2))); + ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3, + Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); + ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree( + Arrays.asList(blockTree1, blockTree2, blockTree3)); + + // Use the ContainerMerkleTree to build the same tree. + // Test building by adding chunks to the blocks individually and out of order. + ContainerMerkleTree actualTree = new ContainerMerkleTree(); + // Add all of block 2 first. + actualTree.addChunks(blockID2, Arrays.asList(b2c1, b2c2)); + // Then add block 1 in multiple steps with chunks out of order. + actualTree.addChunks(blockID1, Collections.singletonList(b1c2)); + actualTree.addChunks(blockID1, Arrays.asList(b1c3, b1c1)); + // Add a duplicate chunk to block 3. It should overwrite the existing one. + actualTree.addChunks(blockID3, Arrays.asList(b3c1, b3c2)); + actualTree.addChunks(blockID3, Collections.singletonList(b3c2)); + + // Ensure the trees match. 
+ ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + assertTreesSortedAndMatch(expectedTree, actualTreeProto); + } + + private void assertTreesSortedAndMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) { assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount()); From f35effd5080eff949bdb57b6b4e6578a2951fc93 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 21 Jun 2024 16:52:00 -0400 Subject: [PATCH 07/13] Add tests for checksum manager and standardize proto names --- .../checksum/ContainerChecksumManager.java | 34 +++- .../checksum/ContainerMerkleTree.java | 22 ++- .../TestContainerChecksumManager.java | 149 ++++++++++++++++++ .../checksum/TestContainerMerkleTree.java | 76 ++++----- .../main/proto/DatanodeClientProtocol.proto | 12 +- 5 files changed, 230 insertions(+), 63 deletions(-) create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java index fdd8c5b279e4..9dd2ad605c83 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java @@ -31,12 +31,17 @@ import com.google.common.util.concurrent.Striped; import org.apache.hadoop.hdds.utils.SimpleStriped; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This class coordinates reading and writing Container checksum information for all containers. 
*/ public class ContainerChecksumManager { + private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumManager.class); + + // Used to coordinate reads and writes to each container's checksum file. private final Striped fileLock; /** @@ -60,6 +65,7 @@ public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkle .setDataMerkleTree(tree.toProto()) .build(); write(data, newChecksumInfo); + LOG.debug("Data merkle tree for container {} updated", data.getContainerID()); } finally { writeLock.unlock(); } @@ -78,8 +84,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele // Although the persisted block list should already be sorted, we will sort it here to make sure. // This will automatically fix any bugs in the persisted order that may show up. - SortedSet sortedDeletedBlockIDs = - new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList()); + SortedSet sortedDeletedBlockIDs = new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList()); // Since the provided list of block IDs is already sorted, this is a linear time addition. sortedDeletedBlockIDs.addAll(deletedBlockIDs); @@ -88,6 +93,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele .addAllDeletedBlocks(sortedDeletedBlockIDs) .build(); write(data, newChecksumInfoBuilder.build()); + LOG.debug("Deleted block list for container {} updated", data.getContainerID()); } finally { writeLock.unlock(); } @@ -96,6 +102,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerChecksumInfo otherInfo) throws IOException { // TODO HDDS-10928 compare the checksum info of the two containers and return a summary. // Callers can act on this summary to repair their container replica using the peer's replica. + // This method will use the read lock, which is unused in the current implementation. 
return new ContainerDiff(); } @@ -108,10 +115,23 @@ private Lock getWriteLock(long containerID) { } private ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { - Lock readLock = getReadLock(data.getContainerID()); + long containerID = data.getContainerID(); + Lock readLock = getReadLock(containerID); readLock.lock(); - try (FileInputStream inStream = new FileInputStream(getContainerChecksumFile(data))) { - return ContainerChecksumInfo.parseFrom(inStream); + try { + File checksumFile = getContainerChecksumFile(data); + // If the checksum file has not been created yet, return an empty instance. + // Since all writes happen as part of an atomic read-modify-write cycle that requires a write lock, two empty + // instances for the same container obtained only under the read lock will not conflict. + if (!checksumFile.exists()) { + LOG.debug("Creating initial checksum file for container {} at {}", containerID, checksumFile); + return ContainerChecksumInfo.newBuilder() + .setContainerID(containerID) + .build(); + } + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + return ContainerChecksumInfo.parseFrom(inStream); + } } finally { readLock.unlock(); } @@ -132,8 +152,8 @@ private File getContainerChecksumFile(KeyValueContainerData data) { } /** - * This class represents the different between our replica of a container, and a peer's replica of a container. - * It summarizes the operations we need to do to reconcile our replica the peer replica it was compared to. + * This class represents the difference between our replica of a container and a peer's replica of a container. + * It summarizes the operations we need to do to reconcile our replica with the peer replica it was compared to. 
* * TODO HDDS-10928 */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index f82ac358d1cf..274ff7645adb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -17,8 +17,6 @@ package org.apache.hadoop.ozone.container.checksum; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.BlockMerkleTreeProto; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; import org.apache.hadoop.ozone.common.ChecksumByteBuffer; import org.apache.hadoop.ozone.common.ChecksumByteBufferFactory; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; @@ -38,8 +36,8 @@ * Note that checksums are order dependent. Chunk checksums are sorted by their * offset within a block, and block checksums are sorted by their block ID. * - * This class can be used to construct a consistent and completely filled {@link ContainerMerkleTreeProto} object. - * It allows building a container merkle tree from scratch by incrementally adding chunks. + * This class can be used to construct a consistent and completely filled {@link ContainerProtos.ContainerMerkleTree} + * object. It allows building a container merkle tree from scratch by incrementally adding chunks. * The final checksums at higher levels of the tree are not calculated until * {@link ContainerMerkleTree#toProto} is called. */ @@ -71,15 +69,15 @@ public void addChunks(long blockID, Collection chunks) { * * @return A complete protobuf object representation of this tree. 
*/ - public ContainerMerkleTreeProto toProto() { + public ContainerProtos.ContainerMerkleTree toProto() { // Compute checksums and return the result. - ContainerMerkleTreeProto.Builder containerTreeBuilder = ContainerMerkleTreeProto.newBuilder(); + ContainerProtos.ContainerMerkleTree.Builder containerTreeBuilder = ContainerProtos.ContainerMerkleTree.newBuilder(); ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * id2Block.size()); for (BlockMerkleTree blockTree: id2Block.values()) { // Add block's checksum tree to the proto. - BlockMerkleTreeProto blockTreeProto = blockTree.toProto(); + ContainerProtos.BlockMerkleTree blockTreeProto = blockTree.toProto(); containerTreeBuilder.addBlockMerkleTree(blockTreeProto); // Add the block's checksum to the buffer to calculate the container checksum. containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum()); @@ -124,8 +122,8 @@ public void addChunks(Collection chunks) { * * @return A complete protobuf object representation of this block tree. */ - public BlockMerkleTreeProto toProto() { - BlockMerkleTreeProto.Builder blockTreeBuilder = BlockMerkleTreeProto.newBuilder(); + public ContainerProtos.BlockMerkleTree toProto() { + ContainerProtos.BlockMerkleTree.Builder blockTreeBuilder = ContainerProtos.BlockMerkleTree.newBuilder(); ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); ByteBuffer blockChecksumBuffer = ByteBuffer.allocate(Long.BYTES * offset2Chunk.size()); @@ -135,7 +133,7 @@ public BlockMerkleTreeProto toProto() { // ChunkInputStream#validateChunk for an example on the client read path). // There is no other value we can use to sort these checksums, so we assume the stored proto has them in the // correct order. 
- ContainerProtos.ChunkMerkleTreeProto chunkTreeProto = chunkTree.toProto(); + ContainerProtos.ChunkMerkleTree chunkTreeProto = chunkTree.toProto(); blockTreeBuilder.addChunkMerkleTree(chunkTreeProto); blockChecksumBuffer.putLong(chunkTreeProto.getChunkChecksum()); } @@ -167,13 +165,13 @@ private static class ChunkMerkleTree { * * @return A complete protobuf representation of this chunk as a leaf in the container merkle tree. */ - public ContainerProtos.ChunkMerkleTreeProto toProto() { + public ContainerProtos.ChunkMerkleTree toProto() { ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); for (ByteString checksum: chunk.getChecksumData().getChecksums()) { checksumImpl.update(checksum.asReadOnlyByteBuffer()); } - return ContainerProtos.ChunkMerkleTreeProto.newBuilder() + return ContainerProtos.ChunkMerkleTree.newBuilder() .setOffset(chunk.getOffset()) .setLength(chunk.getLen()) .setChunkChecksum(checksumImpl.getValue()) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java new file mode 100644 index 000000000000..53dc9f13e693 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java @@ -0,0 +1,149 @@ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import 
java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.List; +import java.util.TreeSet; + +import static org.apache.hadoop.ozone.container.checksum.TestContainerMerkleTree.assertTreesSortedAndMatch; +import static org.apache.hadoop.ozone.container.checksum.TestContainerMerkleTree.buildChunk; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class TestContainerChecksumManager { + + private KeyValueContainerData container; + private final long containerID = 1L; + @TempDir + private File testDir; + private File checksumFile; + private ContainerChecksumManager checksumManager; + + @BeforeEach + public void init() { + container = mock(KeyValueContainerData.class); + when(container.getContainerID()).thenReturn(containerID); + when(container.getMetadataPath()).thenReturn(testDir.getAbsolutePath()); + checksumFile = new File(testDir, containerID + ".checksum"); + checksumManager = new ContainerChecksumManager(new DatanodeConfiguration()); + } + + @Test + public void testWriteEmptyTreeToFile() throws Exception { + checksumManager.writeContainerMerkleTree(container, new ContainerMerkleTree()); + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); + ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + assertEquals(0, treeProto.getDataChecksum()); + assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); + } + + @Test + public void testWriteEmptyBlockListToFile() throws Exception { + checksumManager.markBlocksAsDeleted(container, new TreeSet<>()); + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); + 
ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + assertEquals(0, treeProto.getDataChecksum()); + assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); + } + + @Test + public void testWriteToFileTreeOnly() throws Exception { + ContainerMerkleTree tree = buildTestTree(); + checksumManager.writeContainerMerkleTree(container, tree); + + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); + // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. + // Therefore, we can use the proto version of our expected tree to check what was written to the file. + assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); + } + + @Test + public void testWriteToFileDeletedBlocksOnly() throws Exception { + List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); + checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); + + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); + ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); + assertEquals(0, treeProto.getDataChecksum()); + assertTrue(treeProto.getBlockMerkleTreeList().isEmpty()); + } + + @Test + public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { + List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); + checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); + ContainerMerkleTree tree = buildTestTree(); + checksumManager.writeContainerMerkleTree(container, tree); + + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(expectedBlocksToDelete, 
checksumInfo.getDeletedBlocksList()); + assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); + } + + @Test + public void testTreePreservedWithDeletedBlocks() throws Exception { + ContainerMerkleTree tree = buildTestTree(); + checksumManager.writeContainerMerkleTree(container, tree); + List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); + checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); + + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + + assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); + assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); + } + + private ContainerMerkleTree buildTestTree() throws Exception { + // Seed the expected and actual trees with the same chunks. + final long blockID1 = 1; + final long blockID2 = 2; + final long blockID3 = 3; + ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{4, 5, 6})); + ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{7, 8, 9})); + ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{12, 11, 10})); + ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{13, 14, 15})); + ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{16, 17, 18})); + + ContainerMerkleTree tree = new ContainerMerkleTree(); + tree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + tree.addChunks(blockID2, Arrays.asList(b2c1, b2c2)); + tree.addChunks(blockID3, Arrays.asList(b3c1, b3c2)); + + return tree; + } + + private ContainerProtos.ContainerChecksumInfo readFile() throws IOException { + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); + } + } +} diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 0106b8d57ae1..766736eb7ecc 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -3,7 +3,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerMerkleTreeProto; import java.io.IOException; import java.nio.ByteBuffer; @@ -31,7 +30,7 @@ class TestContainerMerkleTree { @Test public void testBuildEmptyTree() { ContainerMerkleTree tree = new ContainerMerkleTree(); - ContainerMerkleTreeProto treeProto = tree.toProto(); + ContainerProtos.ContainerMerkleTree treeProto = tree.toProto(); assertEquals(0, treeProto.getDataChecksum()); assertEquals(0, treeProto.getBlockMerkleTreeCount()); } @@ -43,29 +42,29 @@ public void testBuildOneChunkTree() throws Exception { ChunkInfo chunk = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Build the expected tree proto using the test code. 
- ContainerProtos.ChunkMerkleTreeProto chunkTree = buildExpectedChunkTree(chunk); - ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID, + ContainerProtos.ChunkMerkleTree chunkTree = buildExpectedChunkTree(chunk); + ContainerProtos.BlockMerkleTree blockTree = buildExpectedBlockTree(blockID, Collections.singletonList(chunkTree)); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); // Use the ContainerMerkleTree to build the same tree. ContainerMerkleTree actualTree = new ContainerMerkleTree(); actualTree.addChunks(blockID, Collections.singletonList(chunk)); // Ensure the trees match. - ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); // Do some manual verification of the generated tree as well. 
assertNotEquals(0, actualTreeProto.getDataChecksum()); assertEquals(1, actualTreeProto.getBlockMerkleTreeCount()); - ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTreeProto.getBlockMerkleTree(0); + ContainerProtos.BlockMerkleTree actualBlockTree = actualTreeProto.getBlockMerkleTree(0); assertEquals(1, actualBlockTree.getBlockID()); assertEquals(1, actualBlockTree.getChunkMerkleTreeCount()); assertNotEquals(0, actualBlockTree.getBlockChecksum()); - ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(0); + ContainerProtos.ChunkMerkleTree actualChunkTree = actualBlockTree.getChunkMerkleTree(0); assertEquals(0, actualChunkTree.getOffset()); assertEquals(CHUNK_SIZE, actualChunkTree.getLength()); assertNotEquals(0, actualChunkTree.getChunkChecksum()); @@ -80,16 +79,16 @@ public void testBuildTreeWithMissingChunks() throws Exception { ChunkInfo chunk3 = buildChunk(2, ByteBuffer.wrap(new byte[]{4, 5, 6})); // Build the expected tree proto using the test code. - ContainerProtos.BlockMerkleTreeProto blockTree = buildExpectedBlockTree(blockID, + ContainerProtos.BlockMerkleTree blockTree = buildExpectedBlockTree(blockID, Arrays.asList(buildExpectedChunkTree(chunk1), buildExpectedChunkTree(chunk3))); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); // Use the ContainerMerkleTree to build the same tree. ContainerMerkleTree actualTree = new ContainerMerkleTree(); actualTree.addChunks(blockID, Arrays.asList(chunk1, chunk3)); // Ensure the trees match. 
- ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); } @@ -107,11 +106,11 @@ public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); // Build the expected tree proto using the test code. - ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1, + ContainerProtos.BlockMerkleTree blockTree1 = buildExpectedBlockTree(blockID1, Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2))); - ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3, + ContainerProtos.BlockMerkleTree blockTree3 = buildExpectedBlockTree(blockID3, Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3)); // Use the ContainerMerkleTree to build the same tree. // Add blocks and chunks out of order to test sorting. @@ -120,7 +119,7 @@ public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { actualTree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); // Ensure the trees match. 
- ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); } @@ -131,21 +130,21 @@ public void testAppendToBlocksWhileBuilding() throws Exception { final long blockID2 = 2; final long blockID3 = 3; ChunkInfo b1c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b1c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2})); ChunkInfo b1c3 = buildChunk(2, ByteBuffer.wrap(new byte[]{1, 2, 3})); ChunkInfo b2c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); ChunkInfo b2c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1, 2, 3})); - ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ChunkInfo b3c1 = buildChunk(0, ByteBuffer.wrap(new byte[]{1})); + ChunkInfo b3c2 = buildChunk(1, ByteBuffer.wrap(new byte[]{2, 3, 4})); // Build the expected tree proto using the test code. 
- ContainerProtos.BlockMerkleTreeProto blockTree1 = buildExpectedBlockTree(blockID1, + ContainerProtos.BlockMerkleTree blockTree1 = buildExpectedBlockTree(blockID1, Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2), buildExpectedChunkTree(b1c3))); - ContainerProtos.BlockMerkleTreeProto blockTree2 = buildExpectedBlockTree(blockID2, + ContainerProtos.BlockMerkleTree blockTree2 = buildExpectedBlockTree(blockID2, Arrays.asList(buildExpectedChunkTree(b2c1), buildExpectedChunkTree(b2c2))); - ContainerProtos.BlockMerkleTreeProto blockTree3 = buildExpectedBlockTree(blockID3, + ContainerProtos.BlockMerkleTree blockTree3 = buildExpectedBlockTree(blockID3, Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); - ContainerMerkleTreeProto expectedTree = buildExpectedContainerTree( + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree( Arrays.asList(blockTree1, blockTree2, blockTree3)); // Use the ContainerMerkleTree to build the same tree. @@ -161,18 +160,19 @@ public void testAppendToBlocksWhileBuilding() throws Exception { actualTree.addChunks(blockID3, Collections.singletonList(b3c2)); // Ensure the trees match. 
- ContainerMerkleTreeProto actualTreeProto = actualTree.toProto(); + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); assertTreesSortedAndMatch(expectedTree, actualTreeProto); } - private void assertTreesSortedAndMatch(ContainerMerkleTreeProto expectedTree, ContainerMerkleTreeProto actualTree) { + public static void assertTreesSortedAndMatch(ContainerProtos.ContainerMerkleTree expectedTree, + ContainerProtos.ContainerMerkleTree actualTree) { assertEquals(expectedTree.getDataChecksum(), actualTree.getDataChecksum()); assertEquals(expectedTree.getBlockMerkleTreeCount(), actualTree.getBlockMerkleTreeCount()); long prevBlockID = -1; for (int blockIndex = 0; blockIndex < expectedTree.getBlockMerkleTreeCount(); blockIndex++) { - ContainerProtos.BlockMerkleTreeProto expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); - ContainerProtos.BlockMerkleTreeProto actualBlockTree = actualTree.getBlockMerkleTree(blockIndex); + ContainerProtos.BlockMerkleTree expectedBlockTree = expectedTree.getBlockMerkleTree(blockIndex); + ContainerProtos.BlockMerkleTree actualBlockTree = actualTree.getBlockMerkleTree(blockIndex); // Blocks should be sorted by block ID. long currentBlockID = actualBlockTree.getBlockID(); @@ -184,8 +184,8 @@ private void assertTreesSortedAndMatch(ContainerMerkleTreeProto expectedTree, Co long prevChunkOffset = -1; for (int chunkIndex = 0; chunkIndex < expectedBlockTree.getChunkMerkleTreeCount(); chunkIndex++) { - ContainerProtos.ChunkMerkleTreeProto expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex); - ContainerProtos.ChunkMerkleTreeProto actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex); + ContainerProtos.ChunkMerkleTree expectedChunkTree = expectedBlockTree.getChunkMerkleTree(chunkIndex); + ContainerProtos.ChunkMerkleTree actualChunkTree = actualBlockTree.getChunkMerkleTree(chunkIndex); // Chunks should be sorted by offset. 
long currentChunkOffset = actualChunkTree.getOffset(); @@ -199,30 +199,30 @@ private void assertTreesSortedAndMatch(ContainerMerkleTreeProto expectedTree, Co } } - private ContainerMerkleTreeProto buildExpectedContainerTree(List blocks) { - return ContainerMerkleTreeProto.newBuilder() + private ContainerProtos.ContainerMerkleTree buildExpectedContainerTree(List blocks) { + return ContainerProtos.ContainerMerkleTree.newBuilder() .addAllBlockMerkleTree(blocks) .setDataChecksum(computeExpectedChecksum( blocks.stream() - .map(ContainerProtos.BlockMerkleTreeProto::getBlockChecksum) + .map(ContainerProtos.BlockMerkleTree::getBlockChecksum) .collect(Collectors.toList()))) .build(); } - private ContainerProtos.BlockMerkleTreeProto buildExpectedBlockTree(long blockID, - List chunks) { - return ContainerProtos.BlockMerkleTreeProto.newBuilder() + private ContainerProtos.BlockMerkleTree buildExpectedBlockTree(long blockID, + List chunks) { + return ContainerProtos.BlockMerkleTree.newBuilder() .setBlockID(blockID) .setBlockChecksum(computeExpectedChecksum( chunks.stream() - .map(ContainerProtos.ChunkMerkleTreeProto::getChunkChecksum) + .map(ContainerProtos.ChunkMerkleTree::getChunkChecksum) .collect(Collectors.toList()))) .addAllChunkMerkleTree(chunks) .build(); } - private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo chunk) { - return ContainerProtos.ChunkMerkleTreeProto.newBuilder() + private ContainerProtos.ChunkMerkleTree buildExpectedChunkTree(ChunkInfo chunk) { + return ContainerProtos.ChunkMerkleTree.newBuilder() .setOffset(chunk.getOffset()) .setLength(chunk.getLen()) .setChunkChecksum(computeExpectedChunkChecksum(chunk.getChecksumData().getChecksums())) @@ -240,7 +240,7 @@ private ContainerProtos.ChunkMerkleTreeProto buildExpectedChunkTree(ChunkInfo ch * "bytesPerChecksum" amount of data and are assumed to be contiguous. * @return The ChunkInfo proto object built from this information. 
*/ - private ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException { + public static ChunkInfo buildChunk(int indexInBlock, ByteBuffer... chunkChecksums) throws IOException { // Each chunk checksum is added under the same ChecksumData object. ContainerProtos.ChecksumData checksumData = ContainerProtos.ChecksumData.newBuilder() .setType(ContainerProtos.ChecksumType.CRC32) diff --git a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto index d4aa92937f6d..aa3666fb50c6 100644 --- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto +++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto @@ -528,26 +528,26 @@ message SendContainerResponse { } // Each chunk contains multiple checksums. This message aggregates them into one checksum for the whole chunk. -message ChunkMerkleTreeProto { +message ChunkMerkleTree { optional int64 offset = 1; optional int64 length = 2; optional int64 chunkChecksum = 3; } -message BlockMerkleTreeProto { +message BlockMerkleTree { optional int64 blockID = 1; optional int64 blockChecksum = 2; - repeated ChunkMerkleTreeProto chunkMerkleTree = 3; + repeated ChunkMerkleTree chunkMerkleTree = 3; } -message ContainerMerkleTreeProto { +message ContainerMerkleTree { optional int64 dataChecksum = 1; - repeated BlockMerkleTreeProto blockMerkleTree = 2; + repeated BlockMerkleTree blockMerkleTree = 2; } message ContainerChecksumInfo { optional int64 containerID = 1; - optional ContainerMerkleTreeProto dataMerkleTree = 2; + optional ContainerMerkleTree dataMerkleTree = 2; repeated int64 deletedBlocks = 3; } From 21dd87016d461a55471c69de82c6462fb439a1ea Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 21 Jun 2024 17:33:53 -0400 Subject: [PATCH 08/13] Updates after reviewing diff --- .../checksum/ContainerChecksumManager.java | 29 ++++++++++--------- 
.../checksum/ContainerMerkleTree.java | 3 +- .../TestContainerChecksumManager.java | 7 ++--- .../checksum/TestContainerMerkleTree.java | 3 +- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java index 9dd2ad605c83..0f02b04422a2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java @@ -16,7 +16,7 @@ */ package org.apache.hadoop.ozone.container.checksum; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -42,6 +42,7 @@ public class ContainerChecksumManager { private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumManager.class); // Used to coordinate reads and writes to each container's checksum file. + // Each container ID is mapped to a stripe. 
private final Striped fileLock; /** @@ -61,7 +62,7 @@ public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkle Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { - ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() + ContainerProtos.ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() .setDataMerkleTree(tree.toProto()) .build(); write(data, newChecksumInfo); @@ -80,26 +81,26 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { - ContainerChecksumInfo.Builder newChecksumInfoBuilder = read(data).toBuilder(); - + ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = read(data).toBuilder(); // Although the persisted block list should already be sorted, we will sort it here to make sure. // This will automatically fix any bugs in the persisted order that may show up. - SortedSet sortedDeletedBlockIDs = new TreeSet<>(newChecksumInfoBuilder.getDeletedBlocksList()); + SortedSet sortedDeletedBlockIDs = new TreeSet<>(checksumInfoBuilder.getDeletedBlocksList()); // Since the provided list of block IDs is already sorted, this is a linear time addition. sortedDeletedBlockIDs.addAll(deletedBlockIDs); - newChecksumInfoBuilder + checksumInfoBuilder .clearDeletedBlocks() .addAllDeletedBlocks(sortedDeletedBlockIDs) .build(); - write(data, newChecksumInfoBuilder.build()); + write(data, checksumInfoBuilder.build()); LOG.debug("Deleted block list for container {} updated", data.getContainerID()); } finally { writeLock.unlock(); } } - public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerChecksumInfo otherInfo) throws IOException { + public ContainerDiff diff(KeyValueContainerData thisContainer, ContainerProtos.ContainerChecksumInfo otherInfo) + throws IOException { // TODO HDDS-10928 compare the checksum info of the two containers and return a summary. 
// Callers can act on this summary to repair their container replica using the peer's replica. // This method will use the read lock, which is unused in the current implementation. @@ -114,7 +115,7 @@ private Lock getWriteLock(long containerID) { return fileLock.get(containerID).writeLock(); } - private ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { + private ContainerProtos.ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { long containerID = data.getContainerID(); Lock readLock = getReadLock(containerID); readLock.lock(); @@ -124,20 +125,22 @@ private ContainerChecksumInfo read(KeyValueContainerData data) throws IOExceptio // Since all writes happen as part of an atomic read-modify-write cycle that requires a write lock, two empty // instances for the same container obtained only under the read lock will not conflict. if (!checksumFile.exists()) { - LOG.debug("Creating initial checksum file for container {} at {}", containerID, checksumFile); - return ContainerChecksumInfo.newBuilder() + LOG.debug("No checksum file currently exists for container {} at the path {}. 
Returning an empty instance.", + containerID, checksumFile); + return ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(containerID) .build(); } try (FileInputStream inStream = new FileInputStream(checksumFile)) { - return ContainerChecksumInfo.parseFrom(inStream); + return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); } } finally { readLock.unlock(); } } - private void write(KeyValueContainerData data, ContainerChecksumInfo checksumInfo) throws IOException { + private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksumInfo checksumInfo) + throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try (FileOutputStream outStream = new FileOutputStream(getContainerChecksumFile(data))) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index 274ff7645adb..9eeb50b6498c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -76,10 +76,9 @@ public ContainerProtos.ContainerMerkleTree toProto() { ByteBuffer containerChecksumBuffer = ByteBuffer.allocate(Long.BYTES * id2Block.size()); for (BlockMerkleTree blockTree: id2Block.values()) { - // Add block's checksum tree to the proto. ContainerProtos.BlockMerkleTree blockTreeProto = blockTree.toProto(); containerTreeBuilder.addBlockMerkleTree(blockTreeProto); - // Add the block's checksum to the buffer to calculate the container checksum. + // Add the block's checksum to the buffer that will be used to calculate the container checksum. 
containerChecksumBuffer.putLong(blockTreeProto.getBlockChecksum()); } containerChecksumBuffer.flip(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java index 53dc9f13e693..ddad744872b6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java @@ -66,7 +66,7 @@ public void testWriteEmptyBlockListToFile() throws Exception { } @Test - public void testWriteToFileTreeOnly() throws Exception { + public void testWriteOnlyTreeToFile() throws Exception { ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerMerkleTree(container, tree); @@ -80,7 +80,7 @@ public void testWriteToFileTreeOnly() throws Exception { } @Test - public void testWriteToFileDeletedBlocksOnly() throws Exception { + public void testWriteOnlyDeletedBlocksToFile() throws Exception { List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); @@ -108,7 +108,7 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { } @Test - public void testTreePreservedWithDeletedBlocks() throws Exception { + public void testTreePreservedOnDeletedBlocksWrite() throws Exception { ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerMerkleTree(container, tree); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); @@ -122,7 +122,6 @@ public void testTreePreservedWithDeletedBlocks() throws Exception { } private ContainerMerkleTree buildTestTree() throws Exception { - // Seed the expected and actual trees with the same chunks. 
final long blockID1 = 1; final long blockID2 = 2; final long blockID3 = 3; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 766736eb7ecc..efcb3843d7d2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -110,7 +110,8 @@ public void testBuildTreeWithNonContiguousBlockIDs() throws Exception { Arrays.asList(buildExpectedChunkTree(b1c1), buildExpectedChunkTree(b1c2))); ContainerProtos.BlockMerkleTree blockTree3 = buildExpectedBlockTree(blockID3, Arrays.asList(buildExpectedChunkTree(b3c1), buildExpectedChunkTree(b3c2))); - ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Arrays.asList(blockTree1, blockTree3)); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree( + Arrays.asList(blockTree1, blockTree3)); // Use the ContainerMerkleTree to build the same tree. // Add blocks and chunks out of order to test sorting. From 47cd213474920689122b6582c3ca9153b8905a57 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 24 Jun 2024 13:49:10 -0400 Subject: [PATCH 09/13] Rename checksum manager and file. Fix findbugs and Rat --- ...java => ContainerChecksumTreeManager.java} | 10 ++-- ... 
=> TestContainerChecksumTreeManager.java} | 50 ++++++++++++------- .../checksum/TestContainerMerkleTree.java | 16 ++++++ 3 files changed, 54 insertions(+), 22 deletions(-) rename hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/{ContainerChecksumManager.java => ContainerChecksumTreeManager.java} (96%) rename hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/{TestContainerChecksumManager.java => TestContainerChecksumTreeManager.java} (75%) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java similarity index 96% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 0f02b04422a2..71448097393b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -37,9 +37,9 @@ /** * This class coordinates reading and writing Container checksum information for all containers. */ -public class ContainerChecksumManager { +public class ContainerChecksumTreeManager { - private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumManager.class); + private static final Logger LOG = LoggerFactory.getLogger(ContainerChecksumTreeManager.class); // Used to coordinate reads and writes to each container's checksum file. // Each container ID is mapped to a stripe. 
@@ -48,7 +48,7 @@ public class ContainerChecksumManager { /** * Creates one instance that should be used to coordinate all container checksum info within a datanode. */ - public ContainerChecksumManager(DatanodeConfiguration dnConf) { + public ContainerChecksumTreeManager(DatanodeConfiguration dnConf) { fileLock = SimpleStriped.readWriteLock(dnConf.getContainerChecksumLockStripes(), true); } @@ -58,7 +58,7 @@ public ContainerChecksumManager(DatanodeConfiguration dnConf) { * file remains unchanged. * Concurrent writes to the same file are coordinated internally. */ - public void writeContainerMerkleTree(KeyValueContainerData data, ContainerMerkleTree tree) throws IOException { + public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTree tree) throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { @@ -151,7 +151,7 @@ private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksum } private File getContainerChecksumFile(KeyValueContainerData data) { - return new File(data.getMetadataPath(), data.getContainerID() + ".checksum"); + return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java similarity index 75% rename from hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java rename to hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index ddad744872b6..b964a0922c13 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumManager.java +++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ package org.apache.hadoop.ozone.container.checksum; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; @@ -23,30 +39,30 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -class TestContainerChecksumManager { +class TestContainerChecksumTreeManager { - private KeyValueContainerData container; - private final long containerID = 1L; + private static final long CONTAINER_ID = 1L; @TempDir private File testDir; + private KeyValueContainerData container; private File checksumFile; - private ContainerChecksumManager checksumManager; + private ContainerChecksumTreeManager checksumManager; @BeforeEach public void init() { container = mock(KeyValueContainerData.class); - when(container.getContainerID()).thenReturn(containerID); + when(container.getContainerID()).thenReturn(CONTAINER_ID); when(container.getMetadataPath()).thenReturn(testDir.getAbsolutePath()); - checksumFile = new File(testDir, containerID + ".checksum"); - checksumManager = new ContainerChecksumManager(new DatanodeConfiguration()); + checksumFile = new File(testDir, CONTAINER_ID + ".tree"); + checksumManager = new ContainerChecksumTreeManager(new DatanodeConfiguration()); } @Test public void testWriteEmptyTreeToFile() throws Exception { - checksumManager.writeContainerMerkleTree(container, new ContainerMerkleTree()); + checksumManager.writeContainerDataTree(container, new ContainerMerkleTree()); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); 
assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); @@ -58,7 +74,7 @@ public void testWriteEmptyBlockListToFile() throws Exception { checksumManager.markBlocksAsDeleted(container, new TreeSet<>()); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); @@ -68,11 +84,11 @@ public void testWriteEmptyBlockListToFile() throws Exception { @Test public void testWriteOnlyTreeToFile() throws Exception { ContainerMerkleTree tree = buildTestTree(); - checksumManager.writeContainerMerkleTree(container, tree); + checksumManager.writeContainerDataTree(container, tree); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. // Therefore, we can use the proto version of our expected tree to check what was written to the file. 
@@ -86,7 +102,7 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception { ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); assertEquals(0, treeProto.getDataChecksum()); @@ -98,11 +114,11 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); ContainerMerkleTree tree = buildTestTree(); - checksumManager.writeContainerMerkleTree(container, tree); + checksumManager.writeContainerDataTree(container, tree); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); } @@ -110,13 +126,13 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { @Test public void testTreePreservedOnDeletedBlocksWrite() throws Exception { ContainerMerkleTree tree = buildTestTree(); - checksumManager.writeContainerMerkleTree(container, tree); + checksumManager.writeContainerDataTree(container, tree); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertEquals(containerID, checksumInfo.getContainerID()); + assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), 
checksumInfo.getDataMerkleTree()); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index efcb3843d7d2..a93c4f170236 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ package org.apache.hadoop.ozone.container.checksum; import org.apache.hadoop.hdds.conf.OzoneConfiguration; From 4ab7a9f02f5cdce85bb6fbc69064d2d2877c19fc Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Tue, 25 Jun 2024 14:55:04 +0530 Subject: [PATCH 10/13] HDDS-10373. Implement framework for capturing Merkle Tree Metrics. --- .../ContainerChecksumTreeManager.java | 40 +++++++-- .../checksum/ContainerMerkleTreeMetrics.java | 84 +++++++++++++++++++ .../TestContainerChecksumTreeManager.java | 54 ++++++++---- 3 files changed, 155 insertions(+), 23 deletions(-) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 71448097393b..fc31e4ee34d8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -34,6 +34,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.util.MetricUtil.captureLatencyNs; + /** * This class coordinates reading and writing Container checksum information for all containers. */ @@ -44,12 +46,15 @@ public class ContainerChecksumTreeManager { // Used to coordinate reads and writes to each container's checksum file. 
// Each container ID is mapped to a stripe. private final Striped fileLock; + private final ContainerMerkleTreeMetrics metrics; /** * Creates one instance that should be used to coordinate all container checksum info within a datanode. */ public ContainerChecksumTreeManager(DatanodeConfiguration dnConf) { fileLock = SimpleStriped.readWriteLock(dnConf.getContainerChecksumLockStripes(), true); + // TODO: TO unregister metrics on stop. + metrics = ContainerMerkleTreeMetrics.create(); } /** @@ -65,8 +70,13 @@ public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTr ContainerProtos.ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() .setDataMerkleTree(tree.toProto()) .build(); - write(data, newChecksumInfo); + captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(), + () -> write(data, newChecksumInfo)); LOG.debug("Data merkle tree for container {} updated", data.getContainerID()); + } catch (IOException ex) { + metrics.incrementMerkleTreeWriteFailures(); + throw new IOException("Error occurred when writing container merkle tree for containerID " + + data.getContainerID(), ex); } finally { writeLock.unlock(); } @@ -92,8 +102,13 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele .clearDeletedBlocks() .addAllDeletedBlocks(sortedDeletedBlockIDs) .build(); - write(data, checksumInfoBuilder.build()); + captureLatencyNs(metrics.getUpdateContainerMerkleTreeLatencyNS(), + () -> write(data, checksumInfoBuilder.build())); LOG.debug("Deleted block list for container {} updated", data.getContainerID()); + } catch (IOException ex) { + metrics.incrementMerkleTreeUpdateFailures(); + throw new IOException("Error occurred when updating container merkle tree for containerID " + + data.getContainerID(), ex); } finally { writeLock.unlock(); } @@ -115,7 +130,7 @@ private Lock getWriteLock(long containerID) { return fileLock.get(containerID).writeLock(); } - private ContainerProtos.ContainerChecksumInfo 
read(KeyValueContainerData data) throws IOException { + public ContainerProtos.ContainerChecksumInfo read(KeyValueContainerData data) throws IOException { long containerID = data.getContainerID(); Lock readLock = getReadLock(containerID); readLock.lock(); @@ -131,14 +146,23 @@ private ContainerProtos.ContainerChecksumInfo read(KeyValueContainerData data) t .setContainerID(containerID) .build(); } - try (FileInputStream inStream = new FileInputStream(checksumFile)) { - return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); - } + return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), () -> + readFile(checksumFile)); + } catch (IOException ex) { + metrics.incrementMerkleTreeReadFailures(); + throw new IOException("Error occurred when reading container merkle tree for containerID " + + data.getContainerID(), ex); } finally { readLock.unlock(); } } + private ContainerProtos.ContainerChecksumInfo readFile(File checksumFile) throws IOException { + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); + } + } + private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksumInfo checksumInfo) throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); @@ -154,6 +178,10 @@ private File getContainerChecksumFile(KeyValueContainerData data) { return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); } + public ContainerMerkleTreeMetrics getMetrics() { + return this.metrics; + } + /** * This class represents the difference between our replica of a container and a peer's replica of a container. * It summarizes the operations we need to do to reconcile our replica with the peer replica it was compared to. 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java new file mode 100644 index 000000000000..8cbec125355e --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.container.checksum; + +import org.apache.hadoop.metrics2.MetricsSystem; +import org.apache.hadoop.metrics2.annotation.Metric; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableRate; + +/** + * Class to collect metrics related to container merkle tree. + */ +public class ContainerMerkleTreeMetrics { + private static final String METRICS_SOURCE_NAME = ContainerMerkleTreeMetrics.class.getSimpleName(); + + public static ContainerMerkleTreeMetrics create() { + MetricsSystem ms = DefaultMetricsSystem.instance(); + return ms.register(METRICS_SOURCE_NAME, "Container Merkle Tree Metrics", + new ContainerMerkleTreeMetrics()); + } + + public void unregister() { + MetricsSystem ms = DefaultMetricsSystem.instance(); + ms.unregisterSource(METRICS_SOURCE_NAME); + } + + @Metric(about = "Number of Merkle tree write failure") + private MutableCounterLong numMerkleTreeWriteFailure; + + @Metric(about = "Number of Merkle tree read failure") + private MutableCounterLong numMerkleTreeReadFailure; + + @Metric(about = "Number of Merkle tree update failure") + private MutableCounterLong numMerkleTreeUpdateFailure; + + @Metric(about = "Merkle tree write latency") + private MutableRate merkleTreeWriteLatencyNS; + + @Metric(about = "Merkle tree read latency") + private MutableRate merkleTreeReadLatencyNS; + + @Metric(about = "Merkle tree update latency") + private MutableRate merkleTreeUpdateLatencyNS; + + public void incrementMerkleTreeWriteFailures() { + this.numMerkleTreeWriteFailure.incr(); + } + + public void 
incrementMerkleTreeReadFailures() { + this.numMerkleTreeReadFailure.incr(); + } + + public void incrementMerkleTreeUpdateFailures() { + this.numMerkleTreeUpdateFailure.incr(); + } + + public MutableRate getWriteContainerMerkleTreeLatencyNS() { + return this.merkleTreeWriteLatencyNS; + } + + public MutableRate getReadContainerMerkleTreeLatencyNS() { + return this.merkleTreeReadLatencyNS; + } + + public MutableRate getUpdateContainerMerkleTreeLatencyNS() { + return this.merkleTreeUpdateLatencyNS; + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index b964a0922c13..b1826b04aeda 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -25,8 +25,6 @@ import org.junit.jupiter.api.io.TempDir; import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; @@ -45,23 +43,28 @@ class TestContainerChecksumTreeManager { @TempDir private File testDir; private KeyValueContainerData container; - private File checksumFile; private ContainerChecksumTreeManager checksumManager; + private ContainerMerkleTreeMetrics metrics; @BeforeEach public void init() { container = mock(KeyValueContainerData.class); when(container.getContainerID()).thenReturn(CONTAINER_ID); when(container.getMetadataPath()).thenReturn(testDir.getAbsolutePath()); - checksumFile = new File(testDir, CONTAINER_ID + ".tree"); checksumManager = new ContainerChecksumTreeManager(new DatanodeConfiguration()); + metrics = checksumManager.getMetrics(); } @Test public void testWriteEmptyTreeToFile() 
throws Exception { + assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); checksumManager.writeContainerDataTree(container, new ContainerMerkleTree()); - ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); @@ -71,9 +74,13 @@ public void testWriteEmptyTreeToFile() throws Exception { @Test public void testWriteEmptyBlockListToFile() throws Exception { + assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); checksumManager.markBlocksAsDeleted(container, new TreeSet<>()); - ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); @@ -83,11 +90,14 @@ public void testWriteEmptyBlockListToFile() throws Exception { @Test public void testWriteOnlyTreeToFile() throws Exception { + assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); + 
assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); + assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); - ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. @@ -97,11 +107,15 @@ public void testWriteOnlyTreeToFile() throws Exception { @Test public void testWriteOnlyDeletedBlocksToFile() throws Exception { + assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); + assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); - ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getDataMerkleTree(); @@ -111,13 +125,19 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception { @Test public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { + assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); + 
assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); + assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); - ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); @@ -125,13 +145,19 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { @Test public void testTreePreservedOnDeletedBlocksWrite() throws Exception { + assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); + assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); - ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); + ContainerProtos.ContainerChecksumInfo checksumInfo 
= checksumManager.read(container); + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getDataMerkleTree()); @@ -155,10 +181,4 @@ private ContainerMerkleTree buildTestTree() throws Exception { return tree; } - - private ContainerProtos.ContainerChecksumInfo readFile() throws IOException { - try (FileInputStream inStream = new FileInputStream(checksumFile)) { - return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); - } - } } From 575c6a5db4c750206937ab20b2657e43b11090ef Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 26 Jun 2024 01:27:55 +0530 Subject: [PATCH 11/13] HDDS-10373. Add create merkle tree latency metric. --- .../container/checksum/ContainerChecksumTreeManager.java | 2 +- .../container/checksum/ContainerMerkleTreeMetrics.java | 7 +++++++ .../checksum/TestContainerChecksumTreeManager.java | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index fc31e4ee34d8..5732ff6b8ed6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -68,7 +68,7 @@ public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTr writeLock.lock(); try { ContainerProtos.ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() - .setDataMerkleTree(tree.toProto()) + .setDataMerkleTree(captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), 
tree::toProto)) .build(); captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(), () -> write(data, newChecksumInfo)); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java index 8cbec125355e..65d453a1fd1c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -58,6 +58,9 @@ public void unregister() { @Metric(about = "Merkle tree update latency") private MutableRate merkleTreeUpdateLatencyNS; + @Metric(about = "Merkle tree creation latency") + private MutableRate merkleTreeCreateLatencyNS; + public void incrementMerkleTreeWriteFailures() { this.numMerkleTreeWriteFailure.incr(); } @@ -81,4 +84,8 @@ public MutableRate getReadContainerMerkleTreeLatencyNS() { public MutableRate getUpdateContainerMerkleTreeLatencyNS() { return this.merkleTreeUpdateLatencyNS; } + + public MutableRate getCreateMerkleTreeLatencyNS() { + return this.merkleTreeCreateLatencyNS; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index b1826b04aeda..ea35c9716e6c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -58,9 +58,11 @@ public void init() { @Test public void testWriteEmptyTreeToFile() throws Exception { 
assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); checksumManager.writeContainerDataTree(container, new ContainerMerkleTree()); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); @@ -92,11 +94,13 @@ public void testWriteEmptyBlockListToFile() throws Exception { public void testWriteOnlyTreeToFile() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); @@ -128,6 +132,7 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); 
checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); ContainerMerkleTree tree = buildTestTree(); @@ -137,6 +142,7 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); @@ -148,6 +154,7 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); @@ -157,6 +164,7 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container); + assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); From 8e874ee15502f6b7032d1cd438c196f738b44fe0 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 24 Jul 2024 15:52:43 +0530 Subject: [PATCH 12/13] removed update metrics and refactored as per review comments. 
--- .../ContainerChecksumTreeManager.java | 42 ++++++++----------- .../checksum/ContainerMerkleTreeMetrics.java | 14 ------- .../TestContainerChecksumTreeManager.java | 25 +++-------- 3 files changed, 23 insertions(+), 58 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 11fdbf46cda2..46dc4aa0ba2d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.ozone.container.checksum; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; @@ -67,17 +68,11 @@ public void writeContainerDataTree(KeyValueContainerData data, ContainerMerkleTr Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { - ContainerProtos.ContainerChecksumInfo newChecksumInfo = captureLatencyNs( - metrics.getReadContainerMerkleTreeLatencyNS(), () -> read(data)).toBuilder() + ContainerProtos.ContainerChecksumInfo newChecksumInfo = read(data).toBuilder() .setContainerMerkleTree(captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto)) .build(); - captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(), - () -> write(data, newChecksumInfo)); + write(data, newChecksumInfo); LOG.debug("Data merkle tree for container {} updated", data.getContainerID()); - } catch (IOException ex) { - metrics.incrementMerkleTreeWriteFailures(); - throw new IOException("Error occurred 
when writing container merkle tree for containerID " - + data.getContainerID(), ex); } finally { writeLock.unlock(); } @@ -92,8 +87,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try { - ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = captureLatencyNs( - metrics.getReadContainerMerkleTreeLatencyNS(), () -> read(data)).toBuilder(); + ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = read(data).toBuilder(); // Although the persisted block list should already be sorted, we will sort it here to make sure. // This will automatically fix any bugs in the persisted order that may show up. SortedSet sortedDeletedBlockIDs = new TreeSet<>(checksumInfoBuilder.getDeletedBlocksList()); @@ -104,13 +98,8 @@ public void markBlocksAsDeleted(KeyValueContainerData data, SortedSet dele .clearDeletedBlocks() .addAllDeletedBlocks(sortedDeletedBlockIDs) .build(); - captureLatencyNs(metrics.getUpdateContainerMerkleTreeLatencyNS(), - () -> write(data, checksumInfoBuilder.build())); + write(data, checksumInfoBuilder.build()); LOG.debug("Deleted block list for container {} updated", data.getContainerID()); - } catch (IOException ex) { - metrics.incrementMerkleTreeUpdateFailures(); - throw new IOException("Error occurred when updating container merkle tree for containerID " - + data.getContainerID(), ex); } finally { writeLock.unlock(); } @@ -149,25 +138,29 @@ private ContainerProtos.ContainerChecksumInfo read(KeyValueContainerData data) t .build(); } try (FileInputStream inStream = new FileInputStream(checksumFile)) { - return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); + return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), + () -> ContainerProtos.ContainerChecksumInfo.parseFrom(inStream)); } + } catch (IOException ex) { + metrics.incrementMerkleTreeReadFailures(); + throw new IOException("Error occurred when reading 
container merkle tree for containerID " + + data.getContainerID(), ex); } finally { readLock.unlock(); } } - private ContainerProtos.ContainerChecksumInfo readFile(File checksumFile) throws IOException { - try (FileInputStream inStream = new FileInputStream(checksumFile)) { - return ContainerProtos.ContainerChecksumInfo.parseFrom(inStream); - } - } - private void write(KeyValueContainerData data, ContainerProtos.ContainerChecksumInfo checksumInfo) throws IOException { Lock writeLock = getWriteLock(data.getContainerID()); writeLock.lock(); try (FileOutputStream outStream = new FileOutputStream(getContainerChecksumFile(data))) { - checksumInfo.writeTo(outStream); + captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(), + () -> checksumInfo.writeTo(outStream)); + } catch (IOException ex) { + metrics.incrementMerkleTreeWriteFailures(); + throw new IOException("Error occurred when writing container merkle tree for containerID " + + data.getContainerID(), ex); } finally { writeLock.unlock(); } @@ -177,6 +170,7 @@ public File getContainerChecksumFile(KeyValueContainerData data) { return new File(data.getMetadataPath(), data.getContainerID() + ".tree"); } + @VisibleForTesting public ContainerMerkleTreeMetrics getMetrics() { return this.metrics; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java index 65d453a1fd1c..a288e15f6bd2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeMetrics.java @@ -46,18 +46,12 @@ public void unregister() { @Metric(about = "Number of Merkle tree read failure") private MutableCounterLong numMerkleTreeReadFailure; - @Metric(about = "Number of 
Merkle tree update failure") - private MutableCounterLong numMerkleTreeUpdateFailure; - @Metric(about = "Merkle tree write latency") private MutableRate merkleTreeWriteLatencyNS; @Metric(about = "Merkle tree read latency") private MutableRate merkleTreeReadLatencyNS; - @Metric(about = "Merkle tree update latency") - private MutableRate merkleTreeUpdateLatencyNS; - @Metric(about = "Merkle tree creation latency") private MutableRate merkleTreeCreateLatencyNS; @@ -69,10 +63,6 @@ public void incrementMerkleTreeReadFailures() { this.numMerkleTreeReadFailure.incr(); } - public void incrementMerkleTreeUpdateFailures() { - this.numMerkleTreeUpdateFailure.incr(); - } - public MutableRate getWriteContainerMerkleTreeLatencyNS() { return this.merkleTreeWriteLatencyNS; } @@ -81,10 +71,6 @@ public MutableRate getReadContainerMerkleTreeLatencyNS() { return this.merkleTreeReadLatencyNS; } - public MutableRate getUpdateContainerMerkleTreeLatencyNS() { - return this.merkleTreeUpdateLatencyNS; - } - public MutableRate getCreateMerkleTreeLatencyNS() { return this.merkleTreeCreateLatencyNS; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index b3f4c2342049..06ed76d8217c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -63,14 +63,12 @@ public void init() { public void testWriteEmptyTreeToFile() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); 
checksumManager.writeContainerDataTree(container, new ContainerMerkleTree()); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getContainerMerkleTree(); @@ -80,13 +78,12 @@ public void testWriteEmptyTreeToFile() throws Exception { @Test public void testWriteEmptyBlockListToFile() throws Exception { - assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); checksumManager.markBlocksAsDeleted(container, new TreeSet<>()); - assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getContainerMerkleTree(); @@ -97,7 +94,6 @@ public void testWriteEmptyBlockListToFile() throws Exception { @Test public void testWriteOnlyTreeToFile() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree 
tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); @@ -105,7 +101,6 @@ public void testWriteOnlyTreeToFile() throws Exception { ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); - assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertTrue(checksumInfo.getDeletedBlocksList().isEmpty()); // TestContainerMerkleTree verifies that going from ContainerMerkleTree to its proto is consistent. @@ -115,15 +110,13 @@ public void testWriteOnlyTreeToFile() throws Exception { @Test public void testWriteOnlyDeletedBlocksToFile() throws Exception { - assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); - assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().changed()); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); - assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); ContainerProtos.ContainerMerkleTree treeProto = checksumInfo.getContainerMerkleTree(); @@ -134,20 +127,16 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception { @Test public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); - 
assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); - assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); - assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); @@ -156,20 +145,16 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { @Test public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); - assertEquals(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total(), 0); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); - 
assertTrue(metrics.getUpdateContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); assertTrue(metrics.getCreateMerkleTreeLatencyNS().lastStat().total() > 0); - assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertEquals(CONTAINER_ID, checksumInfo.getContainerID()); assertEquals(expectedBlocksToDelete, checksumInfo.getDeletedBlocksList()); assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); From c9003090256b7d3a3d133f30c0557df820d1533e Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 24 Jul 2024 22:19:29 +0530 Subject: [PATCH 13/13] Add read metrics test. --- .../TestContainerChecksumTreeManager.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 06ed76d8217c..56a5dbfd55f7 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -128,11 +128,14 @@ public void testWriteOnlyDeletedBlocksToFile() throws Exception { public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); + 
assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); @@ -146,11 +149,14 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); assertEquals(metrics.getCreateMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTree tree = buildTestTree(); checksumManager.writeContainerDataTree(container, tree); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new TreeSet<>(expectedBlocksToDelete)); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); ContainerProtos.ContainerChecksumInfo checksumInfo = readFile(); @@ -160,6 +166,18 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertTreesSortedAndMatch(tree.toProto(), checksumInfo.getContainerMerkleTree()); } + @Test + public void testReadContainerMerkleTreeMetric() throws Exception { + assertEquals(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total(), 0); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); + ContainerMerkleTree tree = buildTestTree(); + checksumManager.writeContainerDataTree(container, tree); + assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 
0); + checksumManager.writeContainerDataTree(container, tree); + assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); + assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); + } + @Test public void testChecksumTreeFilePath() { assertEquals(checksumFile.getAbsolutePath(),