From e294940feb318da02b0ce0dcac101dd4c2c34083 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 22 Nov 2024 15:45:46 -0800 Subject: [PATCH 01/21] HDDS-11763. Implement container repair logic within datanodes --- .../checksum/DNContainerOperationClient.java | 4 + .../container/keyvalue/KeyValueHandler.java | 147 ++++++++++++++++-- 2 files changed, 137 insertions(+), 14 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java index 3c08e58f9bf2..5cf135a2f838 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java @@ -85,6 +85,10 @@ public XceiverClientManager getXceiverClientManager() { return xceiverClientManager; } + public TokenHelper getTokenHelper() { + return tokenHelper; + } + /** * Reads {@link ContainerProtos.ContainerChecksumInfo} for a specified container for the specified datanode. * diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index d587748e6f80..55bc1c986c89 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -35,6 +35,7 @@ import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.function.Function; +import java.util.stream.Collectors; import com.google.common.util.concurrent.Striped; import org.apache.hadoop.fs.FileUtil; @@ -54,24 +55,27 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.WriteChunkRequestProto; import org.apache.hadoop.hdds.scm.ByteStringConversion; import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; +import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.hdds.utils.FaultInjector; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.common.Checksum; -import org.apache.hadoop.ozone.common.ChecksumByteBuffer; -import org.apache.hadoop.ozone.common.ChecksumByteBufferFactory; import org.apache.hadoop.ozone.common.ChunkBuffer; import org.apache.hadoop.ozone.common.OzoneChecksumException; import org.apache.hadoop.ozone.common.utils.BufferUtils; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.checksum.ContainerDiffReport; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTree; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import 
org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; +import org.apache.hadoop.ozone.container.common.helpers.TokenHelper; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; @@ -137,8 +141,12 @@ import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import static org.apache.hadoop.ozone.ClientVersion.EC_REPLICA_INDEX_REQUIRED_IN_BLOCK_REQUEST; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE_DEFAULT; import static org.apache.hadoop.ozone.OzoneConsts.INCREMENTAL_CHUNK_LIST; +import static org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient.createSingleNodePipeline; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Time; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; @@ -1427,21 +1435,132 @@ public void deleteContainer(Container container, boolean force) @Override public void reconcileContainer(DNContainerOperationClient dnClient, Container container, Set peers) throws IOException { - // TODO Just a deterministic placeholder hash for testing until actual implementation is finished. - ContainerData data = container.getContainerData(); - long id = data.getContainerID(); - ByteBuffer byteBuffer = ByteBuffer.allocate(Long.BYTES) - .putLong(id) - .asReadOnlyBuffer(); - byteBuffer.rewind(); - ChecksumByteBuffer checksumImpl = ChecksumByteBufferFactory.crc32Impl(); - checksumImpl.update(byteBuffer); - long dataChecksum = checksumImpl.getValue(); - LOG.info("Generated data checksum of container {} for testing: {}", id, dataChecksum); - data.setDataChecksum(dataChecksum); + KeyValueContainer kvContainer = (KeyValueContainer) container; + KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); + + for (DatanodeDetails peer : peers) { + ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( + containerData.getContainerID(), peer); + if (peerChecksumInfo == null) { + LOG.warn("Checksum not yet generated for peer: {}", peer); + return; + } + + long scmBlockSize = (long) conf.getStorageSize(OZONE_SCM_BLOCK_SIZE, OZONE_SCM_BLOCK_SIZE_DEFAULT, + StorageUnit.BYTES); + ContainerDiffReport diffReport = checksumManager.diff(containerData, peerChecksumInfo); + TokenHelper tokenHelper = dnClient.getTokenHelper(); + XceiverClientSpi xceiverClient = dnClient.getXceiverClientManager() + .acquireClient(createSingleNodePipeline(peer)); + + try { + // Handle missing blocks + for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { + handleMissingBlock(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, missingBlock); + } + + // Handle missing chunks + for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { + reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, entry); + } + + // Handle corrupt chunks + for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { + reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, entry); + } + } finally { + dnClient.getXceiverClientManager().releaseClient(xceiverClient, false); + } + } + + updateContainerChecksum(containerData); + long dataChecksum = 
updateContainerChecksum(containerData); + LOG.info("Checksum data for container {} is updated to {}", containerData.getContainerID(), dataChecksum); + containerData.setDataChecksum(dataChecksum); sendICR(container); } + private long updateContainerChecksum(KeyValueContainerData containerData) throws IOException { + ContainerMerkleTree merkleTree = new ContainerMerkleTree(); + try (DBHandle dbHandle = BlockUtils.getDB(containerData, conf); + BlockIterator blockIterator = dbHandle.getStore(). + getBlockIterator(containerData.getContainerID())) { + while (blockIterator.hasNext()) { + BlockData blockData = blockIterator.nextBlock(); + List chunkInfos = blockData.getChunks(); + merkleTree.addChunks(blockData.getLocalID(), chunkInfos); + } + } + checksumManager.writeContainerDataTree(containerData, merkleTree); + return merkleTree.toProto().getDataChecksum(); + } + + private void handleMissingBlock(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, + long scmBlockSize, XceiverClientSpi xceiverClient, + ContainerProtos.BlockMerkleTree missingBlock) throws IOException { + BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); + Token blockToken = tokenHelper.getBlockToken(blockID, scmBlockSize); + // TODo: Cache the blockResponse to reuse it again. + ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, + blockToken, new HashMap<>()); + // TODO: Add BcsId in BlockMerkleTree to avoid this call + ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = + ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); + List chunksList = blockResponse.getBlockData().getChunksList(); + + for (ContainerProtos.ChunkInfo chunkInfoProto : chunksList) { + ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); + writeChunkForClosedContainer(ChunkInfo.getFromProtoBuf(chunkInfoProto), blockID, chunkBuffer, container); + } + + putBlockForClosedContainer(chunksList, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), + blockLengthResponse.getBlockLength()); + } + + private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos.ChunkInfo chunkInfoProto, + BlockID blockID, Token blockToken) throws IOException { + ContainerProtos.ReadChunkResponseProto response = + ContainerProtocolCalls.readChunk(xceiverClient, chunkInfoProto, blockID.getDatanodeBlockIDProtobuf(), + null, blockToken); + + if (response.hasData()) { + return response.getData(); + } else if (response.hasDataBuffers()) { + return BufferUtils.concatByteStrings(response.getDataBuffers().getBuffersList()); + } else { + throw new IOException("Error reading chunk data: No data returned."); + } + } + + private void reconcileChunk(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, + long scmBlockSize, XceiverClientSpi xceiverClient, + Map.Entry> mapEntry) throws IOException { + long blockId = mapEntry.getKey(); + List chunkList = mapEntry.getValue(); + Set offsets = chunkList.stream().map(ContainerProtos.ChunkMerkleTree::getOffset) + .collect(Collectors.toSet()); + BlockID blockID = new BlockID(containerData.getContainerID(), blockId); + Token blockToken = tokenHelper.getBlockToken(blockID, scmBlockSize); + ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, + 
blockToken, new HashMap<>()); + // TODO: Add BcsId in BlockMerkleTree to avoid this call + ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = + ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); + List chunksList = blockResponse.getBlockData().getChunksList(); + + for (ContainerProtos.ChunkInfo chunkInfoProto : chunksList) { + if (offsets.contains(chunkInfoProto.getOffset())) { + ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); + writeChunkForClosedContainer(ChunkInfo.getFromProtoBuf(chunkInfoProto), blockID, chunkBuffer, container); + } + } + + putBlockForClosedContainer(chunksList, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), + blockLengthResponse.getBlockLength()); + } + /** * Called by BlockDeletingService to delete all the chunks in a block * before proceeding to delete the block info from DB. From 99de480d2f3ebbeea30528c9948a28b477e9fc6f Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 6 Jan 2025 13:21:44 -0800 Subject: [PATCH 02/21] HDDS-11763. Added test cases and fixed bugs. --- .../statemachine/DatanodeStateMachine.java | 7 +- .../container/keyvalue/KeyValueHandler.java | 17 +- .../keyvalue/impl/BlockManagerImpl.java | 2 +- .../container/ozoneimpl/OzoneContainer.java | 4 + .../TestContainerCommandReconciliation.java | 178 +++++++++++++++++- 5 files changed, 192 insertions(+), 16 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index 9d157cc99129..c2773f401a40 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -133,6 +133,7 @@ public class DatanodeStateMachine implements Closeable { private final DatanodeQueueMetrics queueMetrics; private final ReconfigurationHandler reconfigurationHandler; + private final DNContainerOperationClient dnClient; /** * Constructs a datanode state machine. * @param datanodeDetails - DatanodeDetails used to identify a datanode @@ -230,7 +231,7 @@ public DatanodeStateMachine(HddsDatanodeService hddsDatanodeService, // TODO HDDS-11218 combine the clients used for reconstruction and reconciliation so they share the same cache of // datanode clients. 
- DNContainerOperationClient dnClient = new DNContainerOperationClient(conf, certClient, secretKeyClient); + dnClient = new DNContainerOperationClient(conf, certClient, secretKeyClient); ThreadFactory threadFactory = new ThreadFactoryBuilder() .setNameFormat(threadNamePrefix + "PipelineCommandHandlerThread-%d") @@ -753,4 +754,8 @@ public DatanodeQueueMetrics getQueueMetrics() { public ReconfigurationHandler getReconfigurationHandler() { return reconfigurationHandler; } + + public DNContainerOperationClient getDnContainerOperationClientClient() { + return dnClient; + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 55bc1c986c89..435bf4e70acc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -63,6 +63,7 @@ import org.apache.hadoop.hdds.utils.FaultInjector; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.Checksum; import org.apache.hadoop.ozone.common.ChunkBuffer; import org.apache.hadoop.ozone.common.OzoneChecksumException; @@ -555,7 +556,7 @@ ContainerCommandResponseProto handleCloseContainer( return getSuccessResponse(request); } - private void createContainerMerkleTree(Container container) { + public void createContainerMerkleTree(Container container) { if (ContainerChecksumTreeManager.checksumFileExist(container)) { return; } @@ -1075,7 +1076,7 @@ public void putBlockForClosedContainer(List chunkInfo // To be set from the Replica's BCSId blockData.setBlockCommitSequenceId(blockCommitSequenceId); - blockManager.putBlock(kvContainer, blockData, false); + blockManager.putBlock(kvContainer, blockData, true); ContainerProtos.BlockData blockDataProto = blockData.getProtoBufMessage(); final long numBytes = blockDataProto.getSerializedSize(); // Increment write stats for PutBlock after write. @@ -1468,12 +1469,12 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, entry); } + updateContainerChecksum(containerData); } finally { dnClient.getXceiverClientManager().releaseClient(xceiverClient, false); } } - updateContainerChecksum(containerData); long dataChecksum = updateContainerChecksum(containerData); LOG.info("Checksum data for container {} is updated to {}", containerData.getContainerID(), dataChecksum); containerData.setDataChecksum(dataChecksum); @@ -1500,7 +1501,7 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta ContainerProtos.BlockMerkleTree missingBlock) throws IOException { BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); Token blockToken = tokenHelper.getBlockToken(blockID, scmBlockSize); - // TODo: Cache the blockResponse to reuse it again. + // TODO: Cache the blockResponse to reuse it again. 
ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); // TODO: Add BcsId in BlockMerkleTree to avoid this call @@ -1511,7 +1512,9 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta for (ContainerProtos.ChunkInfo chunkInfoProto : chunksList) { ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); - writeChunkForClosedContainer(ChunkInfo.getFromProtoBuf(chunkInfoProto), blockID, chunkBuffer, container); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); + writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); } putBlockForClosedContainer(chunksList, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), @@ -1553,7 +1556,9 @@ private void reconcileChunk(KeyValueContainer container, ContainerData container if (offsets.contains(chunkInfoProto.getOffset())) { ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); - writeChunkForClosedContainer(ChunkInfo.getFromProtoBuf(chunkInfoProto), blockID, chunkBuffer, container); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); + writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java index 6232b843567c..bcc38cf876b0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java @@ -121,7 +121,7 @@ public long persistPutBlock(KeyValueContainer container, // default blockCommitSequenceId for any block is 0. It the putBlock // request is not coming via Ratis(for test scenarios), it will be 0. // In such cases, we should overwrite the block as well - if ((bcsId != 0) && (bcsId <= containerBCSId)) { + if ((bcsId != 0) && (bcsId < containerBCSId)) { // Since the blockCommitSequenceId stored in the db is greater than // equal to blockCommitSequenceId to be updated, it means the putBlock // transaction is reapplied in the ContainerStateMachine on restart. 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 8ae838a7e536..bd686ee903ea 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -615,6 +615,10 @@ public ReplicationServer getReplicationServer() { return replicationServer; } + public ContainerChecksumTreeManager getChecksumTreeManager() { + return checksumTreeManager; + } + public void compactDb() { for (StorageVolume volume : volumeSet.getVolumesList()) { HddsVolume hddsVolume = (HddsVolume) volume; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 64af0148a872..ff15fc9e3912 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -18,8 +18,17 @@ package org.apache.hadoop.ozone.dn.checksum; +import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.ozone.container.common.helpers.BlockData; +import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; +import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -35,7 +44,6 @@ import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.TestHelper; -import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTree; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.interfaces.Container; @@ -44,6 +52,7 @@ import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -51,17 +60,26 @@ import java.io.File; import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Paths; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import static 
java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE; import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE; +import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -87,6 +105,8 @@ public static void init() throws Exception { conf = new OzoneConfiguration(); conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 1); conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); + conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 1024 * 1024, StorageUnit.BYTES); + conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 2 * 1024 * 1024, StorageUnit.BYTES); // Disable the container scanner so it does not create merkle tree files that interfere with this test. conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); cluster = MiniOzoneCluster.newBuilder(conf) @@ -155,7 +175,7 @@ public void testGetChecksumInfoNonexistentFile() throws Exception { HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); Container container = targetDN.getDatanodeStateMachine().getContainer() .getContainerSet().getContainer(containerID); - File treeFile = ContainerChecksumTreeManager.getContainerChecksumFile(container.getContainerData()); + File treeFile = getContainerChecksumFile(container.getContainerData()); // Closing the container should have generated the tree file. assertTrue(treeFile.exists()); assertTrue(treeFile.delete()); @@ -178,7 +198,7 @@ public void testGetChecksumInfoServerIOError() throws Exception { HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); Container container = targetDN.getDatanodeStateMachine().getContainer() .getContainerSet().getContainer(containerID); - File treeFile = ContainerChecksumTreeManager.getContainerChecksumFile(container.getContainerData()); + File treeFile = getContainerChecksumFile(container.getContainerData()); assertTrue(treeFile.exists()); // Make the server unable to read the file. 
assertTrue(treeFile.setReadable(false)); @@ -201,7 +221,7 @@ public void testGetCorruptChecksumInfo() throws Exception { HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); Container container = targetDN.getDatanodeStateMachine().getContainer() .getContainerSet().getContainer(containerID); - File treeFile = ContainerChecksumTreeManager.getContainerChecksumFile(container.getContainerData()); + File treeFile = getContainerChecksumFile(container.getContainerData()); Files.write(treeFile.toPath(), new byte[]{1, 2, 3}, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); @@ -218,7 +238,7 @@ public void testGetEmptyChecksumInfo() throws Exception { HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); Container container = targetDN.getDatanodeStateMachine().getContainer() .getContainerSet().getContainer(containerID); - File treeFile = ContainerChecksumTreeManager.getContainerChecksumFile(container.getContainerData()); + File treeFile = getContainerChecksumFile(container.getContainerData()); // TODO After HDDS-10379 the file will already exist and need to be overwritten. assertTrue(treeFile.exists()); Files.write(treeFile.toPath(), new byte[]{}, @@ -252,7 +272,145 @@ public void testGetChecksumInfoSuccess() throws Exception { } } - private long writeDataAndGetContainer(boolean close) throws Exception { + @Test + public void testContainerChecksumWithBlockMissing() throws Exception { + // 1. Write data to a container. + long containerID = writeDataAndGetContainer(true, 20 * 1024 * 1024); + Set peerNodes = cluster.getHddsDatanodes().stream().map( + HddsDatanodeService::getDatanodeDetails).collect(Collectors.toSet()); + HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); + DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); + Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); + KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); + ContainerProtos.ContainerChecksumInfo oldContainerChecksumInfo = readChecksumFile(container.getContainerData()); + KeyValueHandler kvHandler = (KeyValueHandler) datanodeStateMachine.getContainer().getDispatcher() + .getHandler(ContainerProtos.ContainerType.KeyValueContainer); + + BlockManager blockManager = kvHandler.getBlockManager(); + List blockDatas = blockManager.listBlock(container, -1, 100); + List deletedBlocks = new ArrayList<>(); + String chunksPath = container.getContainerData().getChunksPath(); + long oldDataChecksum = oldContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); + + // 2. Delete some blocks to simulate missing blocks. + try (DBHandle db = BlockUtils.getDB(containerData, conf); + BatchOperation op = db.getStore().getBatchHandler().initBatchOperation()) { + for (int i = 0; i < blockDatas.size(); i += 2) { + BlockData blockData = blockDatas.get(i); + // Delete the block metadata from the container db + db.getStore().getBlockDataTable().deleteWithBatch(op, containerData.getBlockKey(blockData.getLocalID())); + // Delete the block file. 
+ Files.deleteIfExists(Paths.get(chunksPath + "/" + blockData.getBlockID().getLocalID() + ".block")); + deletedBlocks.add(blockData); + } + db.getStore().getBatchHandler().commitBatchOperation(op); + db.getStore().flushDB(); + } + + Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); + kvHandler.createContainerMerkleTree(container); + ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = + readChecksumFile(container.getContainerData()); + long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum(); + // Checksum should have changed after block delete. + Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); + + // 3. Reconcile the container. + kvHandler.reconcileContainer(datanodeStateMachine.getDnContainerOperationClientClient(), container, peerNodes); + ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); + long newDataChecksum = newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); + assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), + newContainerChecksumInfo.getContainerMerkleTree()); + Assertions.assertEquals(oldDataChecksum, newDataChecksum); + } + + @Test + public void testContainerChecksumChunkCorruption() throws Exception { + // 1. Write data to a container. + long containerID = writeDataAndGetContainer(true, 20 * 1024 * 1024); + Set peerNodes = cluster.getHddsDatanodes().stream().map( + HddsDatanodeService::getDatanodeDetails).collect(Collectors.toSet()); + HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); + DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); + Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); + KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); + ContainerProtos.ContainerChecksumInfo oldContainerChecksumInfo = readChecksumFile(container.getContainerData()); + KeyValueHandler kvHandler = (KeyValueHandler) datanodeStateMachine.getContainer().getDispatcher() + .getHandler(ContainerProtos.ContainerType.KeyValueContainer); + + BlockManager blockManager = kvHandler.getBlockManager(); + List blockDatas = blockManager.listBlock(container, -1, 100); + long oldDataChecksum = oldContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); + + // 2. Corrupt first chunk for all the blocks + try (DBHandle db = BlockUtils.getDB(containerData, conf); + BatchOperation op = db.getStore().getBatchHandler().initBatchOperation()) { + for (BlockData blockData : blockDatas) { + // Modify the block metadata to simulate chunk corruption. + ContainerProtos.BlockData.Builder blockDataBuilder = blockData.getProtoBufMessage().toBuilder(); + blockDataBuilder.clearChunks(); + + ContainerProtos.ChunkInfo chunkInfo = blockData.getChunks().get(0); + ContainerProtos.ChecksumData.Builder checksumDataBuilder = ContainerProtos.ChecksumData.newBuilder() + .setBytesPerChecksum(chunkInfo.getChecksumData().getBytesPerChecksum()) + .setType(chunkInfo.getChecksumData().getType()); + + for (ByteString checksum : chunkInfo.getChecksumData().getChecksumsList()) { + byte[] checksumBytes = checksum.toByteArray(); + // Modify the checksum bytes to simulate corruption. 
+ checksumBytes[0] = (byte) (checksumBytes[0] - 1); + checksumDataBuilder.addChecksums(ByteString.copyFrom(checksumBytes)).build(); + } + chunkInfo = chunkInfo.toBuilder().setChecksumData(checksumDataBuilder.build()).build(); + blockDataBuilder.addChunks(chunkInfo); + for (int i = 1; i < blockData.getChunks().size(); i++) { + blockDataBuilder.addChunks(blockData.getChunks().get(i)); + } + + // Modify the block metadata from the container db to simulate chunk corruption. + db.getStore().getBlockDataTable().putWithBatch(op, containerData.getBlockKey(blockData.getLocalID()), + BlockData.getFromProtoBuf(blockDataBuilder.build())); + } + db.getStore().getBatchHandler().commitBatchOperation(op); + db.getStore().flushDB(); + } + + Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); + kvHandler.createContainerMerkleTree(container); + // To set unhealthy for chunks that are corrupted. + ContainerProtos.ContainerChecksumInfo containerChecksumAfterChunkCorruption = + readChecksumFile(container.getContainerData()); + long dataChecksumAfterAfterChunkCorruption = containerChecksumAfterChunkCorruption + .getContainerMerkleTree().getDataChecksum(); + // Checksum should have changed after chunk corruption. + Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterAfterChunkCorruption); + + // 3. Set Unhealthy for first chunk of all blocks. This should be done by the scanner, Until then this is a + // manual step. + ContainerProtos.ContainerChecksumInfo.Builder builder = containerChecksumAfterChunkCorruption.toBuilder(); + List blockMerkleTreeList = builder.getContainerMerkleTree() + .getBlockMerkleTreeList(); + builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); + for (ContainerProtos.BlockMerkleTree blockMerkleTree : blockMerkleTreeList) { + ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTree.toBuilder(); + List chunkMerkleTreeBuilderList = + blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); + chunkMerkleTreeBuilderList.get(0).setIsHealthy(false); + builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); + } + Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); + writeContainerDataTreeProto(container.getContainerData(), builder.getContainerMerkleTree()); + + // 4. Reconcile the container. 
+ kvHandler.reconcileContainer(datanodeStateMachine.getDnContainerOperationClientClient(), container, peerNodes); + ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); + long newDataChecksum = newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); + assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), + newContainerChecksumInfo.getContainerMerkleTree()); + Assertions.assertEquals(oldDataChecksum, newDataChecksum); + } + + private long writeDataAndGetContainer(boolean close, int dataLen) throws Exception { String volumeName = UUID.randomUUID().toString(); String bucketName = UUID.randomUUID().toString(); store.createVolume(volumeName); @@ -260,9 +418,9 @@ private long writeDataAndGetContainer(boolean close) throws Exception { volume.createBucket(bucketName); OzoneBucket bucket = volume.getBucket(bucketName); - byte[] data = "Test content".getBytes(UTF_8); + byte[] data = randomAlphabetic(dataLen).getBytes(UTF_8); // Write Key - try (OzoneOutputStream os = TestHelper.createKey("testkey", RATIS, THREE, 0, store, volumeName, bucketName)) { + try (OzoneOutputStream os = TestHelper.createKey("testkey", RATIS, THREE, dataLen, store, volumeName, bucketName)) { IOUtils.write(data, os); } @@ -274,6 +432,10 @@ private long writeDataAndGetContainer(boolean close) throws Exception { return containerID; } + private long writeDataAndGetContainer(boolean close) throws Exception { + return writeDataAndGetContainer(close, 5); + } + public static void writeChecksumFileToDatanodes(long containerID, ContainerMerkleTree tree) throws Exception { // Write Container Merkle Tree for (HddsDatanodeService dn : cluster.getHddsDatanodes()) { From f8606fed87111a8385ff00788f88aa539f1776f8 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 8 Jan 2025 12:30:14 -0800 Subject: [PATCH 03/21] HDDS-11763. Fix tests. 
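The reconciliation unit tests need container block metadata without exercising the full
client write path, so this change adds a createBlockMetaData helper to
ContainerMerkleTreeTestUtils that seeds a container's block-data table with synthetic
blocks and chunks, and points TestReconcileContainerCommandHandler at @TempDir-backed
metadata and DB locations.

Rough usage sketch (test-only; it assumes containerData already refers to a temp-dir
container layout with an initialized container DB, which the updated tests arrange
themselves):

    // Seed 5 blocks with 3 chunks each, then build the merkle tree that the
    // reconciliation path will later read back and compare against peers.
    createBlockMetaData(containerData, 5, 3);
    kvHandler.createContainerMerkleTree(container);
    ContainerProtos.ContainerChecksumInfo info = readChecksumFile(containerData);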
--- .../ContainerMerkleTreeTestUtils.java | 32 +++++++++++++++++++ .../TestReconcileContainerCommandHandler.java | 14 ++++++++ .../keyvalue/TestKeyValueHandler.java | 4 +++ 3 files changed, 50 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index db2a8c319b67..e3626df8c1c8 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -17,16 +17,25 @@ */ package org.apache.hadoop.ozone.container.checksum; +import com.google.common.collect.Lists; import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.container.ContainerTestHelper; +import org.apache.hadoop.ozone.container.common.helpers.BlockData; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; @@ -355,4 +364,27 @@ public static void writeContainerDataTreeProto(ContainerData data, ContainerProt + data.getContainerID(), ex); } } + + /** + * Creates block metadata for the given container with the specified number of blocks and chunks per block. 
+ */ + public static void createBlockMetaData(KeyValueContainerData data, int numOfBlocksPerContainer, + int numOfChunksPerBlock) throws IOException { + try (DBHandle metadata = BlockUtils.getDB(data, new OzoneConfiguration())) { + for (int j = 0; j < numOfBlocksPerContainer; j++) { + BlockID blockID = new BlockID(data.getContainerID(), j); + String blockKey = data.getBlockKey(blockID.getLocalID()); + BlockData kd = new BlockData(blockID); + List chunks = Lists.newArrayList(); + for (int k = 0; k < numOfChunksPerBlock; k++) { + long dalaLen = 10L; + ChunkInfo chunkInfo = ContainerTestHelper.getChunk(blockID.getLocalID(), k, k * dalaLen, dalaLen); + ContainerTestHelper.setDataChecksum(chunkInfo, ContainerTestHelper.getData((int) dalaLen)); + chunks.add(chunkInfo.getProtoBufMessage()); + } + kd.setChunks(chunks); + metadata.getStore().getBlockDataTable().put(blockKey, kd); + } + } + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java index f27ed097d2f7..18f75ee1f717 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java @@ -44,9 +44,11 @@ import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.hadoop.ozone.container.replication.ReplicationSupervisor; import org.apache.hadoop.ozone.protocol.commands.ReconcileContainerCommand; +import org.junit.jupiter.api.io.TempDir; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.nio.file.Path; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -54,6 +56,7 @@ import static java.util.Collections.singletonMap; import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; import static org.apache.hadoop.ozone.OzoneConsts.GB; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.createBlockMetaData; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -74,6 +77,10 @@ public class TestReconcileContainerCommandHandler { private OzoneContainer ozoneContainer; private StateContext context; private ReconcileContainerCommandHandler subject; + @TempDir + private Path tempDir; + @TempDir + private Path dbFile; public void init(ContainerLayoutVersion layout, IncrementalReportSender icrSender) throws Exception { @@ -94,6 +101,8 @@ public void init(ContainerLayoutVersion layout, IncrementalReportSender Date: Fri, 24 Jan 2025 16:12:10 -0800 Subject: [PATCH 04/21] Address partial review comments. 
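First round of review feedback: the reconcile loop now skips peers that have not yet
generated a checksum instead of aborting, the old and new data checksums are recorded in
the datanode container log through a new ContainerLogger.logReconciled entry (formatted
in hex via a shared HddsUtils.getHexString helper), ContainerChecksumTreeManager.read is
made public so the handler can load the pre-reconciliation tree, reconcileChunk takes the
block id and chunk list directly, and the temporary OzoneContainer.getChecksumTreeManager
accessor is removed.

Sketch of the revised per-peer flow (simplified; repairFromPeer is a placeholder name for
the missing-block, missing-chunk and corrupt-chunk handling in the diff below):

    for (DatanodeDetails peer : peers) {
      ContainerProtos.ContainerChecksumInfo peerInfo =
          dnClient.getContainerChecksumInfo(containerData.getContainerID(), peer);
      if (peerInfo == null) {
        // Peer has no checksum tree yet; move on to the next replica instead of giving up.
        continue;
      }
      repairFromPeer(peer, peerInfo);
    }
    long newDataChecksum = updateContainerChecksum(containerData);
    ContainerLogger.logReconciled(containerData, oldDataChecksum, newDataChecksum);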
--- .../org/apache/hadoop/hdds/HddsUtils.java | 5 ++ .../scm/container/ContainerReplicaInfo.java | 4 +- .../ContainerChecksumTreeManager.java | 2 +- .../statemachine/DatanodeStateMachine.java | 1 + .../common/utils/ContainerLogger.java | 14 +++++ .../container/keyvalue/KeyValueHandler.java | 61 ++++++++++--------- .../container/ozoneimpl/OzoneContainer.java | 4 -- .../TestContainerCommandReconciliation.java | 1 + 8 files changed, 58 insertions(+), 34 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java index 42aaa18a3176..a009b9e44889 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java @@ -886,6 +886,11 @@ public static HddsProtos.UUID toProtobuf(UUID uuid) { : null; } + /** @return Hex string representation of {@code value} */ + public static String getHexString(long value) { + return Long.toHexString(value); + } + /** * Logs a warning to report that the class is not closed properly. */ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java index a239cbfdba96..fafe64f3d0f9 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java @@ -27,6 +27,8 @@ import java.io.IOException; import java.util.UUID; +import static org.apache.hadoop.hdds.HddsUtils.getHexString; + /** * Class which stores ContainerReplica details on the client. */ @@ -102,7 +104,7 @@ public long getDataChecksum() { private static class LongToHexJsonSerializer extends JsonSerializer { @Override public void serialize(Long value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeString(Long.toHexString(value)); + gen.writeString(getHexString(value)); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 2c10313c2fc9..e42e833984bb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -315,7 +315,7 @@ private Lock getLock(long containerID) { * Callers are not required to hold a lock while calling this since writes are done to a tmp file and atomically * swapped into place. 
*/ - private Optional read(ContainerData data) throws IOException { + public Optional read(ContainerData data) throws IOException { long containerID = data.getContainerID(); File checksumFile = getContainerChecksumFile(data); try { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index c2773f401a40..02b4ea8f8dc8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -755,6 +755,7 @@ public ReconfigurationHandler getReconfigurationHandler() { return reconfigurationHandler; } + @VisibleForTesting public DNContainerOperationClient getDnContainerOperationClientClient() { return dnClient; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java index 92940b01940c..b3362f7d5227 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java @@ -24,6 +24,8 @@ import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; +import static org.apache.hadoop.hdds.HddsUtils.getHexString; + /** * Utility class defining methods to write to the datanode container log. * @@ -146,6 +148,18 @@ public static void logRecovered(ContainerData containerData) { LOG.info(getMessage(containerData)); } + /** + * Logged when a container is reconciled. + * + * @param containerData The container that was reconciled on this datanode. + * @param oldDataChecksum The old data checksum. + * @param newDataChecksum The new data checksum. + */ + public static void logReconciled(ContainerData containerData, long oldDataChecksum, long newDataChecksum) { + LOG.info(getMessage(containerData, "Container reconciled. 
Old checksum is " + getHexString(oldDataChecksum) + + " , New checksum is " + getHexString(newDataChecksum))); + } + private static String getMessage(ContainerData containerData, String message) { return String.join(FIELD_SEPARATOR, getMessage(containerData), message); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index bc45ebb3e2e0..47794bb246ed 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -32,6 +32,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.function.Function; @@ -593,17 +594,7 @@ public void createContainerMerkleTree(Container container) { try { KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); - ContainerMerkleTree merkleTree = new ContainerMerkleTree(); - try (DBHandle dbHandle = BlockUtils.getDB(containerData, conf); - BlockIterator blockIterator = dbHandle.getStore(). - getBlockIterator(containerData.getContainerID())) { - while (blockIterator.hasNext()) { - BlockData blockData = blockIterator.nextBlock(); - List chunkInfos = blockData.getChunks(); - merkleTree.addChunks(blockData.getLocalID(), chunkInfos); - } - } - checksumManager.writeContainerDataTree(containerData, merkleTree); + updateContainerChecksum(containerData); } catch (IOException ex) { LOG.error("Cannot create container checksum for container {} , Exception: ", container.getContainerData().getContainerID(), ex); @@ -1458,13 +1449,20 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container Set peers) throws IOException { KeyValueContainer kvContainer = (KeyValueContainer) container; KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); + Optional checksumInfo = checksumManager.read(containerData); + long oldDataChecksum = 0; + + if (checksumInfo.isPresent()) { + oldDataChecksum = checksumInfo.get().getContainerMerkleTree().getDataChecksum(); + } for (DatanodeDetails peer : peers) { ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( containerData.getContainerID(), peer); if (peerChecksumInfo == null) { - LOG.warn("Checksum not yet generated for peer: {}", peer); - return; + LOG.warn("Cannot reconcile container {} with peer {} which has not yet generated a checksum", + containerData.getContainerID(), peer); + continue; } long scmBlockSize = (long) conf.getStorageSize(OZONE_SCM_BLOCK_SIZE, OZONE_SCM_BLOCK_SIZE_DEFAULT, @@ -1480,14 +1478,17 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container handleMissingBlock(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, missingBlock); } + // Handling missing chunks and corrupt chunks are done the same way. Separate here to differentiate them. 
// Handle missing chunks for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { - reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, entry); + reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, + entry.getKey(), entry.getValue()); } // Handle corrupt chunks for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { - reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, entry); + reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, + entry.getKey(), entry.getValue()); } updateContainerChecksum(containerData); } finally { @@ -1497,7 +1498,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container long dataChecksum = updateContainerChecksum(containerData); LOG.info("Checksum data for container {} is updated to {}", containerData.getContainerID(), dataChecksum); - containerData.setDataChecksum(dataChecksum); + ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, dataChecksum); sendICR(container); } @@ -1513,7 +1514,9 @@ private long updateContainerChecksum(KeyValueContainerData containerData) throws } } checksumManager.writeContainerDataTree(containerData, merkleTree); - return merkleTree.toProto().getDataChecksum(); + long dataChecksum = merkleTree.toProto().getDataChecksum(); + containerData.setDataChecksum(dataChecksum); + return dataChecksum; } private void handleMissingBlock(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, @@ -1524,9 +1527,11 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta // TODO: Cache the blockResponse to reuse it again. ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); - // TODO: Add BcsId in BlockMerkleTree to avoid this call ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); + long blockCommitSequenceId = getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId(); + // Check the local bcsId with the one from the bcsId from the peer datanode. 
+ long maxBlockCommitSequenceId = Math.max(blockLengthResponse.getBlockLength(), blockCommitSequenceId); List chunksList = blockResponse.getBlockData().getChunksList(); for (ContainerProtos.ChunkInfo chunkInfoProto : chunksList) { @@ -1538,7 +1543,7 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta } putBlockForClosedContainer(chunksList, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), - blockLengthResponse.getBlockLength()); + maxBlockCommitSequenceId); } private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos.ChunkInfo chunkInfoProto, @@ -1557,22 +1562,22 @@ private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos } private void reconcileChunk(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, - long scmBlockSize, XceiverClientSpi xceiverClient, - Map.Entry> mapEntry) throws IOException { - long blockId = mapEntry.getKey(); - List chunkList = mapEntry.getValue(); + long scmBlockSize, XceiverClientSpi xceiverClient, long blockId, + List chunkList) throws IOException { Set offsets = chunkList.stream().map(ContainerProtos.ChunkMerkleTree::getOffset) .collect(Collectors.toSet()); BlockID blockID = new BlockID(containerData.getContainerID(), blockId); Token blockToken = tokenHelper.getBlockToken(blockID, scmBlockSize); ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); - // TODO: Add BcsId in BlockMerkleTree to avoid this call ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); - List chunksList = blockResponse.getBlockData().getChunksList(); + long blockCommitSequenceId = getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId(); + // Check the local bcsId with the one from the bcsId from the peer datanode. 
+ long maxBlockCommitSequenceId = Math.max(blockLengthResponse.getBlockLength(), blockCommitSequenceId); + List chunksListFromPeer = blockResponse.getBlockData().getChunksList(); - for (ContainerProtos.ChunkInfo chunkInfoProto : chunksList) { + for (ContainerProtos.ChunkInfo chunkInfoProto : chunksListFromPeer) { if (offsets.contains(chunkInfoProto.getOffset())) { ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); @@ -1582,8 +1587,8 @@ private void reconcileChunk(KeyValueContainer container, ContainerData container } } - putBlockForClosedContainer(chunksList, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), - blockLengthResponse.getBlockLength()); + putBlockForClosedContainer(chunksListFromPeer, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), + maxBlockCommitSequenceId); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index ec211c941318..c6eaeebfd4a7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -654,10 +654,6 @@ public ReplicationServer getReplicationServer() { return replicationServer; } - public ContainerChecksumTreeManager getChecksumTreeManager() { - return checksumTreeManager; - } - public void compactDb() { for (StorageVolume volume : volumeSet.getVolumesList()) { HddsVolume hddsVolume = (HddsVolume) volume; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index ff15fc9e3912..1d8542baf6c1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -307,6 +307,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception { db.getStore().flushDB(); } + // TODO: Use On-demand container scanner to build the new container merkle tree. Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); kvHandler.createContainerMerkleTree(container); ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = From 04aa8a04ded81c7a90b0ebd05ca78221591b7011 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 31 Jan 2025 13:01:31 -0800 Subject: [PATCH 05/21] Address review comments. 
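Second round of review feedback: the local blockCommitSequenceId lookup during repair
previously called getBlock unconditionally, and that lookup fails when the block is
absent on this replica, exactly the case the missing-block path is repairing. A new
BlockManager#blockExists(Container, BlockID) check, backed by an isExist lookup on the
block-data table, now guards that call and falls back to a sequence id of 0; TestHelper
and the reconciliation tests are extended accordingly.

The guarded lookup, as it appears in KeyValueHandler:

    long blockCommitSequenceId = getBlockManager().blockExists(container, blockID)
        ? getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId()
        : 0;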
--- .../container/keyvalue/KeyValueHandler.java | 6 +- .../keyvalue/impl/BlockManagerImpl.java | 13 +++++ .../keyvalue/interfaces/BlockManager.java | 9 +++ .../keyvalue/TestKeyValueHandler.java | 12 +++- .../hadoop/ozone/container/TestHelper.java | 29 ++++++++++ .../TestContainerCommandReconciliation.java | 55 +++++++++++++++---- 6 files changed, 110 insertions(+), 14 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 47794bb246ed..3ea143f4e5f4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1529,7 +1529,8 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta blockToken, new HashMap<>()); ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); - long blockCommitSequenceId = getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId(); + long blockCommitSequenceId = getBlockManager().blockExists(container, blockID) ? + getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId() : 0; // Check the local bcsId with the one from the bcsId from the peer datanode. long maxBlockCommitSequenceId = Math.max(blockLengthResponse.getBlockLength(), blockCommitSequenceId); List chunksList = blockResponse.getBlockData().getChunksList(); @@ -1572,7 +1573,8 @@ private void reconcileChunk(KeyValueContainer container, ContainerData container blockToken, new HashMap<>()); ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); - long blockCommitSequenceId = getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId(); + long blockCommitSequenceId = getBlockManager().blockExists(container, blockID) ? + getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId() : 0; // Check the local bcsId with the one from the bcsId from the peer datanode. long maxBlockCommitSequenceId = Math.max(blockLengthResponse.getBlockLength(), blockCommitSequenceId); List chunksListFromPeer = blockResponse.getBlockData().getChunksList(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java index bcc38cf876b0..f3486acd7df9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java @@ -361,6 +361,19 @@ public List listBlock(Container container, long startLocalID, int } } + @Override + public boolean blockExists(Container container, BlockID blockID) throws IOException { + KeyValueContainerData containerData = (KeyValueContainerData) container + .getContainerData(); + try (DBHandle db = BlockUtils.getDB(containerData, config)) { + // This is a post condition that acts as a hint to the user. + // Should never fail. 
+ Preconditions.checkNotNull(db, DB_NULL_ERR_MSG); + String blockKey = containerData.getBlockKey(blockID.getLocalID()); + return db.getStore().getBlockDataTable().isExist(blockKey); + } + } + /** * Shutdown KeyValueContainerManager. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java index 256d357a31dc..658c27bcf3ba 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java @@ -80,6 +80,15 @@ long putBlock(Container container, BlockData data, boolean endOfBlock) List listBlock(Container container, long startLocalID, int count) throws IOException; + /** + * Check if a block exists in the container. + * + * @param container - Container to check for the block. + * @param blockID - BlockID of the Block. + * @return True if block exists, false otherwise. + */ + boolean blockExists(Container container, BlockID blockID) throws IOException; + /** * Returns last committed length of the block. * diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 32d2eb576253..2d13799f3b41 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; @@ -60,6 +61,7 @@ import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.util.Sets; import org.apache.ozone.test.GenericTestUtils; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_CHOOSING_POLICY; @@ -77,6 +79,7 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.any; import org.junit.jupiter.api.Assertions; @@ -498,7 +501,14 @@ public void testReconcileContainer(ContainerLayoutVersion layoutVersion) throws Assertions.assertEquals(0, icrCount.get()); // This should trigger container report validation in the ICR handler above.
- keyValueHandler.reconcileContainer(mock(DNContainerOperationClient.class), container, Collections.emptySet()); + DNContainerOperationClient mockDnClient = mock(DNContainerOperationClient.class); + DatanodeDetails peer1 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails peer2 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails peer3 = MockDatanodeDetails.randomDatanodeDetails(); + when(mockDnClient.getContainerChecksumInfo(anyLong(), any())).thenReturn(null); + keyValueHandler.reconcileContainer(mockDnClient, container, Sets.newHashSet(peer1, peer2, peer3)); + // Make sure all the replicas are used for reconciliation. + Mockito.verify(mockDnClient, times(3)).getContainerChecksumInfo(anyLong(), any()); Assertions.assertEquals(1, icrCount.get()); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java index cb8173b2f075..0a18e3260d64 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.ratis.RatisHelper; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; @@ -380,6 +381,34 @@ public static void waitForContainerClose(MiniOzoneCluster cluster, } } + public static void waitForScmContainerState(MiniOzoneCluster cluster, long containerID, + HddsProtos.LifeCycleState lifeCycleState) + throws InterruptedException, TimeoutException { + GenericTestUtils.waitFor(() -> { + try { + HddsProtos.LifeCycleState state = cluster.getStorageContainerManager().getContainerManager() + .getContainer(ContainerID.valueOf(containerID)).getState(); + return state == lifeCycleState; + } catch (ContainerNotFoundException e) { + return false; + } + }, 500, 100 * 1000); + } + + public static void waitForReplicasContainerState(MiniOzoneCluster cluster, long containerID, + ContainerReplicaProto.State state) + throws InterruptedException, TimeoutException { + GenericTestUtils.waitFor(() -> { + try { + Set replicas = cluster.getStorageContainerManager().getContainerManager() + .getContainerReplicas(ContainerID.valueOf(containerID)); + return replicas.stream().allMatch(r -> r.getState() == state); + } catch (ContainerNotFoundException e) { + return false; + } + }, 500, 100 * 1000); + } + public static StateMachine getStateMachine(MiniOzoneCluster cluster) throws Exception { return getStateMachine(cluster.getHddsDatanodes().get(0), null); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 1d8542baf6c1..57357f810fee 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -19,9 +19,12 @@ package 
org.apache.hadoop.ozone.dn.checksum; import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.ozone.ClientVersion; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; @@ -110,7 +113,7 @@ public static void init() throws Exception { // Disable the container scanner so it does not create merkle tree files that interfere with this test. conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(3) + .setNumDatanodes(5) .build(); cluster.waitForClusterToBeReady(); rpcClient = OzoneClientFactory.getRpcClient(conf); @@ -124,6 +127,10 @@ public static void stop() throws IOException { rpcClient.close(); } + if (dnClient != null) { + dnClient.close(); + } + if (cluster != null) { cluster.shutdown(); } @@ -276,8 +283,7 @@ public void testGetChecksumInfoSuccess() throws Exception { public void testContainerChecksumWithBlockMissing() throws Exception { // 1. Write data to a container. long containerID = writeDataAndGetContainer(true, 20 * 1024 * 1024); - Set peerNodes = cluster.getHddsDatanodes().stream().map( - HddsDatanodeService::getDatanodeDetails).collect(Collectors.toSet()); + TestHelper.waitForReplicasContainerState(cluster, containerID, ContainerReplicaProto.State.CLOSED); HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); @@ -317,20 +323,32 @@ public void testContainerChecksumWithBlockMissing() throws Exception { Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); // 3. Reconcile the container. - kvHandler.reconcileContainer(datanodeStateMachine.getDnContainerOperationClientClient(), container, peerNodes); + cluster.getStorageContainerManager().getClientProtocolServer().reconcileContainer(containerID); + GenericTestUtils.waitFor(() -> { + try { + ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); + return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum; + } catch (Exception ex) { + return false; + } + }, 500, 20000); ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); - long newDataChecksum = newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), newContainerChecksumInfo.getContainerMerkleTree()); - Assertions.assertEquals(oldDataChecksum, newDataChecksum); + List containerReplicas = cluster.getStorageContainerManager() + .getClientProtocolServer().getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION); + // Compare and check if dataChecksum is same on all replicas. 
+ Set dataChecksums = containerReplicas.stream() + .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) + .collect(Collectors.toSet()); + assertEquals(1, dataChecksums.size()); } @Test public void testContainerChecksumChunkCorruption() throws Exception { // 1. Write data to a container. long containerID = writeDataAndGetContainer(true, 20 * 1024 * 1024); - Set peerNodes = cluster.getHddsDatanodes().stream().map( - HddsDatanodeService::getDatanodeDetails).collect(Collectors.toSet()); + TestHelper.waitForReplicasContainerState(cluster, containerID, ContainerReplicaProto.State.CLOSED); HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); @@ -403,12 +421,26 @@ public void testContainerChecksumChunkCorruption() throws Exception { writeContainerDataTreeProto(container.getContainerData(), builder.getContainerMerkleTree()); // 4. Reconcile the container. - kvHandler.reconcileContainer(datanodeStateMachine.getDnContainerOperationClientClient(), container, peerNodes); + cluster.getStorageContainerManager().getClientProtocolServer().reconcileContainer(containerID); + GenericTestUtils.waitFor(() -> { + try { + ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); + return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum; + } catch (Exception ex) { + return false; + } + }, 500, 20000); ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); - long newDataChecksum = newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), newContainerChecksumInfo.getContainerMerkleTree()); - Assertions.assertEquals(oldDataChecksum, newDataChecksum); + Assertions.assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum()); + List containerReplicas = cluster.getStorageContainerManager() + .getClientProtocolServer().getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION); + // Compare and check if dataChecksum is same on all replicas. + Set dataChecksums = containerReplicas.stream() + .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) + .collect(Collectors.toSet()); + assertEquals(1, dataChecksums.size()); } private long writeDataAndGetContainer(boolean close, int dataLen) throws Exception { @@ -429,6 +461,7 @@ private long writeDataAndGetContainer(boolean close, int dataLen) throws Excepti .findFirst().get().getContainerID(); if (close) { TestHelper.waitForContainerClose(cluster, containerID); + TestHelper.waitForScmContainerState(cluster, containerID, HddsProtos.LifeCycleState.CLOSED); } return containerID; } From 112762d2b4a59ed2660021f88ef2884012e45ce9 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Tue, 18 Feb 2025 12:47:26 -0800 Subject: [PATCH 06/21] Address review. 
--- .../org/apache/hadoop/hdds/HddsUtils.java | 2 +- .../scm/container/ContainerReplicaInfo.java | 4 +- .../common/helpers/ContainerMetrics.java | 10 ++ .../common/utils/ContainerLogger.java | 11 +- .../container/keyvalue/KeyValueHandler.java | 152 ++++++++++++------ .../keyvalue/impl/BlockManagerImpl.java | 82 +++++++++- .../keyvalue/interfaces/BlockManager.java | 11 ++ .../impl/TestFilePerBlockStrategy.java | 13 +- .../TestContainerCommandReconciliation.java | 96 ++++++++--- 9 files changed, 293 insertions(+), 88 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java index a009b9e44889..9435cbc5430f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java @@ -887,7 +887,7 @@ public static HddsProtos.UUID toProtobuf(UUID uuid) { } /** @return Hex string representation of {@code value} */ - public static String getHexString(long value) { + public static String checksumToString(long value) { return Long.toHexString(value); } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java index fafe64f3d0f9..e158712fd619 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaInfo.java @@ -27,7 +27,7 @@ import java.io.IOException; import java.util.UUID; -import static org.apache.hadoop.hdds.HddsUtils.getHexString; +import static org.apache.hadoop.hdds.HddsUtils.checksumToString; /** * Class which stores ContainerReplica details on the client. 
@@ -104,7 +104,7 @@ public long getDataChecksum() { private static class LongToHexJsonSerializer extends JsonSerializer { @Override public void serialize(Long value, JsonGenerator gen, SerializerProvider provider) throws IOException { - gen.writeString(getHexString(value)); + gen.writeString(checksumToString(value)); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java index 03dbce061bb2..eb7e3751071b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerMetrics.java @@ -58,6 +58,8 @@ public class ContainerMetrics implements Closeable { @Metric private MutableCounterLong containerForceDelete; @Metric private MutableCounterLong numReadStateMachine; @Metric private MutableCounterLong bytesReadStateMachine; + @Metric private MutableCounterLong numContainerReconciledWithoutChanges; + @Metric private MutableCounterLong numContainerReconciledWithChanges; private final EnumMap numOpsArray; @@ -174,4 +176,12 @@ public void incBytesReadStateMachine(long bytes) { public long getBytesReadStateMachine() { return bytesReadStateMachine.value(); } + + public void incContainerReconciledWithoutChanges() { + numContainerReconciledWithoutChanges.incr(); + } + + public void incContainerReconciledWithChanges() { + numContainerReconciledWithChanges.incr(); + } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java index b3362f7d5227..7cb340aa6c2c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java @@ -24,7 +24,7 @@ import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; -import static org.apache.hadoop.hdds.HddsUtils.getHexString; +import static org.apache.hadoop.hdds.HddsUtils.checksumToString; /** * Utility class defining methods to write to the datanode container log. @@ -153,11 +153,9 @@ public static void logRecovered(ContainerData containerData) { * * @param containerData The container that was reconciled on this datanode. * @param oldDataChecksum The old data checksum. - * @param newDataChecksum The new data checksum. */ - public static void logReconciled(ContainerData containerData, long oldDataChecksum, long newDataChecksum) { - LOG.info(getMessage(containerData, "Container reconciled. Old checksum is " + getHexString(oldDataChecksum) + - " , New checksum is " + getHexString(newDataChecksum))); + public static void logReconciled(ContainerData containerData, long oldDataChecksum) { + LOG.info(getMessage(containerData, "Container reconciled. 
Old checksum is " + checksumToString(oldDataChecksum))); } private static String getMessage(ContainerData containerData, @@ -170,6 +168,7 @@ private static String getMessage(ContainerData containerData) { "ID=" + containerData.getContainerID(), "Index=" + containerData.getReplicaIndex(), "BCSID=" + containerData.getBlockCommitSequenceId(), - "State=" + containerData.getState()); + "State=" + containerData.getState(), + "DataChecksum=" + checksumToString(containerData.getDataChecksum())); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 2c7b9ed7a850..b097922252f0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -35,6 +35,8 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; import java.util.concurrent.locks.Lock; import java.util.function.Function; import java.util.stream.Collectors; @@ -104,11 +106,13 @@ import org.apache.hadoop.ozone.container.keyvalue.impl.ChunkManagerFactory; import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager; +import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import static org.apache.hadoop.hdds.HddsUtils.checksumToString; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.CLOSED; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.QUASI_CLOSED; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY; @@ -147,8 +151,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE_DEFAULT; import static org.apache.hadoop.ozone.OzoneConsts.INCREMENTAL_CHUNK_LIST; import static org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient.createSingleNodePipeline; import static org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion.DEFAULT_LAYOUT; @@ -1108,9 +1110,9 @@ public void writeChunkForClosedContainer(ChunkInfo chunkInfo, BlockID blockID, * Handle Put Block operation for closed container. Calls BlockManager to process the request. 
* */ - public void putBlockForClosedContainer(List chunkInfos, KeyValueContainer kvContainer, - BlockData blockData, long blockCommitSequenceId) - throws IOException { + public void putBlockForClosedContainer(KeyValueContainer kvContainer, BlockData blockData, + long blockCommitSequenceId, boolean overwriteBscId) + throws IOException { Preconditions.checkNotNull(kvContainer); Preconditions.checkNotNull(blockData); long startTime = Time.monotonicNowNanos(); @@ -1119,11 +1121,12 @@ public void putBlockForClosedContainer(List chunkInfo throw new IOException("Container #" + kvContainer.getContainerData().getContainerID() + " is not in closed state, Container state is " + kvContainer.getContainerState()); } - blockData.setChunks(chunkInfos); // To be set from the Replica's BCSId - blockData.setBlockCommitSequenceId(blockCommitSequenceId); + if (overwriteBscId) { + blockData.setBlockCommitSequenceId(blockCommitSequenceId); + } - blockManager.putBlock(kvContainer, blockData, true); + blockManager.putBlockForClosedContainer(kvContainer, blockData, overwriteBscId); ContainerProtos.BlockData blockDataProto = blockData.getProtoBufMessage(); final long numBytes = blockDataProto.getSerializedSize(); // Increment write stats for PutBlock after write. @@ -1480,6 +1483,7 @@ public void deleteContainer(Container container, boolean force) deleteInternal(container, force); } + // TODO: Update the Javadoc with the reconciliation steps. @Override public void reconcileContainer(DNContainerOperationClient dnClient, Container container, Set peers) throws IOException { @@ -1490,6 +1494,13 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container if (checksumInfo.isPresent()) { oldDataChecksum = checksumInfo.get().getContainerMerkleTree().getDataChecksum(); + } else { + // Try creating the checksum info from RocksDB metadata if it is not present. + createContainerMerkleTree(container); + checksumInfo = checksumManager.read(containerData); + if (checksumInfo.isPresent()) { + oldDataChecksum = checksumInfo.get().getContainerMerkleTree().getDataChecksum(); + } + } for (DatanodeDetails peer : peers) { @@ -1501,8 +1512,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container continue; } - long scmBlockSize = (long) conf.getStorageSize(OZONE_SCM_BLOCK_SIZE, OZONE_SCM_BLOCK_SIZE_DEFAULT, - StorageUnit.BYTES); + // TODO: Verify how the block token is used within the datanode. ContainerDiffReport diffReport = checksumManager.diff(containerData, peerChecksumInfo); TokenHelper tokenHelper = dnClient.getTokenHelper(); XceiverClientSpi xceiverClient = dnClient.getXceiverClientManager() @@ -1511,20 +1521,32 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container try { // Handle missing blocks for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { - handleMissingBlock(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, missingBlock); + try { + handleMissingBlock(kvContainer, containerData, tokenHelper, xceiverClient, missingBlock); + } catch (IOException e) { + LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), + containerData.getContainerID(), e); + } } - // Handling missing chunks and corrupt chunks are done the same way. Separate here to differentiate them.
// Handle missing chunks for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { - reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, - entry.getKey(), entry.getValue()); + try { + reconcileChunk(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), entry.getValue()); + } catch (IOException e) { + LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), + containerData.getContainerID(), e); + } } // Handle corrupt chunks for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { - reconcileChunk(kvContainer, containerData, tokenHelper, scmBlockSize, xceiverClient, - entry.getKey(), entry.getValue()); + try { + reconcileChunk(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), entry.getValue()); + } catch (IOException e) { + LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), + containerData.getContainerID(), e); + } } updateContainerChecksum(containerData); } finally { @@ -1532,9 +1554,20 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container } } + // Update checksum based on RocksDB metadata long dataChecksum = updateContainerChecksum(containerData); - LOG.info("Checksum data for container {} is updated to {}", containerData.getContainerID(), dataChecksum); - ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, dataChecksum); + // Trigger manual on demand scanner + OnDemandContainerDataScanner.scanContainer(container); + if (dataChecksum == oldDataChecksum) { + metrics.incContainerReconciledWithoutChanges(); + LOG.info("Container {} reconciled without changes, Current checksum {}", containerData.getContainerID(), + checksumToString(dataChecksum)); + } else { + metrics.incContainerReconciledWithChanges(); + LOG.warn("Container {} reconciled, Checksum updated from {} to {}", containerData.getContainerID(), + checksumToString(oldDataChecksum), checksumToString(dataChecksum)); + } + ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum); sendICR(container); } @@ -1556,31 +1589,42 @@ private long updateContainerChecksum(KeyValueContainerData containerData) throws } private void handleMissingBlock(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, - long scmBlockSize, XceiverClientSpi xceiverClient, - ContainerProtos.BlockMerkleTree missingBlock) throws IOException { + XceiverClientSpi xceiverClient, ContainerProtos.BlockMerkleTree missingBlock) + throws IOException { BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); - Token blockToken = tokenHelper.getBlockToken(blockID, scmBlockSize); - // TODO: Cache the blockResponse to reuse it again. + Token blockToken = tokenHelper.getBlockToken(blockID, 0L); + // TODO: Re-use the blockResponse for the same block again. ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); - ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = - ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); - long blockCommitSequenceId = getBlockManager().blockExists(container, blockID) ? + ContainerProtos.BlockData peerBlockData = blockResponse.getBlockData(); + long bcsId = getBlockManager().blockExists(container, blockID) ? 
getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId() : 0; // Check the local bcsId with the one from the bcsId from the peer datanode. - long maxBlockCommitSequenceId = Math.max(blockLengthResponse.getBlockLength(), blockCommitSequenceId); - List chunksList = blockResponse.getBlockData().getChunksList(); - - for (ContainerProtos.ChunkInfo chunkInfoProto : chunksList) { - ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); - ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); - chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); - writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); + long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), bcsId); + List peerChunksList = peerBlockData.getChunksList(); + List successfullChunksList = new ArrayList<>(); + // Update BcsId only if all chunks are successfully written. + boolean overwriteBcsId = true; + + // Don't update bcsId if chunk read fails + for (ContainerProtos.ChunkInfo chunkInfoProto : peerChunksList) { + try { + ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); + writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); + successfullChunksList.add(chunkInfoProto); + } catch (IOException ex) { + overwriteBcsId = false; + LOG.error("Error while reconciling missing block {} for offset {} in container {}", + blockID, chunkInfoProto.getOffset(), containerData.getContainerID(), ex); + } } - putBlockForClosedContainer(chunksList, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), - maxBlockCommitSequenceId); + BlockData putBlockData = BlockData.getFromProtoBuf(peerBlockData); + putBlockData.setChunks(successfullChunksList); + putBlockForClosedContainer(container, putBlockData, maxBcsId, overwriteBcsId); } private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos.ChunkInfo chunkInfoProto, @@ -1599,34 +1643,48 @@ private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos } private void reconcileChunk(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, - long scmBlockSize, XceiverClientSpi xceiverClient, long blockId, + XceiverClientSpi xceiverClient, long blockId, List chunkList) throws IOException { Set offsets = chunkList.stream().map(ContainerProtos.ChunkMerkleTree::getOffset) .collect(Collectors.toSet()); BlockID blockID = new BlockID(containerData.getContainerID(), blockId); - Token blockToken = tokenHelper.getBlockToken(blockID, scmBlockSize); + Token blockToken = tokenHelper.getBlockToken(blockID, 0L); ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); - ContainerProtos.GetCommittedBlockLengthResponseProto blockLengthResponse = - ContainerProtocolCalls.getCommittedBlockLength(xceiverClient, blockID, blockToken); - long blockCommitSequenceId = getBlockManager().blockExists(container, blockID) ? 
- getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId() : 0; + ContainerProtos.BlockData peerBlockData = blockResponse.getBlockData(); + BlockData localBlockData = getBlockManager().getBlock(container, blockID); // Check the local bcsId with the one from the bcsId from the peer datanode. - long maxBlockCommitSequenceId = Math.max(blockLengthResponse.getBlockLength(), blockCommitSequenceId); - List chunksListFromPeer = blockResponse.getBlockData().getChunksList(); + long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), + localBlockData.getBlockCommitSequenceId()); + List chunksListFromPeer = peerBlockData.getChunksList(); + + SortedMap localChunksMap = localBlockData.getChunks().stream() + .collect(Collectors.toMap(ContainerProtos.ChunkInfo::getOffset, + Function.identity(), (chunk1, chunk2) -> chunk1, TreeMap::new)); + boolean overwriteBcsId = true; for (ContainerProtos.ChunkInfo chunkInfoProto : chunksListFromPeer) { - if (offsets.contains(chunkInfoProto.getOffset())) { + try { + if (!offsets.contains(chunkInfoProto.getOffset())) { + continue; + } + ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); + localChunksMap.put(chunkInfo.getOffset(), chunkInfoProto); + } catch (IOException ex) { + overwriteBcsId = false; + LOG.error("Error while reconciling chunk {} for block {} in container {}", + chunkInfoProto.getOffset(), blockID, containerData.getContainerID(), ex); } } - putBlockForClosedContainer(chunksListFromPeer, container, BlockData.getFromProtoBuf(blockResponse.getBlockData()), - maxBlockCommitSequenceId); + List localChunkList = new ArrayList<>(localChunksMap.values()); + localBlockData.setChunks(localChunkList); + putBlockForClosedContainer(container, localBlockData, maxBcsId, overwriteBcsId); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java index c23d405d4a0b..760a17baf62f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java @@ -102,6 +102,86 @@ public long putBlock(Container container, BlockData data, data, endOfBlock); } + @Override + public long putBlockForClosedContainer(Container container, BlockData data, boolean overwriteBcsId) + throws IOException { + return persistPutBlockForClosedContainer((KeyValueContainer) container, data, overwriteBcsId); + } + + private long persistPutBlockForClosedContainer(KeyValueContainer container, BlockData data, boolean overwriteBcsId) + throws IOException { + Preconditions.checkNotNull(data, "BlockData cannot be null for put " + + "operation."); + Preconditions.checkState(data.getContainerID() >= 0, "Container Id " + + "cannot be negative"); + + KeyValueContainerData containerData = container.getContainerData(); + + // We are not locking the key manager since RocksDB serializes all actions + // against a single DB. We rely on DB level locking to avoid conflicts. 
+ try (DBHandle db = BlockUtils.getDB(containerData, config)) { + // This is a post condition that acts as a hint to the user. + // Should never fail. + Preconditions.checkNotNull(db, DB_NULL_ERR_MSG); + + long bcsId = data.getBlockCommitSequenceId(); + long containerBCSId = containerData.getBlockCommitSequenceId(); + + // Check if the block is already present in the DB of the container to determine whether + // the blockCount is already incremented for this block in the DB or not. + long localID = data.getLocalID(); + boolean incrBlockCount = false; + + // update the blockData as well as BlockCommitSequenceId here + try (BatchOperation batch = db.getStore().getBatchHandler() + .initBatchOperation()) { + // If block exists in cache, blockCount should not be incremented. + if (db.getStore().getBlockDataTable().get(containerData.getBlockKey(localID)) == null) { + // Block does not exist in DB => blockCount needs to be + // incremented when the block is added into DB. + incrBlockCount = true; + } + + db.getStore().getBlockDataTable().putWithBatch(batch, containerData.getBlockKey(localID), data); + if (overwriteBcsId && bcsId > containerBCSId) { + db.getStore().getMetadataTable().putWithBatch(batch, containerData.getBcsIdKey(), bcsId); + } + + // Set Bytes used, this bytes used will be updated for every write and + // only get committed for every put block. In this way, when datanode + // is up, for computation of disk space by container only committed + // block length is used, And also on restart the blocks committed to DB + // is only used to compute the bytes used. This is done to keep the + // current behavior and avoid DB write during write chunk operation. + db.getStore().getMetadataTable().putWithBatch(batch, containerData.getBytesUsedKey(), + containerData.getBytesUsed()); + + // Set Block Count for a container. + if (incrBlockCount) { + db.getStore().getMetadataTable().putWithBatch(batch, containerData.getBlockCountKey(), + containerData.getBlockCount() + 1); + } + + db.getStore().getBatchHandler().commitBatchOperation(batch); + } + + if (overwriteBcsId && bcsId > containerBCSId) { + container.updateBlockCommitSequenceId(bcsId); + } + + // Increment block count in-memory after the DB update. + if (incrBlockCount) { + containerData.incrBlockCount(); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Block " + data.getBlockID() + " successfully committed with bcsId " + + bcsId + " chunk size " + data.getChunks().size()); + } + return data.getSize(); + } + } + public long persistPutBlock(KeyValueContainer container, BlockData data, boolean endOfBlock) throws IOException { @@ -125,7 +205,7 @@ public long persistPutBlock(KeyValueContainer container, // default blockCommitSequenceId for any block is 0. It the putBlock // request is not coming via Ratis(for test scenarios), it will be 0. // In such cases, we should overwrite the block as well - if ((bcsId != 0) && (bcsId < containerBCSId)) { + if ((bcsId != 0) && (bcsId <= containerBCSId)) { // Since the blockCommitSequenceId stored in the db is greater than // equal to blockCommitSequenceId to be updated, it means the putBlock // transaction is reapplied in the ContainerStateMachine on restart. 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java index 4655dbebb277..07dc1113fefc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/interfaces/BlockManager.java @@ -50,6 +50,17 @@ public interface BlockManager { long putBlock(Container container, BlockData data, boolean endOfBlock) throws IOException; + /** + * Puts or overwrites a block in a closed container. + * + * @param container - Container to which the block needs to be added. + * @param data - Block Data. + * @param overwriteBcsId - Whether to overwrite the bcsId in the block data. + * @return length of the Block. + */ + long putBlockForClosedContainer(Container container, BlockData data, boolean overwriteBcsId) + throws IOException; + /** * Gets an existing block. * diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java index 9333ba999e76..ef3a5feec9ca 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java @@ -165,8 +165,8 @@ public void testWriteChunkAndPutBlockFailureForNonClosedContainer( ChunkBuffer.wrap(getData()); Assertions.assertThrows(IOException.class, () -> keyValueHandler.writeChunkForClosedContainer( getChunkInfo(), getBlockID(), ChunkBuffer.wrap(getData()), keyValueContainer)); - Assertions.assertThrows(IOException.class, () -> keyValueHandler.putBlockForClosedContainer( - null, keyValueContainer, new BlockData(getBlockID()), 0L)); + Assertions.assertThrows(IOException.class, () -> keyValueHandler.putBlockForClosedContainer(keyValueContainer, + new BlockData(getBlockID()), 0L, true)); } @Test @@ -228,7 +228,8 @@ public void testPutBlockForClosedContainer() throws IOException { List chunkInfoList = new ArrayList<>(); chunkInfoList.add(getChunkInfo().getProtoBufMessage()); BlockData putBlockData = new BlockData(getBlockID()); - keyValueHandler.putBlockForClosedContainer(chunkInfoList, kvContainer, putBlockData, 1L); + putBlockData.setChunks(chunkInfoList); + keyValueHandler.putBlockForClosedContainer(kvContainer, putBlockData, 1L, true); Assertions.assertEquals(containerData.getBlockCommitSequenceId(), 1L); Assertions.assertEquals(containerData.getBlockCount(), 1L); @@ -243,7 +244,8 @@ public void testPutBlockForClosedContainer() throws IOException { ChunkInfo newChunkInfo = new ChunkInfo(String.format("%d.data.%d", getBlockID() .getLocalID(), 1L), 0, 20L); chunkInfoList.add(newChunkInfo.getProtoBufMessage()); - keyValueHandler.putBlockForClosedContainer(chunkInfoList, kvContainer, putBlockData, 2L); + putBlockData.setChunks(chunkInfoList); + keyValueHandler.putBlockForClosedContainer(kvContainer, putBlockData, 2L, true); Assertions.assertEquals(containerData.getBlockCommitSequenceId(), 2L); Assertions.assertEquals(containerData.getBlockCount(), 1L); @@ -254,8 +256,7 @@
Assertions.assertTrue(blockDataEquals(putBlockData, getBlockData)); } - // Put block on bcsId <= containerBcsId should be a no-op - keyValueHandler.putBlockForClosedContainer(chunkInfoList, kvContainer, putBlockData, 2L); + keyValueHandler.putBlockForClosedContainer(kvContainer, putBlockData, 2L, true); Assertions.assertEquals(containerData.getBlockCommitSequenceId(), 2L); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index e2b5e51d7f13..333008e7526b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -18,19 +18,28 @@ package org.apache.hadoop.ozone.dn.checksum; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClientTestImpl; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.ozone.ClientVersion; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; +import org.apache.hadoop.ozone.security.SecretKeyTestClient; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.commons.io.IOUtils; @@ -40,11 +49,6 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; -import org.apache.hadoop.ozone.client.ObjectStore; -import org.apache.hadoop.ozone.client.OzoneBucket; -import org.apache.hadoop.ozone.client.OzoneClient; -import org.apache.hadoop.ozone.client.OzoneClientFactory; -import org.apache.hadoop.ozone.client.OzoneVolume; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.TestHelper; import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; @@ -73,6 +77,8 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_EXPIRY_TIME; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; import static 
org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE; import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; @@ -97,6 +103,7 @@ public class TestContainerCommandReconciliation { private static ObjectStore store; private static OzoneConfiguration conf; private static DNContainerOperationClient dnClient; + private static final String KEY_NAME = "testkey"; @TempDir private static File testDir; @@ -106,6 +113,13 @@ public static void init() throws Exception { testDir = GenericTestUtils.getTestDir( TestContainerCommandReconciliation.class.getSimpleName()); conf = new OzoneConfiguration(); + // Add security configuration. + // conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); + conf.setBoolean(HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED, true); + conf.setBoolean(HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED, true); + conf.setBoolean(HDDS_GRPC_TLS_ENABLED, true); + conf.setInt(HDDS_BLOCK_TOKEN_EXPIRY_TIME, 1000); + // conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.name()); conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 1); conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 1024 * 1024, StorageUnit.BYTES); @@ -113,8 +127,10 @@ public static void init() throws Exception { // Disable the container scanner so it does not create merkle tree files that interfere with this test. conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); cluster = MiniOzoneCluster.newBuilder(conf) - .setNumDatanodes(5) - .build(); + .setCertificateClient(new CertificateClientTestImpl(conf)) + .setSecretKeyClient(new SecretKeyTestClient()) + .setNumDatanodes(3) + .build(); cluster.waitForClusterToBeReady(); rpcClient = OzoneClientFactory.getRpcClient(conf); store = rpcClient.getObjectStore(); @@ -142,7 +158,9 @@ public static void stop() throws IOException { */ @Test public void testGetChecksumInfoOpenReplica() throws Exception { - long containerID = writeDataAndGetContainer(false); + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + long containerID = writeDataAndGetContainer(false, volume, bucket); HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); StorageContainerException ex = assertThrows(StorageContainerException.class, () -> dnClient.getContainerChecksumInfo(containerID, targetDN.getDatanodeDetails())); @@ -177,7 +195,9 @@ public void testGetChecksumInfoNonexistentReplica() { */ @Test public void testGetChecksumInfoNonexistentFile() throws Exception { - long containerID = writeDataAndGetContainer(true); + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + long containerID = writeDataAndGetContainer(true, volume, bucket); // Pick a datanode and remove its checksum file. HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); Container container = targetDN.getDatanodeStateMachine().getContainer() @@ -200,7 +220,9 @@ public void testGetChecksumInfoNonexistentFile() throws Exception { */ @Test public void testGetChecksumInfoServerIOError() throws Exception { - long containerID = writeDataAndGetContainer(true); + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + long containerID = writeDataAndGetContainer(true, volume, bucket); // Pick a datanode and remove its checksum file. 
HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); Container container = targetDN.getDatanodeStateMachine().getContainer() @@ -222,7 +244,9 @@ public void testGetChecksumInfoServerIOError() throws Exception { */ @Test public void testGetCorruptChecksumInfo() throws Exception { - long containerID = writeDataAndGetContainer(true); + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + long containerID = writeDataAndGetContainer(true, volume, bucket); // Pick a datanode and corrupt its checksum file. HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); @@ -239,7 +263,9 @@ public void testGetCorruptChecksumInfo() throws Exception { @Test public void testGetEmptyChecksumInfo() throws Exception { - long containerID = writeDataAndGetContainer(true); + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + long containerID = writeDataAndGetContainer(true, volume, bucket); // Pick a datanode and truncate its checksum file to zero length. HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0); @@ -261,7 +287,9 @@ public void testGetEmptyChecksumInfo() throws Exception { @Test public void testGetChecksumInfoSuccess() throws Exception { - long containerID = writeDataAndGetContainer(true); + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + long containerID = writeDataAndGetContainer(true, volume, bucket); // Overwrite the existing tree with a custom one for testing. We will check that it is returned properly from the // API. ContainerMerkleTreeWriter tree = buildTestTree(conf); @@ -282,7 +310,12 @@ public void testGetChecksumInfoSuccess() throws Exception { @Test public void testContainerChecksumWithBlockMissing() throws Exception { // 1. Write data to a container. - long containerID = writeDataAndGetContainer(true, 20 * 1024 * 1024); + // Read the key back check it's hash. + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + Pair containerAndData = getDataAndContainer(true, 20 * 1024 * 1024, volume, bucket); + long containerID = containerAndData.getLeft(); + byte[] data = containerAndData.getRight(); TestHelper.waitForReplicasContainerState(cluster, containerID, ContainerReplicaProto.State.CLOSED); HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); @@ -323,7 +356,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception { Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); // 3. Reconcile the container. 
- cluster.getStorageContainerManager().getClientProtocolServer().reconcileContainer(containerID); + cluster.getStorageContainerLocationClient().reconcileContainer(containerID); GenericTestUtils.waitFor(() -> { try { ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); @@ -342,12 +375,21 @@ public void testContainerChecksumWithBlockMissing() throws Exception { .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) .collect(Collectors.toSet()); assertEquals(1, dataChecksums.size()); + cluster.getHddsDatanodes().get(1).stop(); + cluster.getHddsDatanodes().get(2).stop(); + TestHelper.validateData(KEY_NAME, data, store, volume, bucket); + cluster.getHddsDatanodes().get(1).start(); + cluster.getHddsDatanodes().get(2).start(); } @Test public void testContainerChecksumChunkCorruption() throws Exception { // 1. Write data to a container. - long containerID = writeDataAndGetContainer(true, 20 * 1024 * 1024); + String volume = UUID.randomUUID().toString(); + String bucket = UUID.randomUUID().toString(); + Pair containerAndData = getDataAndContainer(true, 20 * 1024 * 1024, volume, bucket); + long containerID = containerAndData.getLeft(); + byte[] data = containerAndData.getRight(); TestHelper.waitForReplicasContainerState(cluster, containerID, ContainerReplicaProto.State.CLOSED); HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); @@ -421,7 +463,7 @@ public void testContainerChecksumChunkCorruption() throws Exception { writeContainerDataTreeProto(container.getContainerData(), builder.getContainerMerkleTree()); // 4. Reconcile the container. - cluster.getStorageContainerManager().getClientProtocolServer().reconcileContainer(containerID); + cluster.getStorageContainerLocationClient().reconcileContainer(containerID); GenericTestUtils.waitFor(() -> { try { ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); @@ -441,11 +483,15 @@ public void testContainerChecksumChunkCorruption() throws Exception { .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) .collect(Collectors.toSet()); assertEquals(1, dataChecksums.size()); + cluster.getHddsDatanodes().get(1).stop(); + cluster.getHddsDatanodes().get(2).stop(); + TestHelper.validateData(KEY_NAME, data, store, volume, bucket); + cluster.getHddsDatanodes().get(1).start(); + cluster.getHddsDatanodes().get(2).start(); } - private long writeDataAndGetContainer(boolean close, int dataLen) throws Exception { - String volumeName = UUID.randomUUID().toString(); - String bucketName = UUID.randomUUID().toString(); + private Pair getDataAndContainer(boolean close, int dataLen, String volumeName, String bucketName) + throws Exception { store.createVolume(volumeName); OzoneVolume volume = store.getVolume(volumeName); volume.createBucket(bucketName); @@ -453,21 +499,21 @@ private long writeDataAndGetContainer(boolean close, int dataLen) throws Excepti byte[] data = randomAlphabetic(dataLen).getBytes(UTF_8); // Write Key - try (OzoneOutputStream os = TestHelper.createKey("testkey", RATIS, THREE, dataLen, store, volumeName, bucketName)) { + try (OzoneOutputStream os = TestHelper.createKey(KEY_NAME, RATIS, THREE, dataLen, store, volumeName, bucketName)) { IOUtils.write(data, os); } - long containerID = bucket.getKey("testkey").getOzoneKeyLocations().stream() + long containerID = bucket.getKey(KEY_NAME).getOzoneKeyLocations().stream() 
.findFirst().get().getContainerID(); if (close) { TestHelper.waitForContainerClose(cluster, containerID); TestHelper.waitForScmContainerState(cluster, containerID, HddsProtos.LifeCycleState.CLOSED); } - return containerID; + return Pair.of(containerID, data); } - private long writeDataAndGetContainer(boolean close) throws Exception { - return writeDataAndGetContainer(close, 5); + private long writeDataAndGetContainer(boolean close, String volume, String bucket) throws Exception { + return getDataAndContainer(close, 5, volume, bucket).getLeft(); } public static void writeChecksumFileToDatanodes(long containerID, ContainerMerkleTreeWriter tree) throws Exception { From 07c40e4048d2417b7ffbcfd926ffeccaa068ff6f Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 19 Feb 2025 14:56:14 -0800 Subject: [PATCH 07/21] Fix tests. --- .../main/smoketest/admincli/container.robot | 5 +- .../hadoop/ozone/container/TestHelper.java | 33 +-- .../TestContainerCommandReconciliation.java | 274 ++++++++++++------ 3 files changed, 197 insertions(+), 115 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot index b17973e1f364..55132123cde9 100644 --- a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot +++ b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot @@ -150,11 +150,10 @@ Close container Reconcile closed container # Check that info does not show replica checksums, since manual reconciliation has not yet been triggered. - # TODO When the scanner is computing checksums automatically, this test may need to be updated. ${container} = Execute ozone admin container list --state CLOSED | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1 ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 - # 0 is the hex value of an empty checksum. - Should Be Equal As Strings 0 ${data_checksum} + # 0 is the hex value of an empty checksum. After container close the data checksum should not be 0. + Should Not Be Equal As Strings 0 ${data_checksum} # When reconciliation finishes, replica checksums should be shown. 
Execute ozone admin container reconcile ${container} Wait until keyword succeeds 1min 5sec Reconciliation complete ${container} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java index 0a18e3260d64..fe3b5f7d4198 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java @@ -18,6 +18,14 @@ package org.apache.hadoop.ozone.container; +import static java.util.stream.Collectors.toList; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.IOException; import java.security.MessageDigest; import java.util.ArrayList; @@ -27,12 +35,10 @@ import java.util.List; import java.util.Set; import java.util.concurrent.TimeoutException; - import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.ratis.RatisHelper; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; @@ -59,7 +65,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.transport.server.XceiverServerSpi; import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis; - import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo; import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.server.RaftServer; @@ -67,14 +72,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static java.util.stream.Collectors.toList; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertEquals; - /** * Helpers for container tests. 
*/ @@ -395,20 +392,6 @@ public static void waitForScmContainerState(MiniOzoneCluster cluster, long conta }, 500, 100 * 1000); } - public static void waitForReplicasContainerState(MiniOzoneCluster cluster, long containerID, - ContainerReplicaProto.State state) - throws InterruptedException, TimeoutException { - GenericTestUtils.waitFor(() -> { - try { - Set replicas = cluster.getStorageContainerManager().getContainerManager() - .getContainerReplicas(ContainerID.valueOf(containerID)); - return replicas.stream().allMatch(r -> r.getState() == state); - } catch (ContainerNotFoundException e) { - return false; - } - }, 500, 100 * 1000); - } - public static StateMachine getStateMachine(MiniOzoneCluster cluster) throws Exception { return getStateMachine(cluster.getHddsDatanodes().get(0), null); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 333008e7526b..777061e5c9c7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -18,81 +18,103 @@ package org.apache.hadoop.ozone.dn.checksum; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; +import static org.apache.hadoop.hdds.DFSConfigKeysLegacy.DFS_DATANODE_KERBEROS_KEYTAB_FILE_KEY; +import static org.apache.hadoop.hdds.DFSConfigKeysLegacy.DFS_DATANODE_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECRET_KEY_EXPIRY_DURATION; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECRET_KEY_ROTATE_CHECK_DURATION; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECRET_KEY_ROTATE_DURATION; +import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; +import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE; +import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; +import static org.apache.hadoop.hdds.scm.ScmConfig.ConfigStrings.HDDS_SCM_KERBEROS_KEYTAB_FILE_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfig.ConfigStrings.HDDS_SCM_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY; +import static org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig.ConfigStrings.HDDS_SCM_HTTP_KERBEROS_KEYTAB_FILE_KEY; +import static org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig.ConfigStrings.HDDS_SCM_HTTP_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SECURITY_ENABLED_KEY; +import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; +import 
static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_KEYTAB_FILE; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_KERBEROS_KEYTAB_FILE_KEY; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_KERBEROS_PRINCIPAL_KEY; +import static org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.net.InetAddress; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.List; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; +import org.apache.hadoop.hdds.scm.ScmConfig; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; -import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClientTestImpl; +import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig; +import org.apache.hadoop.hdds.security.symmetric.SecretKeyClient; +import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.minikdc.MiniKdc; import org.apache.hadoop.ozone.ClientVersion; +import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; import org.apache.hadoop.ozone.client.OzoneClientFactory; import org.apache.hadoop.ozone.client.OzoneVolume; -import org.apache.hadoop.ozone.container.common.helpers.BlockData; -import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; -import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; -import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; -import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; -import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; -import org.apache.hadoop.ozone.security.SecretKeyTestClient; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; -import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; -import 
org.apache.commons.io.IOUtils; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.ozone.HddsDatanodeService; -import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.TestHelper; import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; +import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; +import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; +import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.ozone.test.GenericTestUtils; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; +import org.apache.ratis.util.ExitUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.UUID; -import java.util.stream.Collectors; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic; -import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_EXPIRY_TIME; -import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_GRPC_TLS_ENABLED; -import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; -import static org.apache.hadoop.hdds.client.ReplicationFactor.THREE; -import static org.apache.hadoop.hdds.client.ReplicationType.RATIS; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_BLOCK_SIZE; -import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; -import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; -import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; -import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; -import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static 
org.junit.jupiter.api.Assertions.assertTrue; - /** * This class tests container commands for reconciliation. */ @@ -107,34 +129,32 @@ public class TestContainerCommandReconciliation { @TempDir private static File testDir; + @TempDir + private static File workDir; + private static MiniKdc miniKdc; + private static File ozoneKeytab; + private static File spnegoKeytab; + private static File testUserKeytab; + private static String testUserPrincipal; + private static String host; @BeforeAll public static void init() throws Exception { - testDir = GenericTestUtils.getTestDir( - TestContainerCommandReconciliation.class.getSimpleName()); conf = new OzoneConfiguration(); - // Add security configuration. - // conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); - conf.setBoolean(HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED, true); - conf.setBoolean(HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED, true); - conf.setBoolean(HDDS_GRPC_TLS_ENABLED, true); - conf.setInt(HDDS_BLOCK_TOKEN_EXPIRY_TIME, 1000); - // conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.name()); - conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 1); + conf.set(OZONE_SCM_CLIENT_ADDRESS_KEY, "localhost"); conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 1024 * 1024, StorageUnit.BYTES); conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 2 * 1024 * 1024, StorageUnit.BYTES); // Disable the container scanner so it does not create merkle tree files that interfere with this test. conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); - cluster = MiniOzoneCluster.newBuilder(conf) - .setCertificateClient(new CertificateClientTestImpl(conf)) - .setSecretKeyClient(new SecretKeyTestClient()) - .setNumDatanodes(3) - .build(); - cluster.waitForClusterToBeReady(); - rpcClient = OzoneClientFactory.getRpcClient(conf); - store = rpcClient.getObjectStore(); - dnClient = new DNContainerOperationClient(conf, null, null); + + ExitUtils.disableSystemExit(); + + startMiniKdc(); + setSecureConfig(); + createCredentialsInKDC(); + setSecretKeysConfig(); + startCluster(); } @AfterAll @@ -147,8 +167,12 @@ public static void stop() throws IOException { dnClient.close(); } + if (miniKdc != null) { + miniKdc.stop(); + } + if (cluster != null) { - cluster.shutdown(); + cluster.stop(); } } @@ -316,8 +340,13 @@ public void testContainerChecksumWithBlockMissing() throws Exception { Pair containerAndData = getDataAndContainer(true, 20 * 1024 * 1024, volume, bucket); long containerID = containerAndData.getLeft(); byte[] data = containerAndData.getRight(); - TestHelper.waitForReplicasContainerState(cluster, containerID, ContainerReplicaProto.State.CLOSED); - HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); + // Get the datanodes where the container replicas are stored. 
+ List dataNodeDetails = cluster.getStorageContainerManager().getContainerManager() + .getContainerReplicas(ContainerID.valueOf(containerID)) + .stream().map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + Assertions.assertEquals(3, dataNodeDetails.size()); + HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0)); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); @@ -326,21 +355,19 @@ public void testContainerChecksumWithBlockMissing() throws Exception { .getHandler(ContainerProtos.ContainerType.KeyValueContainer); BlockManager blockManager = kvHandler.getBlockManager(); - List blockDatas = blockManager.listBlock(container, -1, 100); - List deletedBlocks = new ArrayList<>(); + List blockDataList = blockManager.listBlock(container, -1, 100); String chunksPath = container.getContainerData().getChunksPath(); long oldDataChecksum = oldContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); // 2. Delete some blocks to simulate missing blocks. try (DBHandle db = BlockUtils.getDB(containerData, conf); BatchOperation op = db.getStore().getBatchHandler().initBatchOperation()) { - for (int i = 0; i < blockDatas.size(); i += 2) { - BlockData blockData = blockDatas.get(i); + for (int i = 0; i < blockDataList.size(); i += 2) { + BlockData blockData = blockDataList.get(i); // Delete the block metadata from the container db db.getStore().getBlockDataTable().deleteWithBatch(op, containerData.getBlockKey(blockData.getLocalID())); // Delete the block file. Files.deleteIfExists(Paths.get(chunksPath + "/" + blockData.getBlockID().getLocalID() + ".block")); - deletedBlocks.add(blockData); } db.getStore().getBatchHandler().commitBatchOperation(op); db.getStore().flushDB(); @@ -375,11 +402,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception { .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) .collect(Collectors.toSet()); assertEquals(1, dataChecksums.size()); - cluster.getHddsDatanodes().get(1).stop(); - cluster.getHddsDatanodes().get(2).stop(); TestHelper.validateData(KEY_NAME, data, store, volume, bucket); - cluster.getHddsDatanodes().get(1).start(); - cluster.getHddsDatanodes().get(2).start(); } @Test @@ -390,8 +413,13 @@ public void testContainerChecksumChunkCorruption() throws Exception { Pair containerAndData = getDataAndContainer(true, 20 * 1024 * 1024, volume, bucket); long containerID = containerAndData.getLeft(); byte[] data = containerAndData.getRight(); - TestHelper.waitForReplicasContainerState(cluster, containerID, ContainerReplicaProto.State.CLOSED); - HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0); + // Get the datanodes where the container replicas are stored. 
+ List dataNodeDetails = cluster.getStorageContainerManager().getContainerManager() + .getContainerReplicas(ContainerID.valueOf(containerID)) + .stream().map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + Assertions.assertEquals(3, dataNodeDetails.size()); + HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0)); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); @@ -483,11 +511,7 @@ public void testContainerChecksumChunkCorruption() throws Exception { .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) .collect(Collectors.toSet()); assertEquals(1, dataChecksums.size()); - cluster.getHddsDatanodes().get(1).stop(); - cluster.getHddsDatanodes().get(2).stop(); TestHelper.validateData(KEY_NAME, data, store, volume, bucket); - cluster.getHddsDatanodes().get(1).start(); - cluster.getHddsDatanodes().get(2).start(); } private Pair getDataAndContainer(boolean close, int dataLen, String volumeName, String bucketName) @@ -525,8 +549,84 @@ public static void writeChecksumFileToDatanodes(long containerID, ContainerMerkl KeyValueContainer keyValueContainer = (KeyValueContainer) dn.getDatanodeStateMachine().getContainer().getController() .getContainer(containerID); - keyValueHandler.getChecksumManager().writeContainerDataTree( - keyValueContainer.getContainerData(), tree); + if (keyValueContainer != null) { + keyValueHandler.getChecksumManager().writeContainerDataTree( + keyValueContainer.getContainerData(), tree); + } } } + + private static void setSecretKeysConfig() { + // Secret key lifecycle configs. + conf.set(HDDS_SECRET_KEY_ROTATE_CHECK_DURATION, "500s"); + conf.set(HDDS_SECRET_KEY_ROTATE_DURATION, "500s"); + conf.set(HDDS_SECRET_KEY_EXPIRY_DURATION, "500s"); + + // enable tokens + conf.setBoolean(HDDS_BLOCK_TOKEN_ENABLED, true); + conf.setBoolean(HDDS_CONTAINER_TOKEN_ENABLED, true); + } + + private static void createCredentialsInKDC() throws Exception { + ScmConfig scmConfig = conf.getObject(ScmConfig.class); + SCMHTTPServerConfig httpServerConfig = + conf.getObject(SCMHTTPServerConfig.class); + createPrincipal(ozoneKeytab, scmConfig.getKerberosPrincipal()); + createPrincipal(spnegoKeytab, httpServerConfig.getKerberosPrincipal()); + createPrincipal(testUserKeytab, testUserPrincipal); + } + + private static void createPrincipal(File keytab, String... 
principal) + throws Exception { + miniKdc.createPrincipal(keytab, principal); + } + + private static void startMiniKdc() throws Exception { + Properties securityProperties = MiniKdc.createConf(); + miniKdc = new MiniKdc(securityProperties, workDir); + miniKdc.start(); + } + + private static void setSecureConfig() throws IOException { + conf.setBoolean(OZONE_SECURITY_ENABLED_KEY, true); + host = InetAddress.getLocalHost().getCanonicalHostName() + .toLowerCase(); + conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.name()); + String curUser = UserGroupInformation.getCurrentUser().getUserName(); + conf.set(OZONE_ADMINISTRATORS, curUser); + String realm = miniKdc.getRealm(); + String hostAndRealm = host + "@" + realm; + conf.set(HDDS_SCM_KERBEROS_PRINCIPAL_KEY, "scm/" + hostAndRealm); + conf.set(HDDS_SCM_HTTP_KERBEROS_PRINCIPAL_KEY, "HTTP_SCM/" + hostAndRealm); + conf.set(OZONE_OM_KERBEROS_PRINCIPAL_KEY, "scm/" + hostAndRealm); + conf.set(OZONE_OM_HTTP_KERBEROS_PRINCIPAL_KEY, "HTTP_OM/" + hostAndRealm); + conf.set(DFS_DATANODE_KERBEROS_PRINCIPAL_KEY, "scm/" + hostAndRealm); + + ozoneKeytab = new File(workDir, "scm.keytab"); + spnegoKeytab = new File(workDir, "http.keytab"); + testUserKeytab = new File(workDir, "testuser.keytab"); + testUserPrincipal = "test@" + realm; + + conf.set(HDDS_SCM_KERBEROS_KEYTAB_FILE_KEY, ozoneKeytab.getAbsolutePath()); + conf.set(HDDS_SCM_HTTP_KERBEROS_KEYTAB_FILE_KEY, spnegoKeytab.getAbsolutePath()); + conf.set(OZONE_OM_KERBEROS_KEYTAB_FILE_KEY, ozoneKeytab.getAbsolutePath()); + conf.set(OZONE_OM_HTTP_KERBEROS_KEYTAB_FILE, spnegoKeytab.getAbsolutePath()); + conf.set(DFS_DATANODE_KERBEROS_KEYTAB_FILE_KEY, ozoneKeytab.getAbsolutePath()); + } + + private static void startCluster() throws Exception { + OzoneManager.setTestSecureOmFlag(true); + cluster = MiniOzoneCluster.newHABuilder(conf) + .setSCMServiceId("SecureSCM") + .setNumOfStorageContainerManagers(3) + .setNumOfOzoneManagers(1) + .setNumDatanodes(3) + .build(); + cluster.waitForClusterToBeReady(); + rpcClient = OzoneClientFactory.getRpcClient(conf); + store = rpcClient.getObjectStore(); + SecretKeyClient secretKeyClient = cluster.getStorageContainerManager().getSecretKeyManager(); + CertificateClient certClient = cluster.getStorageContainerManager().getScmCertificateClient(); + dnClient = new DNContainerOperationClient(conf, certClient, secretKeyClient); + } } From aa34c57c4c8b9bce1c1429726b4175d6ffed4ef5 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 21 Feb 2025 12:49:42 -0800 Subject: [PATCH 08/21] Address Review Comments --- .../ContainerChecksumTreeManager.java | 36 +++---- .../common/utils/ContainerLogger.java | 12 ++- .../container/keyvalue/KeyValueHandler.java | 99 ++++++++++++------- .../keyvalue/impl/BlockManagerImpl.java | 22 ++--- .../TestContainerChecksumTreeManager.java | 36 ++++--- .../TestContainerCommandReconciliation.java | 4 +- 6 files changed, 127 insertions(+), 82 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index fe36d3553905..c4b60f10cef8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -82,7 +82,9 @@ public void stop() { * file 
remains unchanged. * Concurrent writes to the same file are coordinated internally. */ - public void writeContainerDataTree(ContainerData data, ContainerMerkleTreeWriter tree) throws IOException { + public ContainerProtos.ContainerChecksumInfo writeContainerDataTree(ContainerData data, + ContainerMerkleTreeWriter tree) + throws IOException { long containerID = data.getContainerID(); Lock writeLock = getLock(containerID); writeLock.lock(); @@ -98,11 +100,13 @@ public void writeContainerDataTree(ContainerData data, ContainerMerkleTreeWriter checksumInfoBuilder = ContainerProtos.ContainerChecksumInfo.newBuilder(); } - checksumInfoBuilder + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumInfoBuilder .setContainerID(containerID) - .setContainerMerkleTree(captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto)); - write(data, checksumInfoBuilder.build()); + .setContainerMerkleTree(captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto)) + .build(); + write(data, checksumInfo); LOG.debug("Data merkle tree for container {} updated", containerID); + return checksumInfo; } finally { writeLock.unlock(); } @@ -146,33 +150,31 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection del } } - public ContainerDiffReport diff(KeyValueContainerData thisContainer, + /** + * Compares the checksum info of the container with the peer's checksum info and returns a report of the differences. + * @param thisChecksumInfo The checksum info of the container on this datanode. + * @param peerChecksumInfo The checksum info of the container on the peer datanode. + */ + public ContainerDiffReport diff(ContainerProtos.ContainerChecksumInfo thisChecksumInfo, ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws StorageContainerException { ContainerDiffReport report = new ContainerDiffReport(); try { captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> { - Preconditions.assertNotNull(thisContainer, "Container data is null"); + Preconditions.assertNotNull(thisChecksumInfo, "Our checksum info is null"); Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null"); - Optional thisContainerChecksumInfo = read(thisContainer); - if (!thisContainerChecksumInfo.isPresent()) { - throw new StorageContainerException("The container #" + thisContainer.getContainerID() + - " doesn't have container checksum", ContainerProtos.Result.IO_EXCEPTION); - } - - if (thisContainer.getContainerID() != peerChecksumInfo.getContainerID()) { + if (thisChecksumInfo.getContainerID() != peerChecksumInfo.getContainerID()) { throw new StorageContainerException("Container Id does not match for container " - + thisContainer.getContainerID(), ContainerProtos.Result.CONTAINER_ID_MISMATCH); + + thisChecksumInfo.getContainerID(), ContainerProtos.Result.CONTAINER_ID_MISMATCH); } - ContainerProtos.ContainerChecksumInfo thisChecksumInfo = thisContainerChecksumInfo.get(); compareContainerMerkleTree(thisChecksumInfo, peerChecksumInfo, report); }); } catch (IOException ex) { metrics.incrementMerkleTreeDiffFailures(); - throw new StorageContainerException("Container Diff failed for container #" + thisContainer.getContainerID(), ex, - ContainerProtos.Result.IO_EXCEPTION); + throw new StorageContainerException("Container Diff failed for container #" + thisChecksumInfo.getContainerID(), + ex, ContainerProtos.Result.IO_EXCEPTION); } // Update Container Diff metrics based on the diff report. 
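A minimal usage sketch (not part of the patch) of the reworked diff API above, which now compares two ContainerChecksumInfo protos and returns a ContainerDiffReport. It assumes a ContainerChecksumTreeManager named checksumManager, the container's KeyValueContainerData, a peer's ContainerChecksumInfo, and an slf4j LOG are already in scope; the accessor names (read, diff, needsRepair, getMissingBlocks, getMissingChunks, getCorruptChunks) are taken from this patch series, everything else is illustrative:

    private static void logDiffSummary(ContainerChecksumTreeManager checksumManager,
        KeyValueContainerData containerData,
        ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws IOException {
      Optional<ContainerProtos.ContainerChecksumInfo> ourInfo = checksumManager.read(containerData);
      if (!ourInfo.isPresent()) {
        // No local tree to compare against yet; KeyValueHandler rebuilds one from RocksDB metadata first.
        return;
      }
      ContainerDiffReport report = checksumManager.diff(ourInfo.get(), peerChecksumInfo);
      if (report.needsRepair()) {
        // Missing blocks are pulled whole from the peer; missing or corrupt chunks are re-read per block.
        LOG.info("Container {} differs from peer: {} missing blocks, {} blocks with missing chunks, "
            + "{} blocks with corrupt chunks", containerData.getContainerID(),
            report.getMissingBlocks().size(), report.getMissingChunks().size(),
            report.getCorruptChunks().size());
      }
    }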
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java index b02cb5195d14..38d6d6b5d6f1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hdds.HddsUtils.checksumToString; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.apache.logging.log4j.LogManager; @@ -153,8 +154,15 @@ public static void logRecovered(ContainerData containerData) { * @param containerData The container that was reconciled on this datanode. * @param oldDataChecksum The old data checksum. */ - public static void logReconciled(ContainerData containerData, long oldDataChecksum) { - LOG.info(getMessage(containerData, "Container reconciled. Old checksum is " + checksumToString(oldDataChecksum))); + public static void logReconciled(ContainerData containerData, long oldDataChecksum, DatanodeDetails peer) { + if (containerData.getDataChecksum() == oldDataChecksum) { + LOG.info(getMessage(containerData, "Container reconciled with peer " + peer.toString() + + ". No change in checksum. Current checksum is " + checksumToString(containerData.getDataChecksum()))); + } else { + LOG.warn(getMessage(containerData, "Container reconciled with peer " + peer.toString() + + ". 
Checksum updated from " + checksumToString(oldDataChecksum) + " to " + + checksumToString(containerData.getDataChecksum()))); + } } private static String getMessage(ContainerData containerData, diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 5ad1c6ca88d4..ae849c8d250b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -69,6 +69,8 @@ import java.nio.file.Files; import java.nio.file.Path; import java.time.Clock; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; @@ -612,18 +614,19 @@ ContainerCommandResponseProto handleCloseContainer( return getSuccessResponse(request); } - public void createContainerMerkleTree(Container container) { + public ContainerProtos.ContainerChecksumInfo createContainerMerkleTree(Container container) { if (ContainerChecksumTreeManager.checksumFileExist(container)) { - return; + return null; } try { KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); - updateContainerChecksum(containerData); + return updateContainerChecksum(containerData); } catch (IOException ex) { LOG.error("Cannot create container checksum for container {} , Exception: ", container.getContainerData().getContainerID(), ex); } + return null; } /** @@ -1476,27 +1479,32 @@ public void deleteContainer(Container container, boolean force) deleteInternal(container, force); } - // Update Java Doc steps @Override public void reconcileContainer(DNContainerOperationClient dnClient, Container container, Set peers) throws IOException { KeyValueContainer kvContainer = (KeyValueContainer) container; KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); - Optional checksumInfo = checksumManager.read(containerData); + Optional optionalChecksumInfo = checksumManager.read(containerData); long oldDataChecksum = 0; + long dataChecksum = 0; + ContainerProtos.ContainerChecksumInfo checksumInfo; - if (checksumInfo.isPresent()) { - oldDataChecksum = checksumInfo.get().getContainerMerkleTree().getDataChecksum(); + if (optionalChecksumInfo.isPresent()) { + checksumInfo = optionalChecksumInfo.get(); + oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); } else { // Try creating the checksum info from RocksDB metadata if it is not present. 
- createContainerMerkleTree(container); - checksumInfo = checksumManager.read(containerData); - if (checksumInfo.isPresent()) { - oldDataChecksum = checksumInfo.get().getContainerMerkleTree().getDataChecksum(); + checksumInfo = createContainerMerkleTree(container); + if (checksumInfo == null) { + LOG.error("Failed to reconcile container {} as checksum info is not available", + containerData.getContainerID()); + return; } + oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); } for (DatanodeDetails peer : peers) { + Instant start = Instant.now(); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( containerData.getContainerID(), peer); if (peerChecksumInfo == null) { @@ -1505,8 +1513,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container continue; } - // Check block token usage. How it is used in DN - ContainerDiffReport diffReport = checksumManager.diff(containerData, peerChecksumInfo); + ContainerDiffReport diffReport = checksumManager.diff(checksumInfo, peerChecksumInfo); TokenHelper tokenHelper = dnClient.getTokenHelper(); XceiverClientSpi xceiverClient = dnClient.getXceiverClientManager() .acquireClient(createSingleNodePipeline(peer)); @@ -1525,7 +1532,8 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle missing chunks for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { try { - reconcileChunk(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), entry.getValue()); + reconcileChunksPerBlock(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), + entry.getValue()); } catch (IOException e) { LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), containerData.getContainerID(), e); @@ -1535,36 +1543,42 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle corrupt chunks for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { try { - reconcileChunk(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), entry.getValue()); + reconcileChunksPerBlock(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), + entry.getValue()); } catch (IOException e) { LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), containerData.getContainerID(), e); } } - updateContainerChecksum(containerData); + // Update checksum based on RocksDB metadata + ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateContainerChecksum(containerData); + dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); + + long duration = Duration.between(start, Instant.now()).toMillis(); + if (dataChecksum == oldDataChecksum) { + metrics.incContainerReconciledWithoutChanges(); + LOG.info("Container {} reconciled without changes, Current checksum {}. Time taken {} ms", + containerData.getContainerID(), checksumToString(dataChecksum), duration); + } else { + metrics.incContainerReconciledWithChanges(); + LOG.warn("Container {} reconciled, Checksum updated from {} to {}. 
Time taken {} ms", + containerData.getContainerID(), checksumToString(oldDataChecksum), + checksumToString(dataChecksum), duration); + } + ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, peer); } finally { dnClient.getXceiverClientManager().releaseClient(xceiverClient, false); } } - // Update checksum based on RocksDB metadata - long dataChecksum = updateContainerChecksum(containerData); // Trigger manual on demand scanner OnDemandContainerDataScanner.scanContainer(container); - if (dataChecksum == oldDataChecksum) { - metrics.incContainerReconciledWithoutChanges(); - LOG.info("Container {} reconciled without changes, Current checksum {}", containerData.getContainerID(), - checksumToString(dataChecksum)); - } else { - metrics.incContainerReconciledWithChanges(); - LOG.warn("Container {} reconciled, Checksum updated from {} to {}", containerData.getContainerID(), - checksumToString(oldDataChecksum), checksumToString(dataChecksum)); - } - ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum); sendICR(container); } - private long updateContainerChecksum(KeyValueContainerData containerData) throws IOException { + // Return the entire tree instead of just the checksum + private ContainerProtos.ContainerChecksumInfo updateContainerChecksum(KeyValueContainerData containerData) + throws IOException { ContainerMerkleTreeWriter merkleTree = new ContainerMerkleTreeWriter(); try (DBHandle dbHandle = BlockUtils.getDB(containerData, conf); BlockIterator blockIterator = dbHandle.getStore(). @@ -1575,16 +1589,22 @@ private long updateContainerChecksum(KeyValueContainerData containerData) throws merkleTree.addChunks(blockData.getLocalID(), chunkInfos); } } - checksumManager.writeContainerDataTree(containerData, merkleTree); - long dataChecksum = merkleTree.toProto().getDataChecksum(); - containerData.setDataChecksum(dataChecksum); - return dataChecksum; + ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager + .writeContainerDataTree(containerData, merkleTree); + containerData.setDataChecksum(checksumInfo.getContainerMerkleTree().getDataChecksum()); + return checksumInfo; } + /** + * Handle missing block. It reads the missing block data from the peer datanode and writes it to the local container. + * If the block write fails, the block commit sequence id is not updated. + */ private void handleMissingBlock(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, XceiverClientSpi xceiverClient, ContainerProtos.BlockMerkleTree missingBlock) throws IOException { BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); + // The length of the block is not known, so instead of passing the default block length we pass 0. As the length + // is not used to validate the token for getBlock call. Token blockToken = tokenHelper.getBlockToken(blockID, 0L); // TODO: Re-use the blockResponse for the same block again. ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, @@ -1635,12 +1655,19 @@ private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos } } - private void reconcileChunk(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, - XceiverClientSpi xceiverClient, long blockId, - List chunkList) throws IOException { + /** + * This method reconciles chunks per block. It reads the missing/corrupt chunk data from the peer + * datanode and writes it to the local container. 
If the chunk write fails, the block commit sequence + * id is not updated. + */ + private void reconcileChunksPerBlock(KeyValueContainer container, ContainerData containerData, + TokenHelper tokenHelper, XceiverClientSpi xceiverClient, long blockId, + List chunkList) throws IOException { Set offsets = chunkList.stream().map(ContainerProtos.ChunkMerkleTree::getOffset) .collect(Collectors.toSet()); BlockID blockID = new BlockID(containerData.getContainerID(), blockId); + // The length of the block is not known, so instead of passing the default block length we pass 0. As the length + // is not used to validate the token for getBlock call. Token blockToken = tokenHelper.getBlockToken(blockID, 0L); ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java index b8dbd93a2732..2e6086d391c4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java @@ -101,17 +101,12 @@ public long putBlock(Container container, BlockData data, @Override public long putBlockForClosedContainer(Container container, BlockData data, boolean overwriteBcsId) throws IOException { - return persistPutBlockForClosedContainer((KeyValueContainer) container, data, overwriteBcsId); - } - - private long persistPutBlockForClosedContainer(KeyValueContainer container, BlockData data, boolean overwriteBcsId) - throws IOException { Preconditions.checkNotNull(data, "BlockData cannot be null for put " + - "operation."); + "operation."); Preconditions.checkState(data.getContainerID() >= 0, "Container Id " + - "cannot be negative"); + "cannot be negative"); - KeyValueContainerData containerData = container.getContainerData(); + KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); // We are not locking the key manager since RocksDB serializes all actions // against a single DB. We rely on DB level locking to avoid conflicts. @@ -130,7 +125,7 @@ private long persistPutBlockForClosedContainer(KeyValueContainer container, Bloc // update the blockData as well as BlockCommitSequenceId here try (BatchOperation batch = db.getStore().getBatchHandler() - .initBatchOperation()) { + .initBatchOperation()) { // If block exists in cache, blockCount should not be incremented. if (db.getStore().getBlockDataTable().get(containerData.getBlockKey(localID)) == null) { // Block does not exist in DB => blockCount needs to be @@ -149,13 +144,14 @@ private long persistPutBlockForClosedContainer(KeyValueContainer container, Bloc // block length is used, And also on restart the blocks committed to DB // is only used to compute the bytes used. This is done to keep the // current behavior and avoid DB write during write chunk operation. + // Write UTs for this db.getStore().getMetadataTable().putWithBatch(batch, containerData.getBytesUsedKey(), - containerData.getBytesUsed()); + containerData.getBytesUsed()); // Set Block Count for a container. 
if (incrBlockCount) { db.getStore().getMetadataTable().putWithBatch(batch, containerData.getBlockCountKey(), - containerData.getBlockCount() + 1); + containerData.getBlockCount() + 1); } db.getStore().getBatchHandler().commitBatchOperation(batch); @@ -171,8 +167,8 @@ private long persistPutBlockForClosedContainer(KeyValueContainer container, Bloc } if (LOG.isDebugEnabled()) { - LOG.debug("Block " + data.getBlockID() + " successfully committed with bcsId " - + bcsId + " chunk size " + data.getChunks().size()); + LOG.debug("Block {} successfully persisted for closed container {} with bcsId {} chunk size {}", + data.getBlockID(), containerData.getContainerID(), bcsId, data.getChunks().size()); } return data.getSize(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index e0e8930c9466..987ff7cf81f2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -39,6 +39,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.stream.Stream; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.hdds.conf.ConfigurationSource; @@ -339,7 +340,8 @@ public void testContainerWithNoDiff() throws Exception { ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerDiffReport diff = checksumManager.diff(container, peerChecksumInfo); + Optional checksumInfo = checksumManager.read(container); + ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertFalse(diff.needsRepair()); assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); @@ -362,7 +364,8 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree.toProto()).build(); - ContainerDiffReport diff = checksumManager.diff(container, peerChecksumInfo); + Optional checksumInfo = checksumManager.read(container); + ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); assertTrue(metrics.getMerkleTreeDiffLatencyNS().lastStat().total() > 0); assertContainerDiffMatch(expectedDiff, diff); assertEquals(checksumManager.getMetrics().getRepairContainerDiffs(), 1); @@ -385,15 +388,19 @@ public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingC ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(container.getContainerID()) .setContainerMerkleTree(peerMerkleTree).build(); - ContainerDiffReport diff = checksumManager.diff(container, peerChecksumInfo); + Optional checksumInfo = checksumManager.read(container); + ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); 
assertFalse(diff.needsRepair()); assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1); } @Test - public void testFailureContainerMerkleTreeMetric() { + public void testFailureContainerMerkleTreeMetric() throws IOException { ContainerProtos.ContainerChecksumInfo peerChecksum = ContainerProtos.ContainerChecksumInfo.newBuilder().build(); - assertThrows(StorageContainerException.class, () -> checksumManager.diff(container, peerChecksum)); + ContainerMerkleTreeWriter ourMerkleTree = buildTestTree(config); + checksumManager.writeContainerDataTree(container, ourMerkleTree); + Optional checksumInfo = checksumManager.read(container); + assertThrows(StorageContainerException.class, () -> checksumManager.diff(checksumInfo.get(), peerChecksum)); assertEquals(checksumManager.getMetrics().getMerkleTreeDiffFailure(), 1); } @@ -413,7 +420,8 @@ void testDeletedBlocksInPeerAndBoth() throws Exception { .addAllDeletedBlocks(deletedBlockList).build(); writeContainerDataTreeProto(container, ourMerkleTree); - ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); + Optional checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. @@ -423,7 +431,8 @@ void testDeletedBlocksInPeerAndBoth() throws Exception { // Delete blocks in our merkle tree as well. checksumManager.markBlocksAsDeleted(container, deletedBlockList); - containerDiff = checksumManager.diff(container, peerChecksumInfo); + checksumInfo = checksumManager.read(container); + containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in both merkle tree. @@ -449,7 +458,8 @@ void testDeletedBlocksInOurContainerOnly() throws Exception { writeContainerDataTreeProto(container, ourMerkleTree); checksumManager.markBlocksAsDeleted(container, deletedBlockList); - ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); + Optional checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in our merkle tree. @@ -475,7 +485,8 @@ void testCorruptionInOurMerkleTreeAndDeletedBlocksInPeer() throws Exception { writeContainerDataTreeProto(container, ourMerkleTree); - ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); + Optional checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. 
@@ -499,8 +510,8 @@ void testContainerDiffWithBlockDeletionInPeer() throws Exception { writeContainerDataTreeProto(container, ourMerkleTree); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build(); - - ContainerDiffReport containerDiff = checksumManager.diff(container, peerChecksumInfo); + Optional checksumInfo = checksumManager.read(container); + ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); // The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted // in peer merkle tree. assertFalse(containerDiff.getMissingBlocks().isEmpty()); @@ -512,7 +523,8 @@ void testContainerDiffWithBlockDeletionInPeer() throws Exception { // Clear deleted blocks to add them in missing blocks. peerChecksumInfo = peerChecksumInfoBuilder.clearDeletedBlocks().build(); - containerDiff = checksumManager.diff(container, peerChecksumInfo); + checksumInfo = checksumManager.read(container); + containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo); assertFalse(containerDiff.getMissingBlocks().isEmpty()); // Missing block does not contain the deleted blocks 6L to 10L diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 722eeb9367cd..dbed040e4a39 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -142,8 +142,8 @@ public static void init() throws Exception { conf = new OzoneConfiguration(); conf.set(OZONE_SCM_CLIENT_ADDRESS_KEY, "localhost"); conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); - conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 1024 * 1024, StorageUnit.BYTES); - conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 2 * 1024 * 1024, StorageUnit.BYTES); + conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 128 * 1024, StorageUnit.BYTES); + conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 512 * 1024, StorageUnit.BYTES); // Disable the container scanner so it does not create merkle tree files that interfere with this test. 
conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); From 546bd6f9af285f0e0ebf9d6cff70f3dff9ebf598 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 10 Mar 2025 13:14:43 -0700 Subject: [PATCH 09/21] Add unit test suite --- .../container/keyvalue/KeyValueHandler.java | 19 + .../keyvalue/TestContainerCorruptions.java | 2 +- .../keyvalue/TestKeyValueHandler.java | 362 ++++++++++++++++-- .../TestContainerCommandReconciliation.java | 6 +- 4 files changed, 365 insertions(+), 24 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index a1674963849b..a6ef20d0553f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.QUASI_CLOSED; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.RECOVERING; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_ALREADY_EXISTS; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR; @@ -1695,6 +1696,7 @@ private void reconcileChunksPerBlock(KeyValueContainer container, ContainerData continue; } + verifyChunksLength(chunkInfoProto, localChunksMap.get(chunkInfoProto.getOffset())); ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); @@ -1713,6 +1715,23 @@ private void reconcileChunksPerBlock(KeyValueContainer container, ContainerData putBlockForClosedContainer(container, localBlockData, maxBcsId, overwriteBcsId); } + private void verifyChunksLength(ContainerProtos.ChunkInfo peerChunkInfo, ContainerProtos.ChunkInfo localChunkInfo) + throws StorageContainerException { + if (localChunkInfo == null || peerChunkInfo == null) { + return; + } + + if (peerChunkInfo.getOffset() != localChunkInfo.getOffset()) { + throw new StorageContainerException("Offset mismatch for chunk. Expected: " + localChunkInfo.getOffset() + + ", Actual: " + peerChunkInfo.getOffset(), CHUNK_FILE_INCONSISTENCY); + } + + if (peerChunkInfo.getLen() != localChunkInfo.getLen()) { + throw new StorageContainerException("Length mismatch for chunk at offset " + localChunkInfo.getOffset() + + ". Expected: " + localChunkInfo.getLen() + ", Actual: " + peerChunkInfo.getLen(), CHUNK_FILE_INCONSISTENCY); + } + } + /** * Called by BlockDeletingService to delete all the chunks in a block * before proceeding to delete the block info from DB. 
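The reconciliation cases that patch 09 adds to TestKeyValueHandler are Mockito-driven, as the imports added below suggest. A trimmed sketch of that style, not taken from the patch: it assumes a keyValueHandler under test and a closed KeyValueContainer fixture named container whose checksum file already exists, and stubs only the peer lookup; the real tests also wire up XceiverClientManager and TokenHelper collaborators.

    @Test
    public void reconcileSkipsPeerWithoutChecksumInfo() throws Exception {
      DNContainerOperationClient dnClient = mock(DNContainerOperationClient.class);
      DatanodeDetails peer = randomDatanodeDetails();
      // A peer that has not generated its checksum tree yet returns null, so reconcileContainer
      // should skip it and leave the local data checksum untouched.
      when(dnClient.getContainerChecksumInfo(anyLong(), any())).thenReturn(null);
      long oldChecksum = container.getContainerData().getDataChecksum();
      keyValueHandler.reconcileContainer(dnClient, container, Collections.singleton(peer));
      assertEquals(oldChecksum, container.getContainerData().getDataChecksum());
    }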
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java index 543bfc17b5bd..20ad00676b23 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java @@ -195,7 +195,7 @@ private static void corruptFile(File file) { } } - private static File getBlock(Container container, long blockID) { + public static File getBlock(Container container, long blockID) { File blockFile; File chunksDir = new File(container.getContainerData().getContainerPath(), "chunks"); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index d7ea7c5b3038..934750941b26 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -17,6 +17,7 @@ package org.apache.hadoop.ozone.container.keyvalue; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_CHOOSING_POLICY; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; @@ -26,37 +27,60 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LAYOUT_KEY; import static org.apache.hadoop.ozone.OzoneConsts.GB; +import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.createBlockMetaData; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.COMMIT_STAGE; +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.getBlock; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.times; import static 
org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.google.common.collect.Lists; import java.io.File; import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.time.Clock; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Optional; +import java.util.Random; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; @@ -65,17 +89,29 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; +import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; +import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; +import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.security.token.TokenVerifier; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.ozone.common.Checksum; +import org.apache.hadoop.ozone.common.ChecksumData; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.ContainerTestUtils; +import org.apache.hadoop.ozone.container.common.helpers.BlockData; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; +import org.apache.hadoop.ozone.container.common.helpers.TokenHelper; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.interfaces.Handler; import org.apache.hadoop.ozone.container.common.report.IncrementalReportSender; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; @@ -83,8 +119,10 @@ import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; +import 
org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.util.Sets; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.Assertions; @@ -92,7 +130,12 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.MockedStatic; import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; /** * Unit tests for {@link KeyValueHandler}. @@ -104,18 +147,44 @@ public class TestKeyValueHandler { private Path tempDir; @TempDir private Path dbFile; - - private static final String DATANODE_UUID = UUID.randomUUID().toString(); + @TempDir + private Path testRoot; private static final long DUMMY_CONTAINER_ID = 9999; private static final String DUMMY_PATH = "dummy/dir/doesnt/exist"; + private static final int UNIT_LEN = 1024; + private static final int CHUNK_LEN = 3 * UNIT_LEN; + private static final int CHUNKS_PER_BLOCK = 4; + private static final String DATANODE_UUID = UUID.randomUUID().toString(); + private static final String CLUSTER_ID = UUID.randomUUID().toString(); private HddsDispatcher dispatcher; private KeyValueHandler handler; + private OzoneConfiguration conf; + + public static Stream corruptionValues() { + return Stream.of( + Arguments.of(5, 0), + Arguments.of(0, 5), + Arguments.of(0, 10), + Arguments.of(10, 0), + Arguments.of(5, 10), + Arguments.of(10, 5), + Arguments.of(2, 3), + Arguments.of(3, 2), + Arguments.of(4, 6), + Arguments.of(6, 4), + Arguments.of(6, 9), + Arguments.of(9, 6) + ); + } @BeforeEach - public void setup() throws StorageContainerException { + public void setup() throws IOException { // Create mock HddsDispatcher and KeyValueHandler. 
+ conf = new OzoneConfiguration(); + conf.set(HDDS_DATANODE_DIR_KEY, testRoot.toString()); + conf.set(OZONE_METADATA_DIRS, testRoot.toString()); handler = mock(KeyValueHandler.class); HashMap handlers = new HashMap<>(); @@ -287,7 +356,7 @@ public void testVolumeSetInKeyValueHandler() throws Exception { File metadataDir = Files.createDirectory(tempDir.resolve("metadataDir")).toFile(); - OzoneConfiguration conf = new OzoneConfiguration(); + conf = new OzoneConfiguration(); conf.set(HDDS_DATANODE_DIR_KEY, datanodeDir.getAbsolutePath()); conf.set(OZONE_METADATA_DIRS, metadataDir.getAbsolutePath()); MutableVolumeSet @@ -341,8 +410,8 @@ private ContainerCommandRequestProto getDummyCommandRequestProto( @ContainerLayoutTestInfo.ContainerTest public void testCloseInvalidContainer(ContainerLayoutVersion layoutVersion) throws IOException { - KeyValueHandler keyValueHandler = createKeyValueHandler(); - OzoneConfiguration conf = new OzoneConfiguration(); + KeyValueHandler keyValueHandler = createKeyValueHandler(tempDir); + conf = new OzoneConfiguration(); KeyValueContainerData kvData = new KeyValueContainerData(DUMMY_CONTAINER_ID, layoutVersion, (long) StorageUnit.GB.toBytes(1), UUID.randomUUID().toString(), @@ -386,7 +455,7 @@ public void testDeleteContainer() throws IOException { final long containerID = 1L; final String clusterId = UUID.randomUUID().toString(); final String datanodeId = UUID.randomUUID().toString(); - final ConfigurationSource conf = new OzoneConfiguration(); + conf = new OzoneConfiguration(); final ContainerSet containerSet = new ContainerSet(1000); final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); @@ -475,7 +544,7 @@ public void testDeleteContainer() throws IOException { @ContainerLayoutTestInfo.ContainerTest public void testReconcileContainer(ContainerLayoutVersion layoutVersion) throws Exception { - OzoneConfiguration conf = new OzoneConfiguration(); + conf = new OzoneConfiguration(); KeyValueContainerData data = new KeyValueContainerData(123L, layoutVersion, GB, PipelineID.randomId().toString(), randomDatanodeDetails().getUuidString()); @@ -496,7 +565,7 @@ public void testReconcileContainer(ContainerLayoutVersion layoutVersion) throws Assertions.assertEquals(container.getContainerData().getContainerID(), reportedID); long reportDataChecksum = report.getDataChecksum(); - Assertions.assertNotEquals(0, reportDataChecksum, + assertNotEquals(0, reportDataChecksum, "Container report should have populated the checksum field with a non-zero value."); icrCount.incrementAndGet(); }; @@ -517,6 +586,105 @@ public void testReconcileContainer(ContainerLayoutVersion layoutVersion) throws Assertions.assertEquals(1, icrCount.get()); } + @ParameterizedTest + @MethodSource("corruptionValues") + public void testFullContainerReconciliation(int numBlocks, int numChunks) throws Exception { + KeyValueHandler kvHandler = createKeyValueHandler(testRoot); + ContainerChecksumTreeManager checksumManager = kvHandler.getChecksumManager(); + DNContainerOperationClient dnClient = mock(DNContainerOperationClient.class); + XceiverClientManager xceiverClientManager = mock(XceiverClientManager.class); + TokenHelper tokenHelper = new TokenHelper(new SecurityConfig(conf), null); + when(dnClient.getTokenHelper()).thenReturn(tokenHelper); + when(dnClient.getXceiverClientManager()).thenReturn(xceiverClientManager); + final long containerID = 100L; + // Create 3 replicas of the same container (ID 100), each with 15 blocks.
+ List containers = createContainerWithBlocks(kvHandler, containerID, 15, 3); + assertEquals(3, containers.size()); + + // Introduce corruption in each container on different replicas. + introduceCorruption(kvHandler, containers.get(1), numBlocks, numChunks, false); + introduceCorruption(kvHandler, containers.get(2), numBlocks, numChunks, true); + + // Without reconciliation, checksums should be different because of the corruption. + Set checksumsBeforeReconciliation = new HashSet<>(); + for (KeyValueContainer kvContainer : containers) { + kvHandler.createContainerMerkleTree(kvContainer); + Optional containerChecksumInfo = + checksumManager.read(kvContainer.getContainerData()); + assertTrue(containerChecksumInfo.isPresent()); + checksumsBeforeReconciliation.add(containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum()); + } + // There should be more than 1 checksum because of the corruption. + assertTrue(checksumsBeforeReconciliation.size() > 1); + + List datanodes = Lists.newArrayList(randomDatanodeDetails(), randomDatanodeDetails(), + randomDatanodeDetails()); + + // Setup mock for each datanode network calls needed for reconciliation. + try (MockedStatic containerProtocolMock = Mockito.mockStatic(ContainerProtocolCalls.class); + MockedStatic dnClientMock = Mockito.mockStatic(DNContainerOperationClient.class)) { + + for (int i = 0; i < datanodes.size(); i++) { + DatanodeDetails datanode = datanodes.get(i); + KeyValueContainer container = containers.get(i); + + Pipeline pipeline = mock(Pipeline.class); + XceiverClientSpi client = mock(XceiverClientSpi.class); + + dnClientMock.when(() -> DNContainerOperationClient.createSingleNodePipeline(datanode)).thenReturn(pipeline); + when(xceiverClientManager.acquireClient(pipeline)).thenReturn(client); + doNothing().when(xceiverClientManager).releaseClient(eq(client), anyBoolean()); + when(client.getPipeline()).thenReturn(pipeline); + + // Mock checksum info + when(dnClient.getContainerChecksumInfo(containerID, datanode)) + .thenReturn(checksumManager.read(container.getContainerData()).get()); + + // Mock getBlock + containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(eq(client), any(), any(), anyMap())) + .thenAnswer(inv -> ContainerProtos.GetBlockResponseProto.newBuilder() + .setBlockData(kvHandler.getBlockManager().getBlock(container, inv.getArgument(1)).getProtoBufMessage()) + .build()); + + // Mock readChunk + containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(eq(client), any(), any(), any(), any())) + .thenAnswer(inv -> createReadChunkResponse(inv, container, kvHandler)); + } + + kvHandler.reconcileContainer(dnClient, containers.get(0), Sets.newHashSet(datanodes)); + kvHandler.reconcileContainer(dnClient, containers.get(1), Sets.newHashSet(datanodes)); + kvHandler.reconcileContainer(dnClient, containers.get(2), Sets.newHashSet(datanodes)); + + // After reconciliation, checksums should be the same for all containers. 
+ ContainerProtos.ContainerChecksumInfo prevContainerChecksumInfo = null; + for (KeyValueContainer kvContainer : containers) { + kvHandler.createContainerMerkleTree(kvContainer); + Optional containerChecksumInfo = + checksumManager.read(kvContainer.getContainerData()); + assertTrue(containerChecksumInfo.isPresent()); + if (prevContainerChecksumInfo != null) { + assertEquals(prevContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(), + containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum()); + } + prevContainerChecksumInfo = containerChecksumInfo.get(); + } + } + } + + // Helper method to create readChunk responses + private ContainerProtos.ReadChunkResponseProto createReadChunkResponse(InvocationOnMock inv, + KeyValueContainer container, + KeyValueHandler kvHandler) throws IOException { + ContainerProtos.DatanodeBlockID blockId = inv.getArgument(2); + ContainerProtos.ChunkInfo chunkInfo = inv.getArgument(1); + return ContainerProtos.ReadChunkResponseProto.newBuilder() + .setBlockID(blockId) + .setChunkData(chunkInfo) + .setData(kvHandler.getChunkManager().readChunk(container, BlockID.getFromProtobuf(blockId), + ChunkInfo.getFromProtoBuf(chunkInfo), null).toByteString()) + .build(); + } + @Test public void testGetContainerChecksumInfoOnInvalidContainerStates() { when(handler.handleGetContainerChecksumInfo(any(), any())).thenCallRealMethod(); @@ -551,7 +719,7 @@ public void testDeleteContainerTimeout() throws IOException { final long containerID = 1L; final String clusterId = UUID.randomUUID().toString(); final String datanodeId = UUID.randomUUID().toString(); - final ConfigurationSource conf = new OzoneConfiguration(); + conf = new OzoneConfiguration(); final ContainerSet containerSet = new ContainerSet(1000); final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); final Clock clock = mock(Clock.class); @@ -619,24 +787,174 @@ private static ContainerCommandRequestProto createContainerRequest( .build(); } - private KeyValueHandler createKeyValueHandler() throws IOException { - final String clusterId = UUID.randomUUID().toString(); - final String datanodeId = UUID.randomUUID().toString(); - final ConfigurationSource conf = new OzoneConfiguration(); + private KeyValueHandler createKeyValueHandler(Path path) throws IOException { final ContainerSet containerSet = new ContainerSet(1000); final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); - HddsVolume hddsVolume = new HddsVolume.Builder(tempDir.toString()).conf(conf) - .clusterID(clusterId).datanodeUuid(datanodeId) + HddsVolume hddsVolume = new HddsVolume.Builder(path.toString()).conf(conf) + .clusterID(CLUSTER_ID).datanodeUuid(DATANODE_UUID) .volumeSet(volumeSet) .build(); - hddsVolume.format(clusterId); - hddsVolume.createWorkingDir(clusterId, null); - hddsVolume.createTmpDirs(clusterId); + hddsVolume.format(CLUSTER_ID); + hddsVolume.createWorkingDir(CLUSTER_ID, null); + hddsVolume.createTmpDirs(CLUSTER_ID); when(volumeSet.getVolumesList()).thenReturn(Collections.singletonList(hddsVolume)); final KeyValueHandler kvHandler = ContainerTestUtils.getKeyValueHandler(conf, - datanodeId, containerSet, volumeSet); - kvHandler.setClusterID(clusterId); + DATANODE_UUID, containerSet, volumeSet); + kvHandler.setClusterID(CLUSTER_ID); return kvHandler; } + + /** + * Creates a container with normal and deleted blocks. + * First it will insert normal blocks, and then it will insert + * deleted blocks. 
+ */ + protected List createContainerWithBlocks(KeyValueHandler kvHandler, long containerId, + int blocks, int numContainerCopy) + throws Exception { + String strBlock = "block"; + String strChunk = "chunkFile"; + List containers = new ArrayList<>(); + MutableVolumeSet volumeSet = new MutableVolumeSet(DATANODE_UUID, conf, null, + StorageVolume.VolumeType.DATA_VOLUME, null); + createDbInstancesForTestIfNeeded(volumeSet, CLUSTER_ID, CLUSTER_ID, conf); + int bytesPerChecksum = 2 * UNIT_LEN; + Checksum checksum = new Checksum(ContainerProtos.ChecksumType.SHA256, + bytesPerChecksum); + byte[] chunkData = RandomStringUtils.randomAscii(CHUNK_LEN).getBytes(UTF_8); + ChecksumData checksumData = checksum.computeChecksum(chunkData); + + for (int j = 0; j < numContainerCopy; j++) { + KeyValueContainerData containerData = new KeyValueContainerData(containerId, + ContainerLayoutVersion.FILE_PER_BLOCK, (long) CHUNKS_PER_BLOCK * CHUNK_LEN * blocks, + UUID.randomUUID().toString(), UUID.randomUUID().toString()); + Path kvContainerPath = Files.createDirectory(testRoot.resolve(containerId + "-" + j)); + containerData.setMetadataPath(kvContainerPath.toString()); + containerData.setDbFile(kvContainerPath.toFile()); + + KeyValueContainer container = new KeyValueContainer(containerData, conf); + StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()) + .forEach(hddsVolume -> hddsVolume.setDbParentDir(kvContainerPath.toFile())); + container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(), UUID.randomUUID().toString()); + assertNotNull(containerData.getChunksPath()); + File chunksPath = new File(containerData.getChunksPath()); + ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, 0, 0); + + List chunkList = new ArrayList<>(); + for (int i = 0; i < blocks; i++) { + BlockID blockID = new BlockID(containerId, i); + BlockData blockData = new BlockData(blockID); + + chunkList.clear(); + for (long chunkCount = 0; chunkCount < CHUNKS_PER_BLOCK; chunkCount++) { + String chunkName = strBlock + i + strChunk + chunkCount; + long offset = chunkCount * CHUNK_LEN; + ChunkInfo info = new ChunkInfo(chunkName, offset, CHUNK_LEN); + info.setChecksumData(checksumData); + chunkList.add(info.getProtoBufMessage()); + kvHandler.getChunkManager().writeChunk(container, blockID, info, + ByteBuffer.wrap(chunkData), WRITE_STAGE); + kvHandler.getChunkManager().writeChunk(container, blockID, info, + ByteBuffer.wrap(chunkData), COMMIT_STAGE); + } + blockData.setChunks(chunkList); + blockData.setBlockCommitSequenceId(i); + kvHandler.getBlockManager().putBlock(container, blockData); + } + + ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, blocks, blocks * CHUNKS_PER_BLOCK); + container.close(); + kvHandler.closeContainer(container); + containers.add(container); + } + + return containers; + } + + private void introduceCorruption(KeyValueHandler kvHandler, KeyValueContainer keyValueContainer, int numBlocks, + int numChunks, boolean reverse) throws IOException { + Random random = new Random(); + KeyValueContainerData containerData = keyValueContainer.getContainerData(); + // Simulate missing blocks + try (DBHandle handle = BlockUtils.getDB(containerData, conf); + BatchOperation batch = handle.getStore().getBatchHandler().initBatchOperation()) { + List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); + int size = blockDataList.size(); + for (int i = 0; i < numBlocks; i++) { + BlockData blockData = reverse ? 
blockDataList.get(size - 1 - i) : blockDataList.get(i); + File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); + Assertions.assertTrue(blockFile.delete()); + handle.getStore().getBlockDataTable().deleteWithBatch(batch, containerData.getBlockKey(blockData.getLocalID())); + } + handle.getStore().getBatchHandler().commitBatchOperation(batch); + } + Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); + kvHandler.createContainerMerkleTree(keyValueContainer); + + // Corrupt chunks at an offset. + List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); + int size = blockDataList.size(); + for (int i = 0; i < numChunks; i++) { + int blockIndex = reverse ? size - 1 - (i % size) : i % size; + BlockData blockData = blockDataList.get(blockIndex); + int chunkIndex = i / size; + File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); + List chunks = new ArrayList<>(blockData.getChunks()); + ContainerProtos.ChunkInfo chunkInfo = chunks.remove(chunkIndex); + corruptFileAtOffset(blockFile, (int) chunkInfo.getOffset(), (int) chunkInfo.getLen()); + + // TODO: On-demand scanner should detect this corruption and generate container merkle tree. + ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() + .read(containerData).get().toBuilder(); + List blockMerkleTreeList = builder.getContainerMerkleTree() + .getBlockMerkleTreeList(); + assertEquals(size, blockMerkleTreeList.size()); + + builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); + for (int j = 0; j < blockMerkleTreeList.size(); j++) { + ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); + if (j == blockIndex) { + List chunkMerkleTreeBuilderList = + blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); + chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); + blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); + } + builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); + } + builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); + Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); + writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); + } + } + + /** + * Overwrite the file with random bytes at an offset within the given length. + */ + public static void corruptFileAtOffset(File file, int offset, int chunkLength) { + try { + final int fileLength = (int) file.length(); + assertTrue(fileLength >= offset + chunkLength); + final int chunkEnd = offset + chunkLength; + + Path path = file.toPath(); + final byte[] original = IOUtils.readFully(Files.newInputStream(path), fileLength); + + // Corrupt the last byte and middle bytes of the block. The scanner should log this as two errors. + final byte[] corruptedBytes = Arrays.copyOf(original, fileLength); + corruptedBytes[chunkEnd - 1] = (byte) (original[chunkEnd - 1] << 1); + final int chunkMid = offset + (chunkLength - offset) / 2; + corruptedBytes[chunkMid / 2] = (byte) (original[chunkMid / 2] << 1); + + Files.write(path, corruptedBytes, + StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); + + assertThat(IOUtils.readFully(Files.newInputStream(path), fileLength)) + .isEqualTo(corruptedBytes) + .isNotEqualTo(original); + } catch (IOException ex) { + // Fail the test. 
+ throw new UncheckedIOException(ex); + } + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index dbed040e4a39..593ae8cb61bf 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -61,6 +61,7 @@ import java.nio.file.StandardOpenOption; import java.util.List; import java.util.Properties; +import java.util.Random; import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; @@ -475,6 +476,7 @@ public void testContainerChecksumChunkCorruption() throws Exception { // 3. Set Unhealthy for first chunk of all blocks. This should be done by the scanner, Until then this is a // manual step. + Random random = new Random(); ContainerProtos.ContainerChecksumInfo.Builder builder = containerChecksumAfterChunkCorruption.toBuilder(); List blockMerkleTreeList = builder.getContainerMerkleTree() .getBlockMerkleTreeList(); @@ -483,9 +485,11 @@ public void testContainerChecksumChunkCorruption() throws Exception { ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTree.toBuilder(); List chunkMerkleTreeBuilderList = blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); - chunkMerkleTreeBuilderList.get(0).setIsHealthy(false); + chunkMerkleTreeBuilderList.get(0).setIsHealthy(false).setDataChecksum(random.nextLong()); + blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); } + builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); writeContainerDataTreeProto(container.getContainerData(), builder.getContainerMerkleTree()); From 89a98484ce37f21e8caa244c6d4da18d3e39cfa1 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 10 Mar 2025 13:38:24 -0700 Subject: [PATCH 10/21] Fix findbugs. --- .../ozone/container/keyvalue/TestKeyValueHandler.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 934750941b26..d45f02e40893 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -863,7 +863,7 @@ protected List createContainerWithBlocks(KeyValueHandler kvHa kvHandler.getBlockManager().putBlock(container, blockData); } - ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, blocks, blocks * CHUNKS_PER_BLOCK); + ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, blocks, (long) blocks * CHUNKS_PER_BLOCK); container.close(); kvHandler.closeContainer(container); containers.add(container); @@ -942,9 +942,9 @@ public static void corruptFileAtOffset(File file, int offset, int chunkLength) { // Corrupt the last byte and middle bytes of the block. The scanner should log this as two errors. 
final byte[] corruptedBytes = Arrays.copyOf(original, fileLength); - corruptedBytes[chunkEnd - 1] = (byte) (original[chunkEnd - 1] << 1); + corruptedBytes[chunkEnd - 1] = (byte) ((original[chunkEnd - 1] << 1) & 0xFF); final int chunkMid = offset + (chunkLength - offset) / 2; - corruptedBytes[chunkMid / 2] = (byte) (original[chunkMid / 2] << 1); + corruptedBytes[chunkMid / 2] = (byte) ((original[chunkMid / 2] << 1) & 0xFF); Files.write(path, corruptedBytes, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); From 2b272e7efc796c8125ee885d5d3fd174fec8a609 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 10 Mar 2025 14:32:42 -0700 Subject: [PATCH 11/21] Fix findbugs. --- .../ozone/container/keyvalue/TestKeyValueHandler.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index d45f02e40893..80ea74ab3bba 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -942,9 +942,10 @@ public static void corruptFileAtOffset(File file, int offset, int chunkLength) { // Corrupt the last byte and middle bytes of the block. The scanner should log this as two errors. final byte[] corruptedBytes = Arrays.copyOf(original, fileLength); - corruptedBytes[chunkEnd - 1] = (byte) ((original[chunkEnd - 1] << 1) & 0xFF); - final int chunkMid = offset + (chunkLength - offset) / 2; - corruptedBytes[chunkMid / 2] = (byte) ((original[chunkMid / 2] << 1) & 0xFF); + corruptedBytes[chunkEnd - 1] = (byte) (original[chunkEnd - 1] << 1); + final long chunkMid = offset + ((long) chunkLength - offset) / 2; + corruptedBytes[(int) (chunkMid / 2)] = (byte) (original[(int) (chunkMid / 2)] << 1); + Files.write(path, corruptedBytes, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); From 75700050e7bcb43fc435253084cb45275a151e54 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Tue, 18 Mar 2025 15:38:38 +0530 Subject: [PATCH 12/21] Address review comments. --- .../statemachine/DatanodeStateMachine.java | 8 +--- .../common/utils/ContainerLogger.java | 2 +- .../container/keyvalue/KeyValueHandler.java | 45 +++++++++++-------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java index 251efaee1a04..127588cf0602 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java @@ -132,7 +132,6 @@ public class DatanodeStateMachine implements Closeable { private final DatanodeQueueMetrics queueMetrics; private final ReconfigurationHandler reconfigurationHandler; - private final DNContainerOperationClient dnClient; /** * Constructs a datanode state machine. 
* @param datanodeDetails - DatanodeDetails used to identify a datanode @@ -230,7 +229,7 @@ public DatanodeStateMachine(HddsDatanodeService hddsDatanodeService, // TODO HDDS-11218 combine the clients used for reconstruction and reconciliation so they share the same cache of // datanode clients. - dnClient = new DNContainerOperationClient(conf, certClient, secretKeyClient); + DNContainerOperationClient dnClient = new DNContainerOperationClient(conf, certClient, secretKeyClient); ThreadFactory threadFactory = new ThreadFactoryBuilder() .setNameFormat(threadNamePrefix + "PipelineCommandHandlerThread-%d") @@ -753,9 +752,4 @@ public DatanodeQueueMetrics getQueueMetrics() { public ReconfigurationHandler getReconfigurationHandler() { return reconfigurationHandler; } - - @VisibleForTesting - public DNContainerOperationClient getDnContainerOperationClientClient() { - return dnClient; - } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java index 38d6d6b5d6f1..6f20f22a8bb3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java @@ -157,7 +157,7 @@ public static void logRecovered(ContainerData containerData) { public static void logReconciled(ContainerData containerData, long oldDataChecksum, DatanodeDetails peer) { if (containerData.getDataChecksum() == oldDataChecksum) { LOG.info(getMessage(containerData, "Container reconciled with peer " + peer.toString() + - ". No change in checksum. Current checksum is " + checksumToString(containerData.getDataChecksum()))); + ". No change in checksum.")); } else { LOG.warn(getMessage(containerData, "Container reconciled with peer " + peer.toString() + ". Checksum updated from " + checksumToString(oldDataChecksum) + " to " diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index a6ef20d0553f..58c3b3c8cf63 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -25,6 +25,7 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_ALREADY_EXISTS; +import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_CHECKSUM_ERROR; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_NON_EMPTY_CONTAINER; @@ -1503,9 +1504,8 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Try creating the checksum info from RocksDB metadata if it is not present. 
checksumInfo = createContainerMerkleTree(container); if (checksumInfo == null) { - LOG.error("Failed to reconcile container {} as checksum info is not available", - containerData.getContainerID()); - return; + throw new StorageContainerException("Failed to reconcile container " + containerData.getContainerID() + + " as checksum info is not available", CONTAINER_CHECKSUM_ERROR); } oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); } @@ -1529,7 +1529,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle missing blocks for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { try { - handleMissingBlock(kvContainer, containerData, tokenHelper, xceiverClient, missingBlock); + handleMissingBlock(kvContainer, tokenHelper, xceiverClient, missingBlock); } catch (IOException e) { LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), containerData.getContainerID(), e); @@ -1539,7 +1539,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle missing chunks for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { try { - reconcileChunksPerBlock(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), + reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, entry.getKey(), entry.getValue()); } catch (IOException e) { LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), @@ -1550,26 +1550,27 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle corrupt chunks for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { try { - reconcileChunksPerBlock(kvContainer, containerData, tokenHelper, xceiverClient, entry.getKey(), + reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, entry.getKey(), entry.getValue()); } catch (IOException e) { LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), containerData.getContainerID(), e); } } - // Update checksum based on RocksDB metadata + // Update checksum based on RocksDB metadata, The read chunk validates the checksum of the data + // we read. So we can update the checksum only based on the RocksDB metadata. ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateContainerChecksum(containerData); dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); long duration = Duration.between(start, Instant.now()).toMillis(); if (dataChecksum == oldDataChecksum) { metrics.incContainerReconciledWithoutChanges(); - LOG.info("Container {} reconciled without changes, Current checksum {}. Time taken {} ms", - containerData.getContainerID(), checksumToString(dataChecksum), duration); + LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", + containerData.getContainerID(), peer.toString(), checksumToString(dataChecksum), duration); } else { metrics.incContainerReconciledWithChanges(); - LOG.warn("Container {} reconciled, Checksum updated from {} to {}. Time taken {} ms", - containerData.getContainerID(), checksumToString(oldDataChecksum), + LOG.warn("Container {} reconciled with peer {}. Checksum updated from {} to {}. 
Time taken {} ms", + containerData.getContainerID(), peer.toString(), checksumToString(oldDataChecksum), checksumToString(dataChecksum), duration); } ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, peer); @@ -1583,7 +1584,10 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container sendICR(container); } - // Return the entire tree instead of just the checksum + /** + * Updates the container merkle tree based on the RocksDb's block metadata and returns the updated checksum info. + * @param containerData - Container data for which the container merkle tree needs to be updated. + */ private ContainerProtos.ContainerChecksumInfo updateContainerChecksum(KeyValueContainerData containerData) throws IOException { ContainerMerkleTreeWriter merkleTree = new ContainerMerkleTreeWriter(); @@ -1606,14 +1610,15 @@ private ContainerProtos.ContainerChecksumInfo updateContainerChecksum(KeyValueCo * Handle missing block. It reads the missing block data from the peer datanode and writes it to the local container. * If the block write fails, the block commit sequence id is not updated. */ - private void handleMissingBlock(KeyValueContainer container, ContainerData containerData, TokenHelper tokenHelper, + private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHelper, XceiverClientSpi xceiverClient, ContainerProtos.BlockMerkleTree missingBlock) throws IOException { + ContainerData containerData = container.getContainerData(); BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. Token blockToken = tokenHelper.getBlockToken(blockID, 0L); - // TODO: Re-use the blockResponse for the same block again. + // TODO: Re-use the blockResponse for the same block again. https://issues.apache.org/jira/browse/HDDS-12623 ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); ContainerProtos.BlockData peerBlockData = blockResponse.getBlockData(); @@ -1622,7 +1627,7 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta // Check the local bcsId with the one from the bcsId from the peer datanode. long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), bcsId); List peerChunksList = peerBlockData.getChunksList(); - List successfullChunksList = new ArrayList<>(); + List successfulChunksList = new ArrayList<>(); // Update BcsId only if all chunks are successfully written. 
boolean overwriteBcsId = true; @@ -1634,7 +1639,7 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); - successfullChunksList.add(chunkInfoProto); + successfulChunksList.add(chunkInfoProto); } catch (IOException ex) { overwriteBcsId = false; LOG.error("Error while reconciling missing block {} for offset {} in container {}", @@ -1643,7 +1648,7 @@ private void handleMissingBlock(KeyValueContainer container, ContainerData conta } BlockData putBlockData = BlockData.getFromProtoBuf(peerBlockData); - putBlockData.setChunks(successfullChunksList); + putBlockData.setChunks(successfulChunksList); putBlockForClosedContainer(container, putBlockData, maxBcsId, overwriteBcsId); } @@ -1667,15 +1672,17 @@ private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos * datanode and writes it to the local container. If the chunk write fails, the block commit sequence * id is not updated. */ - private void reconcileChunksPerBlock(KeyValueContainer container, ContainerData containerData, - TokenHelper tokenHelper, XceiverClientSpi xceiverClient, long blockId, + private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper tokenHelper, + XceiverClientSpi xceiverClient, long blockId, List chunkList) throws IOException { + ContainerData containerData = container.getContainerData(); Set offsets = chunkList.stream().map(ContainerProtos.ChunkMerkleTree::getOffset) .collect(Collectors.toSet()); BlockID blockID = new BlockID(containerData.getContainerID(), blockId); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. Token blockToken = tokenHelper.getBlockToken(blockID, 0L); + // TODO: Re-use the blockResponse for the same block again. https://issues.apache.org/jira/browse/HDDS-12623 ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); ContainerProtos.BlockData peerBlockData = blockResponse.getBlockData(); From 47b5fefea839d710b9d3b5f6bf968f6dcdafb00d Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 20 Mar 2025 13:49:20 +0530 Subject: [PATCH 13/21] Use BlockInputStream to read data. 
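Roughly, the read path this commit switches to works as follows: build a BlockLocationInfo for the peer's block, open it through BlockInputStreamFactoryImpl, then seek to each chunk's offset and read its bytes, with checksum validation happening inside the stream as data is read. The helper below is a sketch of that flow, not code from this patch: the class name PeerBlockReadSketch and its parameter list are assumptions for illustration, while the factory, initialize, seek, and read calls mirror the ones added in the diff that follows.

import java.io.IOException;
import org.apache.hadoop.hdds.client.BlockID;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.OzoneClientConfig;
import org.apache.hadoop.hdds.scm.XceiverClientManager;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.storage.BlockInputStream;
import org.apache.hadoop.hdds.scm.storage.BlockLocationInfo;
import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier;
import org.apache.hadoop.ozone.client.io.BlockInputStreamFactoryImpl;
import org.apache.hadoop.security.token.Token;

/** Illustrative helper: read one chunk of a peer's block through BlockInputStream. */
final class PeerBlockReadSketch {

  static byte[] readChunk(ConfigurationSource conf,
                          XceiverClientManager clientManager,
                          Pipeline peerPipeline,
                          BlockID blockID,
                          long blockLength,
                          Token<OzoneBlockTokenIdentifier> blockToken,
                          ContainerProtos.ChunkInfo chunk) throws IOException {
    // Describe the peer's block so the stream factory can locate and authorize the read.
    BlockLocationInfo blkInfo = new BlockLocationInfo.Builder()
        .setBlockID(blockID)
        .setLength(blockLength)
        .setPipeline(peerPipeline)
        .setToken(blockToken)
        .build();
    BlockInputStreamFactoryImpl factory = new BlockInputStreamFactoryImpl();
    try (BlockInputStream in = (BlockInputStream) factory.create(
        RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE),
        blkInfo, peerPipeline, blockToken, clientManager,
        null, conf.getObject(OzoneClientConfig.class))) {
      in.initialize();             // fetches block metadata and sets up per-chunk streams
      in.seek(chunk.getOffset());  // position at the chunk to repair
      byte[] data = new byte[(int) chunk.getLen()];
      in.read(data, 0, data.length); // checksums are validated as the chunk is read
      return data;
    }
  }
}

A production caller would loop on read() until the chunk is fully buffered and handle short reads; the sketch omits that for brevity.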
--- .../hdds/scm/storage/BlockInputStream.java | 4 +- .../hdds/scm/storage/ChunkInputStream.java | 6 +- .../hadoop/hdds/client/ReplicationConfig.java | 6 + .../checksum/DNContainerOperationClient.java | 2 +- .../container/keyvalue/KeyValueHandler.java | 189 +++++++++++------- .../keyvalue/TestKeyValueHandler.java | 17 +- 6 files changed, 145 insertions(+), 79 deletions(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java index 4867a2aa69a7..8ed099f53cb4 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java @@ -554,8 +554,7 @@ public long getLength() { return length; } - @VisibleForTesting - synchronized int getChunkIndex() { + public synchronized int getChunkIndex() { return chunkIndex; } @@ -618,7 +617,6 @@ private void handleReadError(IOException cause) throws IOException { refreshBlockInfo(cause); } - @VisibleForTesting public synchronized List getChunkStreams() { return chunkStreams; } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java index 23c96fc7d6a7..ca7d15342e78 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java @@ -727,7 +727,7 @@ String getChunkName() { return chunkInfo.getChunkName(); } - protected long getLength() { + public long getLength() { return length; } @@ -747,4 +747,8 @@ public synchronized void unbuffer() { public ByteBuffer[] getCachedBuffers() { return BufferUtils.getReadOnlyByteBuffers(buffers); } + + public ChunkInfo getChunkInfo() { + return chunkInfo; + } } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java index 4bd470971633..33f545b40ed3 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java @@ -112,6 +112,12 @@ static HddsProtos.ReplicationFactor getLegacyFactor( return ((ReplicatedReplicationConfig) replicationConfig) .getReplicationFactor(); } + + if (replicationConfig instanceof StandaloneReplicationConfig) { + return ((StandaloneReplicationConfig) replicationConfig) + .getReplicationFactor(); + } + throw new UnsupportedOperationException( "Replication configuration of type " + replicationConfig.getReplicationType() diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java index d5ba243dd12a..2310e7d61fe7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java @@ -65,7 +65,7 @@ public DNContainerOperationClient(ConfigurationSource conf, } @Nonnull - private static XceiverClientManager createClientManager( + public static 
XceiverClientManager createClientManager( ConfigurationSource conf, CertificateClient certificateClient) throws IOException { ClientTrustManager trustManager = null; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 58c3b3c8cf63..8f132f850300 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -88,6 +88,7 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; @@ -101,10 +102,16 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.PutSmallFileRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.WriteChunkRequestProto; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ByteStringConversion; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.storage.BlockInputStream; +import org.apache.hadoop.hdds.scm.storage.BlockLocationInfo; +import org.apache.hadoop.hdds.scm.storage.ChunkInputStream; import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; @@ -112,6 +119,7 @@ import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.ozone.OzoneConfigKeys; import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.client.io.BlockInputStreamFactoryImpl; import org.apache.hadoop.ozone.common.Checksum; import org.apache.hadoop.ozone.common.ChunkBuffer; import org.apache.hadoop.ozone.common.ChunkBufferToByteString; @@ -179,6 +187,7 @@ public class KeyValueHandler extends Handler { private final ContainerChecksumTreeManager checksumManager; private static FaultInjector injector; private final Clock clock; + private final BlockInputStreamFactoryImpl blockInputStreamFactory; public KeyValueHandler(ConfigurationSource config, String datanodeId, @@ -240,6 +249,8 @@ public KeyValueHandler(ConfigurationSource config, ByteStringConversion .createByteBufferConversion(isUnsafeByteBufferConversionEnabled); + blockInputStreamFactory = new BlockInputStreamFactoryImpl(); + if (ContainerLayoutVersion.getConfiguredVersion(conf) == ContainerLayoutVersion.FILE_PER_CHUNK) { LOG.warn("FILE_PER_CHUNK layout is not supported. 
Falling back to default : {}.", @@ -616,19 +627,19 @@ ContainerCommandResponseProto handleCloseContainer( return getSuccessResponse(request); } - public ContainerProtos.ContainerChecksumInfo createContainerMerkleTree(Container container) { + public Optional createContainerMerkleTree(Container container) { if (ContainerChecksumTreeManager.checksumFileExist(container)) { - return null; + return Optional.empty(); } try { KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); - return updateContainerChecksum(containerData); + return Optional.of(updateContainerChecksum(containerData)); } catch (IOException ex) { LOG.error("Cannot create container checksum for container {} , Exception: ", container.getContainerData().getContainerID(), ex); } - return null; + return Optional.empty(); } /** @@ -1502,11 +1513,12 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); } else { // Try creating the checksum info from RocksDB metadata if it is not present. - checksumInfo = createContainerMerkleTree(container); - if (checksumInfo == null) { + optionalChecksumInfo = createContainerMerkleTree(container); + if (!optionalChecksumInfo.isPresent()) { throw new StorageContainerException("Failed to reconcile container " + containerData.getContainerID() + " as checksum info is not available", CONTAINER_CHECKSUM_ERROR); } + checksumInfo = optionalChecksumInfo.get(); oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); } @@ -1522,14 +1534,15 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container ContainerDiffReport diffReport = checksumManager.diff(checksumInfo, peerChecksumInfo); TokenHelper tokenHelper = dnClient.getTokenHelper(); + Pipeline pipeline = createSingleNodePipeline(peer); XceiverClientSpi xceiverClient = dnClient.getXceiverClientManager() - .acquireClient(createSingleNodePipeline(peer)); + .acquireClient(pipeline); try { // Handle missing blocks for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { try { - handleMissingBlock(kvContainer, tokenHelper, xceiverClient, missingBlock); + handleMissingBlock(kvContainer, tokenHelper, xceiverClient, pipeline, dnClient, missingBlock); } catch (IOException e) { LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), containerData.getContainerID(), e); @@ -1539,7 +1552,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle missing chunks for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { try { - reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, entry.getKey(), + reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, pipeline, dnClient, entry.getKey(), entry.getValue()); } catch (IOException e) { LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), @@ -1550,7 +1563,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle corrupt chunks for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { try { - reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, entry.getKey(), + reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, pipeline, dnClient, entry.getKey(), entry.getValue()); } catch (IOException e) { LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), @@ -1611,7 
+1624,8 @@ private ContainerProtos.ContainerChecksumInfo updateContainerChecksum(KeyValueCo * If the block write fails, the block commit sequence id is not updated. */ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHelper, - XceiverClientSpi xceiverClient, ContainerProtos.BlockMerkleTree missingBlock) + XceiverClientSpi xceiverClient, Pipeline pipeline, + DNContainerOperationClient dnClient, ContainerProtos.BlockMerkleTree missingBlock) throws IOException { ContainerData containerData = container.getContainerData(); BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); @@ -1622,48 +1636,57 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, blockToken, new HashMap<>()); ContainerProtos.BlockData peerBlockData = blockResponse.getBlockData(); - long bcsId = getBlockManager().blockExists(container, blockID) ? - getBlockManager().getBlock(container, blockID).getBlockCommitSequenceId() : 0; - // Check the local bcsId with the one from the bcsId from the peer datanode. - long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), bcsId); + if (getBlockManager().blockExists(container, blockID)) { + LOG.warn("Block {} already exists in container {}. Skipping reconciliation for block.", blockID, + containerData.getContainerID()); + return; + } + + // The maxBcsId is the peer's bcsId as there is no block for this blockID in the local container. + long maxBcsId = peerBlockData.getBlockID().getBlockCommitSequenceId(); List peerChunksList = peerBlockData.getChunksList(); List successfulChunksList = new ArrayList<>(); // Update BcsId only if all chunks are successfully written. 
boolean overwriteBcsId = true; - // Don't update bcsId if chunk read fails - for (ContainerProtos.ChunkInfo chunkInfoProto : peerChunksList) { - try { - ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); - ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); - chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); - writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); - successfulChunksList.add(chunkInfoProto); - } catch (IOException ex) { - overwriteBcsId = false; - LOG.error("Error while reconciling missing block {} for offset {} in container {}", - blockID, chunkInfoProto.getOffset(), containerData.getContainerID(), ex); - } - } - - BlockData putBlockData = BlockData.getFromProtoBuf(peerBlockData); - putBlockData.setChunks(successfulChunksList); - putBlockForClosedContainer(container, putBlockData, maxBcsId, overwriteBcsId); - } - private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos.ChunkInfo chunkInfoProto, - BlockID blockID, Token blockToken) throws IOException { - ContainerProtos.ReadChunkResponseProto response = - ContainerProtocolCalls.readChunk(xceiverClient, chunkInfoProto, blockID.getDatanodeBlockIDProtobuf(), - null, blockToken); + BlockLocationInfo blkInfo = new BlockLocationInfo.Builder() + .setBlockID(blockID) + .setLength(peerBlockData.getSize()) + .setPipeline(pipeline) + .setToken(blockToken) + .build(); + try (BlockInputStream blockInputStream = (BlockInputStream) blockInputStreamFactory.create( + RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), + blkInfo, pipeline, blockToken, dnClient.getXceiverClientManager(), + null, conf.getObject(OzoneClientConfig.class))) { + // Initialize the BlockInputStream. Initializes the blockData and ChunkInputStream for each chunk + blockInputStream.initialize(); + + // Don't update bcsId if chunk read fails + for (ContainerProtos.ChunkInfo chunkInfoProto : peerChunksList) { + try { + // Seek to the offset of the chunk. Seek updates the chunkIndex in the BlockInputStream. + blockInputStream.seek(chunkInfoProto.getOffset()); + + // Read the chunk data from the BlockInputStream and write it to the container. 
+ byte[] chunkData = new byte[(int) chunkInfoProto.getLen()]; + blockInputStream.read(chunkData, 0, (int) chunkInfoProto.getLen()); + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(ByteBuffer.wrap(chunkData)); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); + writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); + successfulChunksList.add(chunkInfoProto); + } catch (IOException ex) { + overwriteBcsId = false; + LOG.error("Error while reconciling missing block {} for offset {} in container {}", + blockID, chunkInfoProto.getOffset(), containerData.getContainerID(), ex); + } + } - if (response.hasData()) { - return response.getData(); - } else if (response.hasDataBuffers()) { - return BufferUtils.concatByteStrings(response.getDataBuffers().getBuffersList()); - } else { - throw new IOException("Error reading chunk data: No data returned."); + BlockData putBlockData = BlockData.getFromProtoBuf(peerBlockData); + putBlockData.setChunks(successfulChunksList); + putBlockForClosedContainer(container, putBlockData, maxBcsId, overwriteBcsId); } } @@ -1673,11 +1696,12 @@ private ByteString readChunkData(XceiverClientSpi xceiverClient, ContainerProtos * id is not updated. */ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper tokenHelper, - XceiverClientSpi xceiverClient, long blockId, + XceiverClientSpi xceiverClient, Pipeline pipeline, + DNContainerOperationClient dnClient, long blockId, List chunkList) throws IOException { ContainerData containerData = container.getContainerData(); - Set offsets = chunkList.stream().map(ContainerProtos.ChunkMerkleTree::getOffset) - .collect(Collectors.toSet()); + Map offsetLengthMap = chunkList.stream().collect(Collectors.toMap( + ContainerProtos.ChunkMerkleTree::getOffset, ContainerProtos.ChunkMerkleTree::getLength)); BlockID blockID = new BlockID(containerData.getContainerID(), blockId); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. @@ -1689,37 +1713,56 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to BlockData localBlockData = getBlockManager().getBlock(container, blockID); // Check the local bcsId with the one from the bcsId from the peer datanode. 
long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), - localBlockData.getBlockCommitSequenceId()); - List chunksListFromPeer = peerBlockData.getChunksList(); + localBlockData.getBlockCommitSequenceId()); SortedMap localChunksMap = localBlockData.getChunks().stream() - .collect(Collectors.toMap(ContainerProtos.ChunkInfo::getOffset, - Function.identity(), (chunk1, chunk2) -> chunk1, TreeMap::new)); + .collect(Collectors.toMap(ContainerProtos.ChunkInfo::getOffset, + Function.identity(), (chunk1, chunk2) -> chunk1, TreeMap::new)); boolean overwriteBcsId = true; - for (ContainerProtos.ChunkInfo chunkInfoProto : chunksListFromPeer) { - try { - if (!offsets.contains(chunkInfoProto.getOffset())) { - continue; + BlockLocationInfo blkInfo = new BlockLocationInfo.Builder() + .setBlockID(blockID) + .setLength(peerBlockData.getSize()) + .setPipeline(pipeline) + .setToken(blockToken) + .build(); + try (BlockInputStream blockInputStream = (BlockInputStream) blockInputStreamFactory.create( + RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), + blkInfo, pipeline, blockToken, dnClient.getXceiverClientManager(), + null, conf.getObject(OzoneClientConfig.class))) { + // Initialize the BlockInputStream. Initializes the blockData and ChunkInputStream for each chunk + blockInputStream.initialize(); + + for (Long offset : offsetLengthMap.keySet()) { + try { + // Seek to the offset of the chunk. Seek updates the chunkIndex in the BlockInputStream. + blockInputStream.seek(offset); + ChunkInputStream currentChunkStream = blockInputStream.getChunkStreams().get( + blockInputStream.getChunkIndex()); + ContainerProtos.ChunkInfo chunkInfoProto = currentChunkStream.getChunkInfo(); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); + + // Verify the chunk offset and length. + verifyChunksLength(chunkInfoProto, localChunksMap.get(offset)); + + // Read the chunk data from the block input stream and write it to the container. 
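
The read below pulls a whole chunk with a single read(byte[], int, int) call; a later patch in this series adds a length check because a single call may legally return fewer bytes than requested. A generic java.io sketch of the read-exactly-N-bytes idea (not the Ozone BlockInputStream API):

    import java.io.EOFException;
    import java.io.IOException;
    import java.io.InputStream;

    /** Sketch: read exactly len bytes from a stream, or fail with a clear error. */
    public final class ReadFullySketch {

      /**
       * Reads exactly len bytes into buf starting at bufOffset. The read is retried
       * until the requested length is reached or the stream ends prematurely.
       */
      static void readFully(InputStream in, byte[] buf, int bufOffset, int len) throws IOException {
        int total = 0;
        while (total < len) {
          int n = in.read(buf, bufOffset + total, len - total);
          if (n < 0) {
            throw new EOFException("Expected " + len + " bytes but stream ended after " + total);
          }
          total += n;
        }
      }

      private ReadFullySketch() { }
    }
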
+ byte[] chunkData = new byte[offsetLengthMap.get(offset).intValue()]; + blockInputStream.read(chunkData, offset.intValue(), offsetLengthMap.get(offset).intValue()); + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(ByteBuffer.wrap(chunkData)); + writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); + localChunksMap.put(chunkInfo.getOffset(), chunkInfoProto); + } catch (IOException ex) { + overwriteBcsId = false; + LOG.error("Error while reconciling chunk {} for block {} in container {}", + offset, blockID, containerData.getContainerID(), ex); } - - verifyChunksLength(chunkInfoProto, localChunksMap.get(chunkInfoProto.getOffset())); - ByteString chunkData = readChunkData(xceiverClient, chunkInfoProto, blockID, blockToken); - ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkData.asReadOnlyByteBuffer()); - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); - chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); - writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); - localChunksMap.put(chunkInfo.getOffset(), chunkInfoProto); - } catch (IOException ex) { - overwriteBcsId = false; - LOG.error("Error while reconciling chunk {} for block {} in container {}", - chunkInfoProto.getOffset(), blockID, containerData.getContainerID(), ex); } - } - List localChunkList = new ArrayList<>(localChunksMap.values()); - localBlockData.setChunks(localChunkList); - putBlockForClosedContainer(container, localBlockData, maxBcsId, overwriteBcsId); + List localChunkList = new ArrayList<>(localChunksMap.values()); + localBlockData.setChunks(localChunkList); + putBlockForClosedContainer(container, localBlockData, maxBcsId, overwriteBcsId); + } } private void verifyChunksLength(ContainerProtos.ChunkInfo peerChunkInfo, ContainerProtos.ChunkInfo localChunkInfo) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 80ea74ab3bba..8d8da1423594 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -53,6 +53,7 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import java.io.File; import java.io.IOException; @@ -81,6 +82,7 @@ import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; @@ -88,6 +90,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.XceiverClientSpi; @@ -628,12 +631,19 @@ public 
void testFullContainerReconciliation(int numBlocks, int numChunks) throws DatanodeDetails datanode = datanodes.get(i); KeyValueContainer container = containers.get(i); - Pipeline pipeline = mock(Pipeline.class); + Pipeline pipeline = Pipeline.newBuilder() + .setNodes(ImmutableList.of(datanode)) + .setId(PipelineID.valueOf(datanode.getUuid())) + .setState(Pipeline.PipelineState.CLOSED) + .setReplicationConfig(StandaloneReplicationConfig.getInstance( + HddsProtos.ReplicationFactor.ONE)).build(); XceiverClientSpi client = mock(XceiverClientSpi.class); dnClientMock.when(() -> DNContainerOperationClient.createSingleNodePipeline(datanode)).thenReturn(pipeline); when(xceiverClientManager.acquireClient(pipeline)).thenReturn(client); + when(xceiverClientManager.acquireClientForReadData(pipeline)).thenReturn(client); doNothing().when(xceiverClientManager).releaseClient(eq(client), anyBoolean()); + doNothing().when(xceiverClientManager).releaseClientForReadData(eq(client), anyBoolean()); when(client.getPipeline()).thenReturn(pipeline); // Mock checksum info @@ -646,6 +656,11 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws .setBlockData(kvHandler.getBlockManager().getBlock(container, inv.getArgument(1)).getProtoBufMessage()) .build()); + containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(eq(client), any(), any(), any(), anyMap())) + .thenAnswer(inv -> ContainerProtos.GetBlockResponseProto.newBuilder() + .setBlockData(kvHandler.getBlockManager().getBlock(container, inv.getArgument(2)).getProtoBufMessage()) + .build()); + // Mock readChunk containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(eq(client), any(), any(), any(), any())) .thenAnswer(inv -> createReadChunkResponse(inv, container, kvHandler)); From caffe21c0b5acd1786b7150e828a926ecaeac52f Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 20 Mar 2025 15:23:31 +0530 Subject: [PATCH 14/21] Fix findbugs --- .../container/keyvalue/KeyValueHandler.java | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 8f132f850300..13838e1bf458 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1670,8 +1670,14 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe blockInputStream.seek(chunkInfoProto.getOffset()); // Read the chunk data from the BlockInputStream and write it to the container. - byte[] chunkData = new byte[(int) chunkInfoProto.getLen()]; - blockInputStream.read(chunkData, 0, (int) chunkInfoProto.getLen()); + int chunkLength = (int) chunkInfoProto.getLen(); + byte[] chunkData = new byte[chunkLength]; + int bytesRead = blockInputStream.read(chunkData, 0, chunkLength); + if (bytesRead != chunkLength) { + throw new IOException("Error while reading chunk data from block input stream. 
Expected length: " + + chunkLength + ", Actual length: " + bytesRead); + } + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(ByteBuffer.wrap(chunkData)); ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); @@ -1733,7 +1739,9 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to // Initialize the BlockInputStream. Initializes the blockData and ChunkInputStream for each chunk blockInputStream.initialize(); - for (Long offset : offsetLengthMap.keySet()) { + for (Map.Entry offsetLength : offsetLengthMap.entrySet()) { + Long offset = offsetLength.getKey(); + Long length = offsetLength.getValue(); try { // Seek to the offset of the chunk. Seek updates the chunkIndex in the BlockInputStream. blockInputStream.seek(offset); @@ -1747,8 +1755,13 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to verifyChunksLength(chunkInfoProto, localChunksMap.get(offset)); // Read the chunk data from the block input stream and write it to the container. - byte[] chunkData = new byte[offsetLengthMap.get(offset).intValue()]; - blockInputStream.read(chunkData, offset.intValue(), offsetLengthMap.get(offset).intValue()); + byte[] chunkData = new byte[length.intValue()]; + int bytesRead = blockInputStream.read(chunkData, 0, length.intValue()); + if (bytesRead != length) { + throw new IOException("Error while reading chunk data from block input stream. Expected length: " + + length + ", Actual length: " + bytesRead); + } + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(ByteBuffer.wrap(chunkData)); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); localChunksMap.put(chunkInfo.getOffset(), chunkInfoProto); From 12b94438a8402f1cb41b858e6fb95cb415e382cb Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 20 Mar 2025 17:26:16 +0530 Subject: [PATCH 15/21] Use existing blockData from BlockInputStream --- .../hdds/scm/storage/BlockInputStream.java | 7 +- .../container/keyvalue/KeyValueHandler.java | 133 ++++++++---------- .../keyvalue/TestKeyValueHandler.java | 7 - 3 files changed, 68 insertions(+), 79 deletions(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java index 8ed099f53cb4..c9fe4ec67971 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java @@ -112,6 +112,8 @@ public class BlockInputStream extends BlockExtendedInputStream { private final Function refreshFunction; + private BlockData blockData; + public BlockInputStream( BlockLocationInfo blockInfo, Pipeline pipeline, @@ -153,7 +155,6 @@ public synchronized void initialize() throws IOException { return; } - BlockData blockData = null; List chunks = null; IOException catchEx = null; do { @@ -621,4 +622,8 @@ public synchronized List getChunkStreams() { return chunkStreams; } + public BlockData getStreamBlockData() { + return blockData; + } + } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 13838e1bf458..d5e68b88ff66 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java 
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -106,13 +106,11 @@ import org.apache.hadoop.hdds.scm.ByteStringConversion; import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.storage.BlockInputStream; import org.apache.hadoop.hdds.scm.storage.BlockLocationInfo; import org.apache.hadoop.hdds.scm.storage.ChunkInputStream; -import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier; import org.apache.hadoop.hdds.upgrade.HDDSLayoutFeature; import org.apache.hadoop.hdds.utils.FaultInjector; @@ -1535,61 +1533,55 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container ContainerDiffReport diffReport = checksumManager.diff(checksumInfo, peerChecksumInfo); TokenHelper tokenHelper = dnClient.getTokenHelper(); Pipeline pipeline = createSingleNodePipeline(peer); - XceiverClientSpi xceiverClient = dnClient.getXceiverClientManager() - .acquireClient(pipeline); - try { - // Handle missing blocks - for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { - try { - handleMissingBlock(kvContainer, tokenHelper, xceiverClient, pipeline, dnClient, missingBlock); - } catch (IOException e) { - LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), - containerData.getContainerID(), e); - } + // Handle missing blocks + for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { + try { + handleMissingBlock(kvContainer, tokenHelper, pipeline, dnClient, missingBlock); + } catch (IOException e) { + LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), + containerData.getContainerID(), e); } + } - // Handle missing chunks - for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { - try { - reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, pipeline, dnClient, entry.getKey(), - entry.getValue()); - } catch (IOException e) { - LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), - containerData.getContainerID(), e); - } + // Handle missing chunks + for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { + try { + reconcileChunksPerBlock(kvContainer, tokenHelper, pipeline, dnClient, entry.getKey(), + entry.getValue()); + } catch (IOException e) { + LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), + containerData.getContainerID(), e); } + } - // Handle corrupt chunks - for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { - try { - reconcileChunksPerBlock(kvContainer, tokenHelper, xceiverClient, pipeline, dnClient, entry.getKey(), - entry.getValue()); - } catch (IOException e) { - LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), - containerData.getContainerID(), e); - } - } - // Update checksum based on RocksDB metadata, The read chunk validates the checksum of the data - // we read. So we can update the checksum only based on the RocksDB metadata. 
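
The comment above notes that, once repairs are done, the container checksum is recomputed purely from RocksDB block metadata because the chunk reads already validated the data. A compact stand-alone sketch of that recompute-and-compare idea; BlockMeta and the CRC32 fold are illustrative stand-ins for Ozone's block metadata and merkle tree writer, not the actual implementation.

    import java.nio.charset.StandardCharsets;
    import java.util.List;
    import java.util.zip.CRC32;

    /** Sketch: recompute a container-level checksum from block metadata and compare with the old value. */
    public final class ChecksumRecomputeSketch {

      /** Stand-in for a block metadata record; only the fields needed for the illustration. */
      static final class BlockMeta {
        final long blockId;
        final long length;
        BlockMeta(long blockId, long length) {
          this.blockId = blockId;
          this.length = length;
        }
      }

      /** Folds every block's id and length into one CRC32 value (a stand-in for the merkle tree root). */
      static long recompute(List<BlockMeta> blocks) {
        CRC32 crc = new CRC32();
        for (BlockMeta b : blocks) {
          crc.update((b.blockId + ":" + b.length).getBytes(StandardCharsets.UTF_8));
        }
        return crc.getValue();
      }

      /** Returns true when the repair pass actually changed the container contents. */
      static boolean contentChanged(long oldChecksum, List<BlockMeta> blocksAfterRepair) {
        return recompute(blocksAfterRepair) != oldChecksum;
      }

      private ChecksumRecomputeSketch() { }
    }

Comparing the recomputed value against the pre-reconciliation value is what lets the caller choose between the "no change" and "checksum updated" log and metric paths.
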
- ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateContainerChecksum(containerData); - dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); - - long duration = Duration.between(start, Instant.now()).toMillis(); - if (dataChecksum == oldDataChecksum) { - metrics.incContainerReconciledWithoutChanges(); - LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", - containerData.getContainerID(), peer.toString(), checksumToString(dataChecksum), duration); - } else { - metrics.incContainerReconciledWithChanges(); - LOG.warn("Container {} reconciled with peer {}. Checksum updated from {} to {}. Time taken {} ms", - containerData.getContainerID(), peer.toString(), checksumToString(oldDataChecksum), - checksumToString(dataChecksum), duration); + // Handle corrupt chunks + for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { + try { + reconcileChunksPerBlock(kvContainer, tokenHelper, pipeline, dnClient, entry.getKey(), + entry.getValue()); + } catch (IOException e) { + LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), + containerData.getContainerID(), e); } - ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, peer); - } finally { - dnClient.getXceiverClientManager().releaseClient(xceiverClient, false); } + // Update checksum based on RocksDB metadata, The read chunk validates the checksum of the data + // we read. So we can update the checksum only based on the RocksDB metadata. + ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateContainerChecksum(containerData); + dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); + + long duration = Duration.between(start, Instant.now()).toMillis(); + if (dataChecksum == oldDataChecksum) { + metrics.incContainerReconciledWithoutChanges(); + LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", + containerData.getContainerID(), peer.toString(), checksumToString(dataChecksum), duration); + } else { + metrics.incContainerReconciledWithChanges(); + LOG.warn("Container {} reconciled with peer {}. Checksum updated from {} to {}. Time taken {} ms", + containerData.getContainerID(), peer.toString(), checksumToString(oldDataChecksum), + checksumToString(dataChecksum), duration); + } + ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, peer); } // Trigger manual on demand scanner @@ -1624,38 +1616,30 @@ private ContainerProtos.ContainerChecksumInfo updateContainerChecksum(KeyValueCo * If the block write fails, the block commit sequence id is not updated. */ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHelper, - XceiverClientSpi xceiverClient, Pipeline pipeline, - DNContainerOperationClient dnClient, ContainerProtos.BlockMerkleTree missingBlock) + Pipeline pipeline, DNContainerOperationClient dnClient, + ContainerProtos.BlockMerkleTree missingBlock) throws IOException { ContainerData containerData = container.getContainerData(); BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. Token blockToken = tokenHelper.getBlockToken(blockID, 0L); - // TODO: Re-use the blockResponse for the same block again. 
https://issues.apache.org/jira/browse/HDDS-12623 - ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, - blockToken, new HashMap<>()); - ContainerProtos.BlockData peerBlockData = blockResponse.getBlockData(); if (getBlockManager().blockExists(container, blockID)) { LOG.warn("Block {} already exists in container {}. Skipping reconciliation for block.", blockID, containerData.getContainerID()); return; } - // The maxBcsId is the peer's bcsId as there is no block for this blockID in the local container. - long maxBcsId = peerBlockData.getBlockID().getBlockCommitSequenceId(); - List peerChunksList = peerBlockData.getChunksList(); List successfulChunksList = new ArrayList<>(); // Update BcsId only if all chunks are successfully written. boolean overwriteBcsId = true; - BlockLocationInfo blkInfo = new BlockLocationInfo.Builder() .setBlockID(blockID) - .setLength(peerBlockData.getSize()) .setPipeline(pipeline) .setToken(blockToken) .build(); + blkInfo.setUnderConstruction(true); try (BlockInputStream blockInputStream = (BlockInputStream) blockInputStreamFactory.create( RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), blkInfo, pipeline, blockToken, dnClient.getXceiverClientManager(), @@ -1663,6 +1647,13 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe // Initialize the BlockInputStream. Initializes the blockData and ChunkInputStream for each chunk blockInputStream.initialize(); + // BlockInputStream#initialize() should be called before this, as it gets the BlockData for the block. + // and sets the block length. + ContainerProtos.BlockData peerBlockData = blockInputStream.getStreamBlockData(); + // The maxBcsId is the peer's bcsId as there is no block for this blockID in the local container. + long maxBcsId = peerBlockData.getBlockID().getBlockCommitSequenceId(); + List peerChunksList = peerBlockData.getChunksList(); + // Don't update bcsId if chunk read fails for (ContainerProtos.ChunkInfo chunkInfoProto : peerChunksList) { try { @@ -1701,10 +1692,10 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe * datanode and writes it to the local container. If the chunk write fails, the block commit sequence * id is not updated. */ - private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper tokenHelper, - XceiverClientSpi xceiverClient, Pipeline pipeline, + private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper tokenHelper, Pipeline pipeline, DNContainerOperationClient dnClient, long blockId, List chunkList) throws IOException { + ContainerData containerData = container.getContainerData(); Map offsetLengthMap = chunkList.stream().collect(Collectors.toMap( ContainerProtos.ChunkMerkleTree::getOffset, ContainerProtos.ChunkMerkleTree::getLength)); @@ -1712,14 +1703,7 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. Token blockToken = tokenHelper.getBlockToken(blockID, 0L); - // TODO: Re-use the blockResponse for the same block again. 
https://issues.apache.org/jira/browse/HDDS-12623 - ContainerProtos.GetBlockResponseProto blockResponse = ContainerProtocolCalls.getBlock(xceiverClient, blockID, - blockToken, new HashMap<>()); - ContainerProtos.BlockData peerBlockData = blockResponse.getBlockData(); BlockData localBlockData = getBlockManager().getBlock(container, blockID); - // Check the local bcsId with the one from the bcsId from the peer datanode. - long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), - localBlockData.getBlockCommitSequenceId()); SortedMap localChunksMap = localBlockData.getChunks().stream() .collect(Collectors.toMap(ContainerProtos.ChunkInfo::getOffset, @@ -1728,10 +1712,10 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to BlockLocationInfo blkInfo = new BlockLocationInfo.Builder() .setBlockID(blockID) - .setLength(peerBlockData.getSize()) .setPipeline(pipeline) .setToken(blockToken) .build(); + blkInfo.setUnderConstruction(true); try (BlockInputStream blockInputStream = (BlockInputStream) blockInputStreamFactory.create( RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), blkInfo, pipeline, blockToken, dnClient.getXceiverClientManager(), @@ -1739,6 +1723,13 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to // Initialize the BlockInputStream. Initializes the blockData and ChunkInputStream for each chunk blockInputStream.initialize(); + // BlockInputStream#initialize() should be called before this, as it gets the BlockData for the block + // and sets the block length. + ContainerProtos.BlockData peerBlockData = blockInputStream.getStreamBlockData(); + // Check the local bcsId with the one from the bcsId from the peer datanode. + long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), + localBlockData.getBlockCommitSequenceId()); + for (Map.Entry offsetLength : offsetLengthMap.entrySet()) { Long offset = offsetLength.getKey(); Long length = offsetLength.getValue(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 8d8da1423594..d93926ab0334 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -640,9 +640,7 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws XceiverClientSpi client = mock(XceiverClientSpi.class); dnClientMock.when(() -> DNContainerOperationClient.createSingleNodePipeline(datanode)).thenReturn(pipeline); - when(xceiverClientManager.acquireClient(pipeline)).thenReturn(client); when(xceiverClientManager.acquireClientForReadData(pipeline)).thenReturn(client); - doNothing().when(xceiverClientManager).releaseClient(eq(client), anyBoolean()); doNothing().when(xceiverClientManager).releaseClientForReadData(eq(client), anyBoolean()); when(client.getPipeline()).thenReturn(pipeline); @@ -651,11 +649,6 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws .thenReturn(checksumManager.read(container.getContainerData()).get()); // Mock getBlock - containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(eq(client), any(), any(), anyMap())) - .thenAnswer(inv -> ContainerProtos.GetBlockResponseProto.newBuilder() - 
.setBlockData(kvHandler.getBlockManager().getBlock(container, inv.getArgument(1)).getProtoBufMessage()) - .build()); - containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(eq(client), any(), any(), any(), anyMap())) .thenAnswer(inv -> ContainerProtos.GetBlockResponseProto.newBuilder() .setBlockData(kvHandler.getBlockManager().getBlock(container, inv.getArgument(2)).getProtoBufMessage()) From d03e4d93d18757fc970ac2f3028866e829ebb189 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Tue, 25 Mar 2025 00:39:10 +0530 Subject: [PATCH 16/21] Use ByteBuffer instead of byte array. --- .../container/keyvalue/KeyValueHandler.java | 60 +++++++++++++------ 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index d5e68b88ff66..a324134ccb7e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -37,6 +37,8 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.PUT_SMALL_FILE_ERROR; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNCLOSED_CONTAINER_IO; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.UNSUPPORTED_REQUEST; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_DEFAULT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getBlockDataResponse; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getBlockLengthResponse; import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getEchoResponse; @@ -1625,8 +1627,9 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe // is not used to validate the token for getBlock call. Token blockToken = tokenHelper.getBlockToken(blockID, 0L); if (getBlockManager().blockExists(container, blockID)) { - LOG.warn("Block {} already exists in container {}. Skipping reconciliation for block.", blockID, - containerData.getContainerID()); + LOG.warn("Block {} already exists in container {}. This block {} is expected to not exist. The container " + + "merkle tree for container {} is stale. Skipping reconciliation for block.", blockID, + containerData.getContainerID(), blockID, containerData.getContainerID()); return; } @@ -1639,6 +1642,7 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe .setPipeline(pipeline) .setToken(blockToken) .build(); + // Under construction is set here, during BlockInputStream#initialize() it is used to update the block length. blkInfo.setUnderConstruction(true); try (BlockInputStream blockInputStream = (BlockInputStream) blockInputStreamFactory.create( RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), @@ -1653,6 +1657,9 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe // The maxBcsId is the peer's bcsId as there is no block for this blockID in the local container. 
long maxBcsId = peerBlockData.getBlockID().getBlockCommitSequenceId(); List peerChunksList = peerBlockData.getChunksList(); + int chunkSize = (int) conf.getStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, OZONE_SCM_CHUNK_SIZE_DEFAULT, + StorageUnit.BYTES); + ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); // Don't update bcsId if chunk read fails for (ContainerProtos.ChunkInfo chunkInfoProto : peerChunksList) { @@ -1662,14 +1669,20 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe // Read the chunk data from the BlockInputStream and write it to the container. int chunkLength = (int) chunkInfoProto.getLen(); - byte[] chunkData = new byte[chunkLength]; - int bytesRead = blockInputStream.read(chunkData, 0, chunkLength); + if (chunkByteBuffer.capacity() < chunkLength) { + chunkByteBuffer = ByteBuffer.allocate(chunkLength); + } + + chunkByteBuffer.clear(); + chunkByteBuffer.limit(chunkLength); + int bytesRead = blockInputStream.read(chunkByteBuffer); if (bytesRead != chunkLength) { throw new IOException("Error while reading chunk data from block input stream. Expected length: " + chunkLength + ", Actual length: " + bytesRead); } - ChunkBuffer chunkBuffer = ChunkBuffer.wrap(ByteBuffer.wrap(chunkData)); + chunkByteBuffer.flip(); + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkByteBuffer); ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); @@ -1697,8 +1710,6 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to List chunkList) throws IOException { ContainerData containerData = container.getContainerData(); - Map offsetLengthMap = chunkList.stream().collect(Collectors.toMap( - ContainerProtos.ChunkMerkleTree::getOffset, ContainerProtos.ChunkMerkleTree::getLength)); BlockID blockID = new BlockID(containerData.getContainerID(), blockId); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. @@ -1715,6 +1726,7 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to .setPipeline(pipeline) .setToken(blockToken) .build(); + // Under construction is set here, during BlockInputStream#initialize() it is used to update the block length. blkInfo.setUnderConstruction(true); try (BlockInputStream blockInputStream = (BlockInputStream) blockInputStreamFactory.create( RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), @@ -1730,12 +1742,15 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), localBlockData.getBlockCommitSequenceId()); - for (Map.Entry offsetLength : offsetLengthMap.entrySet()) { - Long offset = offsetLength.getKey(); - Long length = offsetLength.getValue(); + int chunkSize = (int) conf.getStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, OZONE_SCM_CHUNK_SIZE_DEFAULT, + StorageUnit.BYTES); + ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); + + for (ContainerProtos.ChunkMerkleTree chunkMerkleTree : chunkList) { + long chunkOffset = chunkMerkleTree.getOffset(); try { // Seek to the offset of the chunk. Seek updates the chunkIndex in the BlockInputStream. 
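
This patch moves both repair paths onto a single reusable ByteBuffer sized from the configured chunk size, growing it only when a chunk is larger. A minimal sketch of that clear/limit/read/flip cycle over a plain ReadableByteChannel (the Ozone BlockInputStream is not used here):

    import java.io.IOException;
    import java.nio.ByteBuffer;
    import java.nio.channels.ReadableByteChannel;

    /** Sketch: reuse one ByteBuffer across chunk reads, growing it only for oversized chunks. */
    public final class ReusableChunkBufferSketch {

      /**
       * Ensures the buffer can hold chunkLength bytes, reads exactly that many bytes from the
       * channel, and returns the buffer flipped and ready to be consumed by the chunk writer.
       */
      static ByteBuffer readChunk(ReadableByteChannel channel, ByteBuffer buffer, int chunkLength)
          throws IOException {
        if (buffer.capacity() < chunkLength) {
          buffer = ByteBuffer.allocate(chunkLength);   // grow only when a chunk exceeds the usual size
        }
        buffer.clear();
        buffer.limit(chunkLength);                     // cap the read at the chunk length
        while (buffer.hasRemaining()) {
          if (channel.read(buffer) < 0) {
            throw new IOException("Stream ended before " + chunkLength + " bytes were read");
          }
        }
        buffer.flip();                                 // prepare the buffer for the subsequent write
        return buffer;
      }

      private ReusableChunkBufferSketch() { }
    }

Reusing one buffer avoids allocating a fresh byte array for every chunk, which matters when a container holds many chunks of the configured maximum size.
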
- blockInputStream.seek(offset); + blockInputStream.seek(chunkOffset); ChunkInputStream currentChunkStream = blockInputStream.getChunkStreams().get( blockInputStream.getChunkIndex()); ContainerProtos.ChunkInfo chunkInfoProto = currentChunkStream.getChunkInfo(); @@ -1743,23 +1758,30 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); // Verify the chunk offset and length. - verifyChunksLength(chunkInfoProto, localChunksMap.get(offset)); + verifyChunksLength(chunkInfoProto, localChunksMap.get(chunkOffset)); - // Read the chunk data from the block input stream and write it to the container. - byte[] chunkData = new byte[length.intValue()]; - int bytesRead = blockInputStream.read(chunkData, 0, length.intValue()); - if (bytesRead != length) { + // Read the chunk data from the BlockInputStream and write it to the container. + int chunkLength = (int) chunkInfoProto.getLen(); + if (chunkByteBuffer.capacity() < chunkLength) { + chunkByteBuffer = ByteBuffer.allocate(chunkLength); + } + + chunkByteBuffer.clear(); + chunkByteBuffer.limit(chunkLength); + int bytesRead = blockInputStream.read(chunkByteBuffer); + if (bytesRead != chunkLength) { throw new IOException("Error while reading chunk data from block input stream. Expected length: " + - length + ", Actual length: " + bytesRead); + chunkLength + ", Actual length: " + bytesRead); } - ChunkBuffer chunkBuffer = ChunkBuffer.wrap(ByteBuffer.wrap(chunkData)); + chunkByteBuffer.flip(); + ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkByteBuffer); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); localChunksMap.put(chunkInfo.getOffset(), chunkInfoProto); } catch (IOException ex) { overwriteBcsId = false; LOG.error("Error while reconciling chunk {} for block {} in container {}", - offset, blockID, containerData.getContainerID(), ex); + chunkOffset, blockID, containerData.getContainerID(), ex); } } From 369b24d85d17d45c5b85df23c6500d017e3bd192 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Sat, 5 Apr 2025 00:09:24 +0530 Subject: [PATCH 17/21] Address review comments. 
--- .../hdds/scm/storage/ChunkInputStream.java | 2 +- .../hadoop/hdds/client/ReplicationConfig.java | 5 - .../ContainerChecksumTreeManager.java | 9 +- .../checksum/DNContainerOperationClient.java | 2 +- .../container/keyvalue/KeyValueHandler.java | 104 ++++++++---------- .../keyvalue/impl/BlockManagerImpl.java | 2 - .../ContainerMerkleTreeTestUtils.java | 32 ------ .../container/common/ContainerTestUtils.java | 31 ++++++ .../TestReconcileContainerCommandHandler.java | 8 +- .../keyvalue/TestKeyValueHandler.java | 4 +- .../TestContainerCommandReconciliation.java | 3 +- 11 files changed, 89 insertions(+), 113 deletions(-) diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java index ca7d15342e78..efad9ff76cec 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java @@ -727,7 +727,7 @@ String getChunkName() { return chunkInfo.getChunkName(); } - public long getLength() { + protected long getLength() { return length; } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java index 33f545b40ed3..20ddf555bcd6 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/ReplicationConfig.java @@ -113,11 +113,6 @@ static HddsProtos.ReplicationFactor getLegacyFactor( .getReplicationFactor(); } - if (replicationConfig instanceof StandaloneReplicationConfig) { - return ((StandaloneReplicationConfig) replicationConfig) - .getReplicationFactor(); - } - throw new UnsupportedOperationException( "Replication configuration of type " + replicationConfig.getReplicationType() diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index c4b60f10cef8..27642474eb25 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -162,11 +162,12 @@ public ContainerDiffReport diff(ContainerProtos.ContainerChecksumInfo thisChecks ContainerDiffReport report = new ContainerDiffReport(); try { captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> { - Preconditions.assertNotNull(thisChecksumInfo, "Our checksum info is null"); - Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null"); + Preconditions.assertNotNull(thisChecksumInfo, "Datanode's checksum info is null."); + Preconditions.assertNotNull(peerChecksumInfo, "Peer checksum info is null."); if (thisChecksumInfo.getContainerID() != peerChecksumInfo.getContainerID()) { - throw new StorageContainerException("Container Id does not match for container " - + thisChecksumInfo.getContainerID(), ContainerProtos.Result.CONTAINER_ID_MISMATCH); + throw new StorageContainerException("Container ID does not match. 
Local container ID " + + thisChecksumInfo.getContainerID() + " , Peer container ID " + peerChecksumInfo.getContainerID(), + ContainerProtos.Result.CONTAINER_ID_MISMATCH); } compareContainerMerkleTree(thisChecksumInfo, peerChecksumInfo, report); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java index 2310e7d61fe7..d5ba243dd12a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/DNContainerOperationClient.java @@ -65,7 +65,7 @@ public DNContainerOperationClient(ConfigurationSource conf, } @Nonnull - public static XceiverClientManager createClientManager( + private static XceiverClientManager createClientManager( ConfigurationSource conf, CertificateClient certificateClient) throws IOException { ClientTrustManager trustManager = null; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index a324134ccb7e..a6b2104bc292 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -25,7 +25,6 @@ import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CHUNK_FILE_INCONSISTENCY; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_ALREADY_EXISTS; -import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_CHECKSUM_ERROR; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_INTERNAL_ERROR; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY; import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.DELETE_ON_NON_EMPTY_CONTAINER; @@ -73,7 +72,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.time.Clock; -import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.HashMap; @@ -133,7 +131,6 @@ import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.helpers.TokenHelper; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; @@ -182,6 +179,7 @@ public class KeyValueHandler extends Handler { private final long maxDeleteLockWaitMs; private final Function byteBufferToByteString; private final boolean validateChunkChecksumData; + private final int chunkSize; // A striped lock that is held during container creation. 
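
The field declared just below this comment uses Guava's Striped locks, so creations of different containers rarely contend while concurrent creations of the same container serialize on one stripe. A small usage sketch, with the stripe count and the createContainer body as placeholder choices:

    import com.google.common.util.concurrent.Striped;
    import java.util.concurrent.locks.Lock;

    /** Sketch: stripe locks by container id to scope the creation critical section per container. */
    public final class StripedLockSketch {

      // A fixed stripe count bounds memory while still spreading contention across container ids.
      private final Striped<Lock> containerCreationLocks = Striped.lock(1024);

      void createContainer(long containerId) {
        Lock lock = containerCreationLocks.get(containerId);  // the same id always maps to the same stripe
        lock.lock();
        try {
          // placeholder for the creation work guarded by this stripe
          System.out.println("creating container " + containerId);
        } finally {
          lock.unlock();
        }
      }
    }
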
private final Striped containerCreationLocks; private final ContainerChecksumTreeManager checksumManager; @@ -250,6 +248,7 @@ public KeyValueHandler(ConfigurationSource config, .createByteBufferConversion(isUnsafeByteBufferConversionEnabled); blockInputStreamFactory = new BlockInputStreamFactoryImpl(); + chunkSize = (int) conf.getStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, OZONE_SCM_CHUNK_SIZE_DEFAULT, StorageUnit.BYTES); if (ContainerLayoutVersion.getConfiguredVersion(conf) == ContainerLayoutVersion.FILE_PER_CHUNK) { @@ -627,19 +626,24 @@ ContainerCommandResponseProto handleCloseContainer( return getSuccessResponse(request); } - public Optional createContainerMerkleTree(Container container) { + + /** + * Create a Merkle tree for the container if it does not exist. + * TODO: This method should be changed to private after HDDS-10374 is merged. + */ + @VisibleForTesting + public void createContainerMerkleTree(Container container) { if (ContainerChecksumTreeManager.checksumFileExist(container)) { - return Optional.empty(); + return; } try { KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); - return Optional.of(updateContainerChecksum(containerData)); + updateAndGetContainerChecksum(containerData); } catch (IOException ex) { LOG.error("Cannot create container checksum for container {} , Exception: ", container.getContainerData().getContainerID(), ex); } - return Optional.empty(); } /** @@ -1505,25 +1509,18 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); Optional optionalChecksumInfo = checksumManager.read(containerData); long oldDataChecksum = 0; - long dataChecksum = 0; ContainerProtos.ContainerChecksumInfo checksumInfo; if (optionalChecksumInfo.isPresent()) { checksumInfo = optionalChecksumInfo.get(); - oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); } else { // Try creating the checksum info from RocksDB metadata if it is not present. 
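
The fallback described in the comment above reads the persisted checksum info when it exists and otherwise rebuilds it from RocksDB metadata before reconciliation proceeds. A compact sketch of that read-or-rebuild pattern; ChecksumInfo, Rebuilder, and readOrRebuild are hypothetical stand-ins, not the KeyValueHandler API.

    import java.io.IOException;
    import java.util.Optional;
    import java.util.function.Supplier;

    /** Sketch: use a persisted checksum when present, otherwise rebuild it from block metadata. */
    public final class ReadOrRebuildSketch {

      /** Hypothetical stand-in for the container checksum info protobuf. */
      static final class ChecksumInfo {
        final long dataChecksum;
        ChecksumInfo(long dataChecksum) {
          this.dataChecksum = dataChecksum;
        }
      }

      /** Rebuilds the checksum info from metadata; may fail with an IOException. */
      @FunctionalInterface
      interface Rebuilder {
        ChecksumInfo rebuild() throws IOException;
      }

      /**
       * Returns the stored checksum info when the reader has one, otherwise rebuilds it,
       * so the caller always ends up with a usable ChecksumInfo or a propagated IOException.
       */
      static ChecksumInfo readOrRebuild(Supplier<Optional<ChecksumInfo>> reader, Rebuilder rebuilder)
          throws IOException {
        Optional<ChecksumInfo> stored = reader.get();
        if (stored.isPresent()) {
          return stored.get();
        }
        return rebuilder.rebuild();   // fall back to reconstructing the tree from block metadata
      }

      private ReadOrRebuildSketch() { }
    }
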
- optionalChecksumInfo = createContainerMerkleTree(container); - if (!optionalChecksumInfo.isPresent()) { - throw new StorageContainerException("Failed to reconcile container " + containerData.getContainerID() - + " as checksum info is not available", CONTAINER_CHECKSUM_ERROR); - } - checksumInfo = optionalChecksumInfo.get(); - oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); + checksumInfo = updateAndGetContainerChecksum(containerData); } + oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); for (DatanodeDetails peer : peers) { - Instant start = Instant.now(); + long start = Instant.now().toEpochMilli(); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( containerData.getContainerID(), peer); if (peerChecksumInfo == null) { @@ -1533,13 +1530,13 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container } ContainerDiffReport diffReport = checksumManager.diff(checksumInfo, peerChecksumInfo); - TokenHelper tokenHelper = dnClient.getTokenHelper(); Pipeline pipeline = createSingleNodePipeline(peer); + ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); // Handle missing blocks for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { try { - handleMissingBlock(kvContainer, tokenHelper, pipeline, dnClient, missingBlock); + handleMissingBlock(kvContainer, pipeline, dnClient, missingBlock, chunkByteBuffer); } catch (IOException e) { LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), containerData.getContainerID(), e); @@ -1549,30 +1546,28 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle missing chunks for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { try { - reconcileChunksPerBlock(kvContainer, tokenHelper, pipeline, dnClient, entry.getKey(), - entry.getValue()); + reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), entry.getValue(), chunkByteBuffer); } catch (IOException e) { LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), - containerData.getContainerID(), e); + containerData.getContainerID(), e); } } // Handle corrupt chunks for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { try { - reconcileChunksPerBlock(kvContainer, tokenHelper, pipeline, dnClient, entry.getKey(), - entry.getValue()); + reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), entry.getValue(), chunkByteBuffer); } catch (IOException e) { LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), - containerData.getContainerID(), e); + containerData.getContainerID(), e); } } - // Update checksum based on RocksDB metadata, The read chunk validates the checksum of the data + // Update checksum based on RocksDB metadata. The read chunk validates the checksum of the data // we read. So we can update the checksum only based on the RocksDB metadata. 
- ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateContainerChecksum(containerData); - dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); + ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateAndGetContainerChecksum(containerData); + long dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); - long duration = Duration.between(start, Instant.now()).toMillis(); + long duration = Instant.now().toEpochMilli() - start; if (dataChecksum == oldDataChecksum) { metrics.incContainerReconciledWithoutChanges(); LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", @@ -1595,7 +1590,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container * Updates the container merkle tree based on the RocksDb's block metadata and returns the updated checksum info. * @param containerData - Container data for which the container merkle tree needs to be updated. */ - private ContainerProtos.ContainerChecksumInfo updateContainerChecksum(KeyValueContainerData containerData) + private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksum(KeyValueContainerData containerData) throws IOException { ContainerMerkleTreeWriter merkleTree = new ContainerMerkleTreeWriter(); try (DBHandle dbHandle = BlockUtils.getDB(containerData, conf); @@ -1604,37 +1599,36 @@ private ContainerProtos.ContainerChecksumInfo updateContainerChecksum(KeyValueCo while (blockIterator.hasNext()) { BlockData blockData = blockIterator.nextBlock(); List chunkInfos = blockData.getChunks(); + // TODO: Add empty blocks to the merkle tree. Done in HDDS-10374, needs to be backported. merkleTree.addChunks(blockData.getLocalID(), chunkInfos); } } ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager .writeContainerDataTree(containerData, merkleTree); + // TODO: Remove this as this is being set in writeContainerDataTree by HDDS-12745 containerData.setDataChecksum(checksumInfo.getContainerMerkleTree().getDataChecksum()); return checksumInfo; } /** * Handle missing block. It reads the missing block data from the peer datanode and writes it to the local container. - * If the block write fails, the block commit sequence id is not updated. + * If the block write fails, the block commit sequence id of the container and the block are not updated. */ - private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHelper, - Pipeline pipeline, DNContainerOperationClient dnClient, - ContainerProtos.BlockMerkleTree missingBlock) + private void handleMissingBlock(KeyValueContainer container, Pipeline pipeline, DNContainerOperationClient dnClient, + ContainerProtos.BlockMerkleTree missingBlock, ByteBuffer chunkByteBuffer) throws IOException { ContainerData containerData = container.getContainerData(); BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. - Token blockToken = tokenHelper.getBlockToken(blockID, 0L); + Token blockToken = dnClient.getTokenHelper().getBlockToken(blockID, 0L); if (getBlockManager().blockExists(container, blockID)) { - LOG.warn("Block {} already exists in container {}. This block {} is expected to not exist. The container " + - "merkle tree for container {} is stale. 
Skipping reconciliation for block.", blockID, - containerData.getContainerID(), blockID, containerData.getContainerID()); + LOG.warn("Block {} already exists in container {}. The block should not exist and our container merkle tree" + + " is stale. Skipping reconciliation for this block.", blockID, containerData.getContainerID()); return; } List successfulChunksList = new ArrayList<>(); - // Update BcsId only if all chunks are successfully written. boolean overwriteBcsId = true; BlockLocationInfo blkInfo = new BlockLocationInfo.Builder() @@ -1648,18 +1642,13 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), blkInfo, pipeline, blockToken, dnClient.getXceiverClientManager(), null, conf.getObject(OzoneClientConfig.class))) { - // Initialize the BlockInputStream. Initializes the blockData and ChunkInputStream for each chunk + // Initialize the BlockInputStream. Gets the blockData from the peer, sets the block length and + // initializes ChunkInputStream for each chunk. blockInputStream.initialize(); - - // BlockInputStream#initialize() should be called before this, as it gets the BlockData for the block. - // and sets the block length. ContainerProtos.BlockData peerBlockData = blockInputStream.getStreamBlockData(); // The maxBcsId is the peer's bcsId as there is no block for this blockID in the local container. long maxBcsId = peerBlockData.getBlockID().getBlockCommitSequenceId(); List peerChunksList = peerBlockData.getChunksList(); - int chunkSize = (int) conf.getStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, OZONE_SCM_CHUNK_SIZE_DEFAULT, - StorageUnit.BYTES); - ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); // Don't update bcsId if chunk read fails for (ContainerProtos.ChunkInfo chunkInfoProto : peerChunksList) { @@ -1686,6 +1675,10 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); + // If the chunk read/write fails, we are expected to have holes in the blockData's chunk list. + // But that is okay, if the read fails it means there might be a hole in the peer datanode as well. + // If the chunk write fails then we don't want to add the metadata without the actual data as there is + // no data to verify the chunk checksum. successfulChunksList.add(chunkInfoProto); } catch (IOException ex) { overwriteBcsId = false; @@ -1705,15 +1698,16 @@ private void handleMissingBlock(KeyValueContainer container, TokenHelper tokenHe * datanode and writes it to the local container. If the chunk write fails, the block commit sequence * id is not updated. */ - private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper tokenHelper, Pipeline pipeline, + private void reconcileChunksPerBlock(KeyValueContainer container, Pipeline pipeline, DNContainerOperationClient dnClient, long blockId, - List chunkList) throws IOException { + List chunkList, ByteBuffer chunkByteBuffer) + throws IOException { ContainerData containerData = container.getContainerData(); BlockID blockID = new BlockID(containerData.getContainerID(), blockId); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. 
- Token blockToken = tokenHelper.getBlockToken(blockID, 0L); + Token blockToken = dnClient.getTokenHelper().getBlockToken(blockID, 0L); BlockData localBlockData = getBlockManager().getBlock(container, blockID); SortedMap localChunksMap = localBlockData.getChunks().stream() @@ -1732,20 +1726,14 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), blkInfo, pipeline, blockToken, dnClient.getXceiverClientManager(), null, conf.getObject(OzoneClientConfig.class))) { - // Initialize the BlockInputStream. Initializes the blockData and ChunkInputStream for each chunk + // Initialize the BlockInputStream. Gets the blockData from the peer, sets the block length and + // initializes ChunkInputStream for each chunk. blockInputStream.initialize(); - - // BlockInputStream#initialize() should be called before this, as it gets the BlockData for the block - // and sets the block length. ContainerProtos.BlockData peerBlockData = blockInputStream.getStreamBlockData(); // Check the local bcsId with the one from the bcsId from the peer datanode. long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), localBlockData.getBlockCommitSequenceId()); - int chunkSize = (int) conf.getStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, OZONE_SCM_CHUNK_SIZE_DEFAULT, - StorageUnit.BYTES); - ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); - for (ContainerProtos.ChunkMerkleTree chunkMerkleTree : chunkList) { long chunkOffset = chunkMerkleTree.getOffset(); try { @@ -1756,8 +1744,6 @@ private void reconcileChunksPerBlock(KeyValueContainer container, TokenHelper to ContainerProtos.ChunkInfo chunkInfoProto = currentChunkStream.getChunkInfo(); ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); - - // Verify the chunk offset and length. verifyChunksLength(chunkInfoProto, localChunksMap.get(chunkOffset)); // Read the chunk data from the BlockInputStream and write it to the container. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java index b1c5aeeb45ce..20ab37cf1c99 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/impl/BlockManagerImpl.java @@ -112,8 +112,6 @@ public long putBlockForClosedContainer(Container container, BlockData data, bool // We are not locking the key manager since RocksDB serializes all actions // against a single DB. We rely on DB level locking to avoid conflicts. try (DBHandle db = BlockUtils.getDB(containerData, config)) { - // This is a post condition that acts as a hint to the user. - // Should never fail. 
Preconditions.checkNotNull(db, DB_NULL_ERR_MSG); long blockBcsID = data.getBlockCommitSequenceId(); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index b33c04c76be6..22559aa37813 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import com.google.common.collect.Lists; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -39,23 +38,15 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.ConfigurationSource; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.ozone.HddsDatanodeService; -import org.apache.hadoop.ozone.container.ContainerTestHelper; -import org.apache.hadoop.ozone.container.common.helpers.BlockData; -import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; -import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; -import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; @@ -363,27 +354,4 @@ public static void writeContainerDataTreeProto(ContainerData data, ContainerProt + data.getContainerID(), ex); } } - - /** - * Creates block metadata for the given container with the specified number of blocks and chunks per block. 
- */ - public static void createBlockMetaData(KeyValueContainerData data, int numOfBlocksPerContainer, - int numOfChunksPerBlock) throws IOException { - try (DBHandle metadata = BlockUtils.getDB(data, new OzoneConfiguration())) { - for (int j = 0; j < numOfBlocksPerContainer; j++) { - BlockID blockID = new BlockID(data.getContainerID(), j); - String blockKey = data.getBlockKey(blockID.getLocalID()); - BlockData kd = new BlockData(blockID); - List chunks = Lists.newArrayList(); - for (int k = 0; k < numOfChunksPerBlock; k++) { - long dalaLen = 10L; - ChunkInfo chunkInfo = ContainerTestHelper.getChunk(blockID.getLocalID(), k, k * dalaLen, dalaLen); - ContainerTestHelper.setDataChecksum(chunkInfo, ContainerTestHelper.getData((int) dalaLen)); - chunks.add(chunkInfo.getProtoBufMessage()); - } - kd.setChunks(chunks); - metadata.getStore().getBlockDataTable().put(blockKey, kd); - } - } - } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java index aa771cb6c365..ab8929c18c8a 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/ContainerTestUtils.java @@ -22,21 +22,25 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.UUID; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.conf.ReconfigurationHandler; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto; @@ -53,6 +57,8 @@ import org.apache.hadoop.ozone.container.ContainerTestHelper; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; +import org.apache.hadoop.ozone.container.common.helpers.BlockData; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; @@ -60,6 +66,7 @@ import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher; +import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.interfaces.Handler; import 
org.apache.hadoop.ozone.container.common.interfaces.VolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; @@ -76,6 +83,7 @@ import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError; @@ -419,4 +427,27 @@ public static XceiverServerRatis newXceiverServerRatis( getNoopContainerDispatcher(), getEmptyContainerController(), null, null); } + + /** + * Creates block metadata for the given container with the specified number of blocks and chunks per block. + */ + public static void createBlockMetaData(KeyValueContainerData data, int numOfBlocksPerContainer, + int numOfChunksPerBlock) throws IOException { + try (DBHandle metadata = BlockUtils.getDB(data, new OzoneConfiguration())) { + for (int j = 0; j < numOfBlocksPerContainer; j++) { + BlockID blockID = new BlockID(data.getContainerID(), j); + String blockKey = data.getBlockKey(blockID.getLocalID()); + BlockData kd = new BlockData(blockID); + List chunks = Lists.newArrayList(); + for (int k = 0; k < numOfChunksPerBlock; k++) { + long dataLen = 10L; + ChunkInfo chunkInfo = ContainerTestHelper.getChunk(blockID.getLocalID(), k, k * dataLen, dataLen); + ContainerTestHelper.setDataChecksum(chunkInfo, ContainerTestHelper.getData((int) dataLen)); + chunks.add(chunkInfo.getProtoBufMessage()); + } + kd.setChunks(chunks); + metadata.getStore().getBlockDataTable().put(blockKey, kd); + } + } + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java index 0582848206d6..d7d01f9349af 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestReconcileContainerCommandHandler.java @@ -20,7 +20,7 @@ import static java.util.Collections.singletonMap; import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; import static org.apache.hadoop.ozone.OzoneConsts.GB; -import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.createBlockMetaData; +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createBlockMetaData; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -102,6 +102,7 @@ public void init(ContainerLayoutVersion layout, IncrementalReportSender blockMerkleTreeList = builder.getContainerMerkleTree() diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 
593ae8cb61bf..f806d33c6322 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -373,7 +373,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception { db.getStore().flushDB(); } - // TODO: Use On-demand container scanner to build the new container merkle tree. + // TODO: Use On-demand container scanner to build the new container merkle tree. (HDDS-10374) Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); kvHandler.createContainerMerkleTree(container); ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = @@ -476,6 +476,7 @@ public void testContainerChecksumChunkCorruption() throws Exception { // 3. Set Unhealthy for first chunk of all blocks. This should be done by the scanner, Until then this is a // manual step. + // // TODO: Use On-demand container scanner to build the new container merkle tree (HDDS-10374) Random random = new Random(); ContainerProtos.ContainerChecksumInfo.Builder builder = containerChecksumAfterChunkCorruption.toBuilder(); List blockMerkleTreeList = builder.getContainerMerkleTree() From f062bed1c8a004084d014eb17fecbd29c92ff6a3 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Sat, 5 Apr 2025 01:31:24 +0530 Subject: [PATCH 18/21] Address review comments. --- .../hadoop/ozone/container/keyvalue/KeyValueHandler.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index a6b2104bc292..19c625612413 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1508,7 +1508,6 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container KeyValueContainer kvContainer = (KeyValueContainer) container; KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); Optional optionalChecksumInfo = checksumManager.read(containerData); - long oldDataChecksum = 0; ContainerProtos.ContainerChecksumInfo checksumInfo; if (optionalChecksumInfo.isPresent()) { @@ -1517,7 +1516,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Try creating the checksum info from RocksDB metadata if it is not present. 
checksumInfo = updateAndGetContainerChecksum(containerData); } - oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); + long oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); for (DatanodeDetails peer : peers) { long start = Instant.now().toEpochMilli(); @@ -1763,6 +1762,9 @@ private void reconcileChunksPerBlock(KeyValueContainer container, Pipeline pipel chunkByteBuffer.flip(); ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkByteBuffer); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); + // In reconciling missing chunks which happens at the end of the block, we are expected to have holes in + // the blockData's chunk list because we continue to reconcile even if there are failures while reconciling + // chunks which is fine as we don't update the bcsId. localChunksMap.put(chunkInfo.getOffset(), chunkInfoProto); } catch (IOException ex) { overwriteBcsId = false; From a5796a1dc7045e2a2882a73cfa6f40d79d6248de Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Wed, 9 Apr 2025 00:15:05 +0530 Subject: [PATCH 19/21] Update tests and address review comments. --- .../container/keyvalue/KeyValueHandler.java | 2 + .../keyvalue/TestKeyValueHandler.java | 135 +++++++++--------- 2 files changed, 73 insertions(+), 64 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 19c625612413..5a58ad04f967 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1689,6 +1689,7 @@ private void handleMissingBlock(KeyValueContainer container, Pipeline pipeline, BlockData putBlockData = BlockData.getFromProtoBuf(peerBlockData); putBlockData.setChunks(successfulChunksList); putBlockForClosedContainer(container, putBlockData, maxBcsId, overwriteBcsId); + chunkManager.finishWriteChunks(container, putBlockData); } } @@ -1776,6 +1777,7 @@ private void reconcileChunksPerBlock(KeyValueContainer container, Pipeline pipel List localChunkList = new ArrayList<>(localChunksMap.values()); localBlockData.setChunks(localChunkList); putBlockForClosedContainer(container, localBlockData, maxBcsId, overwriteBcsId); + chunkManager.finishWriteChunks(container, localBlockData); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index ceb45589f2fc..c3f75902defa 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -29,7 +29,6 @@ import static org.apache.hadoop.ozone.OzoneConsts.GB; import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; -import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.COMMIT_STAGE; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; import static 
org.apache.hadoop.ozone.container.common.ContainerTestUtils.createBlockMetaData; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; @@ -41,12 +40,11 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; -import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.times; @@ -54,7 +52,6 @@ import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; @@ -70,6 +67,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Random; import java.util.Set; @@ -82,7 +80,6 @@ import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.hdds.client.StandaloneReplicationConfig; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; @@ -90,17 +87,14 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; -import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; -import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; -import org.apache.hadoop.hdds.security.SecurityConfig; import org.apache.hadoop.hdds.security.token.TokenVerifier; import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.Checksum; import org.apache.hadoop.ozone.common.ChecksumData; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; @@ -109,7 +103,6 @@ import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; -import org.apache.hadoop.ozone.container.common.helpers.TokenHelper; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; @@ -128,6 +121,7 @@ import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.util.Sets; import 
org.apache.ozone.test.GenericTestUtils; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -150,13 +144,10 @@ public class TestKeyValueHandler { private Path tempDir; @TempDir private Path dbFile; - @TempDir - private Path testRoot; private static final long DUMMY_CONTAINER_ID = 9999; private static final String DUMMY_PATH = "dummy/dir/doesnt/exist"; - private static final int UNIT_LEN = 1024; - private static final int CHUNK_LEN = 3 * UNIT_LEN; + private static final int CHUNK_LEN = 3 * (int) OzoneConsts.KB; private static final int CHUNKS_PER_BLOCK = 4; private static final String DATANODE_UUID = UUID.randomUUID().toString(); private static final String CLUSTER_ID = UUID.randomUUID().toString(); @@ -165,6 +156,9 @@ public class TestKeyValueHandler { private KeyValueHandler handler; private OzoneConfiguration conf; + /** + * Number of corrupt blocks and chunks. + */ public static Stream corruptionValues() { return Stream.of( Arguments.of(5, 0), @@ -186,8 +180,8 @@ public static Stream corruptionValues() { public void setup() throws IOException { // Create mock HddsDispatcher and KeyValueHandler. conf = new OzoneConfiguration(); - conf.set(HDDS_DATANODE_DIR_KEY, testRoot.toString()); - conf.set(OZONE_METADATA_DIRS, testRoot.toString()); + conf.set(HDDS_DATANODE_DIR_KEY, tempDir.toString()); + conf.set(OZONE_METADATA_DIRS, tempDir.toString()); handler = mock(KeyValueHandler.class); HashMap handlers = new HashMap<>(); @@ -546,7 +540,7 @@ public void testDeleteContainer() throws IOException { } @ContainerLayoutTestInfo.ContainerTest - public void testReconcileContainer(ContainerLayoutVersion layoutVersion) throws Exception { + public void testContainerChecksumInvocation(ContainerLayoutVersion layoutVersion) throws Exception { conf = new OzoneConfiguration(); KeyValueContainerData data = new KeyValueContainerData(123L, layoutVersion, GB, @@ -585,22 +579,20 @@ public void testReconcileContainer(ContainerLayoutVersion layoutVersion) throws when(mockDnClient.getContainerChecksumInfo(anyLong(), any())).thenReturn(null); keyValueHandler.reconcileContainer(mockDnClient, container, Sets.newHashSet(peer1, peer2, peer3)); // Make sure all the replicas are used for reconciliation. 
- Mockito.verify(mockDnClient, times(3)).getContainerChecksumInfo(anyLong(), any()); + Mockito.verify(mockDnClient, atMostOnce()).getContainerChecksumInfo(anyLong(), eq(peer1)); + Mockito.verify(mockDnClient, atMostOnce()).getContainerChecksumInfo(anyLong(), eq(peer2)); + Mockito.verify(mockDnClient, atMostOnce()).getContainerChecksumInfo(anyLong(), eq(peer3)); Assertions.assertEquals(1, icrCount.get()); } @ParameterizedTest @MethodSource("corruptionValues") public void testFullContainerReconciliation(int numBlocks, int numChunks) throws Exception { - KeyValueHandler kvHandler = createKeyValueHandler(testRoot); + KeyValueHandler kvHandler = createKeyValueHandler(tempDir); ContainerChecksumTreeManager checksumManager = kvHandler.getChecksumManager(); - DNContainerOperationClient dnClient = mock(DNContainerOperationClient.class); - XceiverClientManager xceiverClientManager = mock(XceiverClientManager.class); - TokenHelper tokenHelper = new TokenHelper(new SecurityConfig(conf), null); - when(dnClient.getTokenHelper()).thenReturn(tokenHelper); - when(dnClient.getXceiverClientManager()).thenReturn(xceiverClientManager); + DNContainerOperationClient dnClient = new DNContainerOperationClient(conf, null, null); final long containerID = 100L; - // Create 3 containers with 10 blocks each and 3 replicas. + // Create 3 containers with 15 blocks each and 3 replicas. List containers = createContainerWithBlocks(kvHandler, containerID, 15, 3); assertEquals(3, containers.size()); @@ -620,44 +612,50 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws // There should be more than 1 checksum because of the corruption. assertTrue(checksumsBeforeReconciliation.size() > 1); - List datanodes = Lists.newArrayList(randomDatanodeDetails(), randomDatanodeDetails(), + List datanodes = ImmutableList.of(randomDatanodeDetails(), randomDatanodeDetails(), randomDatanodeDetails()); + Map dnToContainerMap = new HashMap<>(); + dnToContainerMap.put(datanodes.get(0).getUuidString(), containers.get(0)); + dnToContainerMap.put(datanodes.get(1).getUuidString(), containers.get(1)); + dnToContainerMap.put(datanodes.get(2).getUuidString(), containers.get(2)); // Setup mock for each datanode network calls needed for reconciliation. 
- try (MockedStatic containerProtocolMock = Mockito.mockStatic(ContainerProtocolCalls.class); - MockedStatic dnClientMock = Mockito.mockStatic(DNContainerOperationClient.class)) { - - for (int i = 0; i < datanodes.size(); i++) { - DatanodeDetails datanode = datanodes.get(i); - KeyValueContainer container = containers.get(i); - - Pipeline pipeline = Pipeline.newBuilder() - .setNodes(ImmutableList.of(datanode)) - .setId(PipelineID.valueOf(datanode.getUuid())) - .setState(Pipeline.PipelineState.CLOSED) - .setReplicationConfig(StandaloneReplicationConfig.getInstance( - HddsProtos.ReplicationFactor.ONE)).build(); - XceiverClientSpi client = mock(XceiverClientSpi.class); - - dnClientMock.when(() -> DNContainerOperationClient.createSingleNodePipeline(datanode)).thenReturn(pipeline); - when(xceiverClientManager.acquireClientForReadData(pipeline)).thenReturn(client); - doNothing().when(xceiverClientManager).releaseClientForReadData(eq(client), anyBoolean()); - when(client.getPipeline()).thenReturn(pipeline); - - // Mock checksum info - when(dnClient.getContainerChecksumInfo(containerID, datanode)) - .thenReturn(checksumManager.read(container.getContainerData()).get()); - - // Mock getBlock - containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(eq(client), any(), any(), any(), anyMap())) - .thenAnswer(inv -> ContainerProtos.GetBlockResponseProto.newBuilder() - .setBlockData(kvHandler.getBlockManager().getBlock(container, inv.getArgument(2)).getProtoBufMessage()) - .build()); - - // Mock readChunk - containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(eq(client), any(), any(), any(), any())) - .thenAnswer(inv -> createReadChunkResponse(inv, container, kvHandler)); - } + try (MockedStatic containerProtocolMock = + Mockito.mockStatic(ContainerProtocolCalls.class)) { + // Mock getContainerChecksumInfo + containerProtocolMock.when(() -> ContainerProtocolCalls.getContainerChecksumInfo(any(), anyLong(), any())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + DatanodeDetails dn = xceiverClientSpi.getPipeline().getClosestNode(); + KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); + ByteString checksumInfo = checksumManager.getContainerChecksumInfo(container.getContainerData()); + return ContainerProtos.GetContainerChecksumInfoResponseProto.newBuilder() + .setContainerID(containerID) + .setContainerChecksumInfo(checksumInfo) + .build(); + }); + + // Mock getBlock + containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(any(), any(), any(), any(), anyMap())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + DatanodeDetails dn = xceiverClientSpi.getPipeline().getClosestNode(); + KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); + ContainerProtos.BlockData blockData = kvHandler.getBlockManager().getBlock(container, inv.getArgument(2)) + .getProtoBufMessage(); + return ContainerProtos.GetBlockResponseProto.newBuilder() + .setBlockData(blockData) + .build(); + }); + + // Mock readChunk + containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(any(), any(), any(), any(), any())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + DatanodeDetails dn = xceiverClientSpi.getPipeline().getClosestNode(); + KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); + return createReadChunkResponse(inv, container, kvHandler); + }); kvHandler.reconcileContainer(dnClient, containers.get(0), Sets.newHashSet(datanodes)); 
kvHandler.reconcileContainer(dnClient, containers.get(1), Sets.newHashSet(datanodes)); @@ -810,6 +808,10 @@ private KeyValueHandler createKeyValueHandler(Path path) throws IOException { final KeyValueHandler kvHandler = ContainerTestUtils.getKeyValueHandler(conf, DATANODE_UUID, containerSet, volumeSet); kvHandler.setClusterID(CLUSTER_ID); + // Clean up metrics for next tests. + hddsVolume.getVolumeInfoStats().unregister(); + hddsVolume.getVolumeIOStats().unregister(); + ContainerMetrics.remove(); return kvHandler; } @@ -827,7 +829,7 @@ protected List createContainerWithBlocks(KeyValueHandler kvHa MutableVolumeSet volumeSet = new MutableVolumeSet(DATANODE_UUID, conf, null, StorageVolume.VolumeType.DATA_VOLUME, null); createDbInstancesForTestIfNeeded(volumeSet, CLUSTER_ID, CLUSTER_ID, conf); - int bytesPerChecksum = 2 * UNIT_LEN; + int bytesPerChecksum = 2 * (int) OzoneConsts.KB; Checksum checksum = new Checksum(ContainerProtos.ChecksumType.SHA256, bytesPerChecksum); byte[] chunkData = RandomStringUtils.randomAscii(CHUNK_LEN).getBytes(UTF_8); @@ -837,7 +839,7 @@ protected List createContainerWithBlocks(KeyValueHandler kvHa KeyValueContainerData containerData = new KeyValueContainerData(containerId, ContainerLayoutVersion.FILE_PER_BLOCK, (long) CHUNKS_PER_BLOCK * CHUNK_LEN * blocks, UUID.randomUUID().toString(), UUID.randomUUID().toString()); - Path kvContainerPath = Files.createDirectory(testRoot.resolve(containerId + "-" + j)); + Path kvContainerPath = Files.createDirectory(tempDir.resolve(containerId + "-" + j)); containerData.setMetadataPath(kvContainerPath.toString()); containerData.setDbFile(kvContainerPath.toFile()); @@ -863,9 +865,8 @@ protected List createContainerWithBlocks(KeyValueHandler kvHa chunkList.add(info.getProtoBufMessage()); kvHandler.getChunkManager().writeChunk(container, blockID, info, ByteBuffer.wrap(chunkData), WRITE_STAGE); - kvHandler.getChunkManager().writeChunk(container, blockID, info, - ByteBuffer.wrap(chunkData), COMMIT_STAGE); } + kvHandler.getChunkManager().finishWriteChunks(container, blockData); blockData.setChunks(chunkList); blockData.setBlockCommitSequenceId(i); kvHandler.getBlockManager().putBlock(container, blockData); @@ -880,6 +881,12 @@ protected List createContainerWithBlocks(KeyValueHandler kvHa return containers; } + /** + * Introduce corruption in the container. + * 1. Delete blocks from the container. + * 2. Corrupt chunks at an offset. + * If reverse is true, the blocks and chunks are deleted in reverse order. + */ private void introduceCorruption(KeyValueHandler kvHandler, KeyValueContainer keyValueContainer, int numBlocks, int numChunks, boolean reverse) throws IOException { Random random = new Random(); From d578f3ca521b9dc95fe4eda747e638a0e4f816f2 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 10 Apr 2025 13:04:35 +0530 Subject: [PATCH 20/21] Address review comments. 
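This round of review feedback is mostly checksum bookkeeping and test cleanup: writing the container merkle tree now also refreshes the in-memory data checksum on ContainerData (so the explicit setDataChecksum call and its TODO in KeyValueHandler are dropped), the reconciliation test asserts that the in-memory checksum matches the persisted tree, the ContainerProtocolCalls mocks are factored into a mockContainerProtocolCalls helper that resolves the target container from the single-node pipeline, and the integration test waits for a fresh datanode heartbeat before checking that all replicas report the same data checksum to SCM.

As a reading aid only, here is a minimal, self-contained Java sketch of the wait-until-condition pattern the integration test relies on. GenericTestUtils.waitFor plays this role in the actual test; the class and method names below are invented for the illustration and are not part of the patch.

    import java.util.function.BooleanSupplier;

    public final class WaitForSketch {
      // Poll the condition every intervalMs until it returns true or timeoutMs elapses.
      static void waitFor(BooleanSupplier condition, long intervalMs, long timeoutMs)
          throws InterruptedException {
        long deadline = System.currentTimeMillis() + timeoutMs;
        while (!condition.getAsBoolean()) {
          if (System.currentTimeMillis() > deadline) {
            throw new IllegalStateException("Timed out waiting for condition");
          }
          Thread.sleep(intervalMs);
        }
      }

      public static void main(String[] args) throws InterruptedException {
        long start = System.currentTimeMillis();
        // Example: wait until at least 100 ms have passed, checking every 10 ms --
        // the same shape as waiting for replica data checksums to converge at SCM.
        waitFor(() -> System.currentTimeMillis() - start >= 100, 10, 5_000);
        System.out.println("condition satisfied");
      }
    }
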
--- .../ContainerChecksumTreeManager.java | 2 + .../container/keyvalue/KeyValueHandler.java | 2 - .../ContainerMerkleTreeTestUtils.java | 1 + .../keyvalue/TestKeyValueHandler.java | 94 +++++++++++-------- .../TestContainerCommandReconciliation.java | 34 ++++--- 5 files changed, 80 insertions(+), 53 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 27642474eb25..99b5800c450c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -364,6 +364,8 @@ private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo che throw new IOException("Error occurred when writing container merkle tree for containerID " + data.getContainerID(), ex); } + // Set in-memory data checksum. + data.setDataChecksum(checksumInfo.getContainerMerkleTree().getDataChecksum()); } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 5a58ad04f967..43926ca5e282 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1604,8 +1604,6 @@ private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksum(KeyV } ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager .writeContainerDataTree(containerData, merkleTree); - // TODO: Remove this as this is being set in writeContainerDataTree by HDDS-12745 - containerData.setDataChecksum(checksumInfo.getContainerMerkleTree().getDataChecksum()); return checksumInfo; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 22559aa37813..811e4b483a25 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -353,5 +353,6 @@ public static void writeContainerDataTreeProto(ContainerData data, ContainerProt throw new IOException("Error occurred when writing container merkle tree for containerID " + data.getContainerID(), ex); } + data.setDataChecksum(checksumInfo.getContainerMerkleTree().getDataChecksum()); } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index c3f75902defa..33f4faefb6b8 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -90,6 +90,7 @@ import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; import org.apache.hadoop.hdds.security.token.TokenVerifier; @@ -603,11 +604,12 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws // Without reconciliation, checksums should be different because of the corruption. Set checksumsBeforeReconciliation = new HashSet<>(); for (KeyValueContainer kvContainer : containers) { - kvHandler.createContainerMerkleTree(kvContainer); Optional containerChecksumInfo = checksumManager.read(kvContainer.getContainerData()); assertTrue(containerChecksumInfo.isPresent()); - checksumsBeforeReconciliation.add(containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum()); + long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); + assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); + checksumsBeforeReconciliation.add(dataChecksum); } // There should be more than 1 checksum because of the corruption. assertTrue(checksumsBeforeReconciliation.size() > 1); @@ -622,40 +624,7 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws // Setup mock for each datanode network calls needed for reconciliation. try (MockedStatic containerProtocolMock = Mockito.mockStatic(ContainerProtocolCalls.class)) { - // Mock getContainerChecksumInfo - containerProtocolMock.when(() -> ContainerProtocolCalls.getContainerChecksumInfo(any(), anyLong(), any())) - .thenAnswer(inv -> { - XceiverClientSpi xceiverClientSpi = inv.getArgument(0); - DatanodeDetails dn = xceiverClientSpi.getPipeline().getClosestNode(); - KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); - ByteString checksumInfo = checksumManager.getContainerChecksumInfo(container.getContainerData()); - return ContainerProtos.GetContainerChecksumInfoResponseProto.newBuilder() - .setContainerID(containerID) - .setContainerChecksumInfo(checksumInfo) - .build(); - }); - - // Mock getBlock - containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(any(), any(), any(), any(), anyMap())) - .thenAnswer(inv -> { - XceiverClientSpi xceiverClientSpi = inv.getArgument(0); - DatanodeDetails dn = xceiverClientSpi.getPipeline().getClosestNode(); - KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); - ContainerProtos.BlockData blockData = kvHandler.getBlockManager().getBlock(container, inv.getArgument(2)) - .getProtoBufMessage(); - return ContainerProtos.GetBlockResponseProto.newBuilder() - .setBlockData(blockData) - .build(); - }); - - // Mock readChunk - containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(any(), any(), any(), any(), any())) - .thenAnswer(inv -> { - XceiverClientSpi xceiverClientSpi = inv.getArgument(0); - DatanodeDetails dn = xceiverClientSpi.getPipeline().getClosestNode(); - KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); - return createReadChunkResponse(inv, container, kvHandler); - }); + mockContainerProtocolCalls(containerProtocolMock, dnToContainerMap, checksumManager, kvHandler, containerID); kvHandler.reconcileContainer(dnClient, containers.get(0), Sets.newHashSet(datanodes)); 
kvHandler.reconcileContainer(dnClient, containers.get(1), Sets.newHashSet(datanodes)); @@ -668,14 +637,61 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws Optional containerChecksumInfo = checksumManager.read(kvContainer.getContainerData()); assertTrue(containerChecksumInfo.isPresent()); + long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); + assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); if (prevContainerChecksumInfo != null) { - assertEquals(prevContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(), - containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum()); + assertEquals(prevContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(), dataChecksum); } prevContainerChecksumInfo = containerChecksumInfo.get(); } } } + private void mockContainerProtocolCalls(MockedStatic containerProtocolMock, + Map dnToContainerMap, + ContainerChecksumTreeManager checksumManager, + KeyValueHandler kvHandler, + long containerID) { + // Mock getContainerChecksumInfo + containerProtocolMock.when(() -> ContainerProtocolCalls.getContainerChecksumInfo(any(), anyLong(), any())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + Pipeline pipeline = xceiverClientSpi.getPipeline(); + assertEquals(1, pipeline.size()); + DatanodeDetails dn = pipeline.getFirstNode(); + KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); + ByteString checksumInfo = checksumManager.getContainerChecksumInfo(container.getContainerData()); + return ContainerProtos.GetContainerChecksumInfoResponseProto.newBuilder() + .setContainerID(containerID) + .setContainerChecksumInfo(checksumInfo) + .build(); + }); + + // Mock getBlock + containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(any(), any(), any(), any(), anyMap())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + Pipeline pipeline = xceiverClientSpi.getPipeline(); + assertEquals(1, pipeline.size()); + DatanodeDetails dn = pipeline.getFirstNode(); + KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); + ContainerProtos.BlockData blockData = kvHandler.getBlockManager().getBlock(container, inv.getArgument(2)) + .getProtoBufMessage(); + return ContainerProtos.GetBlockResponseProto.newBuilder() + .setBlockData(blockData) + .build(); + }); + + // Mock readChunk + containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(any(), any(), any(), any(), any())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + Pipeline pipeline = xceiverClientSpi.getPipeline(); + assertEquals(1, pipeline.size()); + DatanodeDetails dn = pipeline.getFirstNode(); + KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); + return createReadChunkResponse(inv, container, kvHandler); + }); + } // Helper method to create readChunk responses private ContainerProtos.ReadChunkResponseProto createReadChunkResponse(InvocationOnMock inv, @@ -873,7 +889,7 @@ protected List createContainerWithBlocks(KeyValueHandler kvHa } ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, blocks, (long) blocks * CHUNKS_PER_BLOCK); - container.close(); + container.markContainerForClose(); kvHandler.closeContainer(container); containers.add(container); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index f806d33c6322..4af04ac71cb0 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig; import org.apache.hadoop.hdds.security.symmetric.SecretKeyClient; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; @@ -108,7 +109,6 @@ import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; -import org.apache.ratis.util.ExitUtils; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -148,8 +148,6 @@ public static void init() throws Exception { // Disable the container scanner so it does not create merkle tree files that interfere with this test. conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); - ExitUtils.disableSystemExit(); - startMiniKdc(); setSecureConfig(); createCredentialsInKDC(); @@ -334,7 +332,7 @@ public void testGetChecksumInfoSuccess() throws Exception { @Test public void testContainerChecksumWithBlockMissing() throws Exception { // 1. Write data to a container. - // Read the key back check it's hash. + // Read the key back and check its hash. String volume = UUID.randomUUID().toString(); String bucket = UUID.randomUUID().toString(); Pair containerAndData = getDataAndContainer(true, 20 * 1024 * 1024, volume, bucket); @@ -383,11 +381,17 @@ public void testContainerChecksumWithBlockMissing() throws Exception { Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); // 3. Reconcile the container. 
- cluster.getStorageContainerLocationClient().reconcileContainer(containerID); + StorageContainerLocationProtocolClientSideTranslatorPB scmClient = cluster.getStorageContainerLocationClient(); + long lastHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() + .getLastHeartbeat(dataNodeDetails.get(0)); + scmClient.reconcileContainer(containerID); GenericTestUtils.waitFor(() -> { try { ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); - return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum; + long newHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() + .getLastHeartbeat(dataNodeDetails.get(0)); + return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum && + newHeartbeat > lastHeartbeat; } catch (Exception ex) { return false; } @@ -395,8 +399,8 @@ public void testContainerChecksumWithBlockMissing() throws Exception { ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), newContainerChecksumInfo.getContainerMerkleTree()); - List containerReplicas = cluster.getStorageContainerManager() - .getClientProtocolServer().getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION); + List containerReplicas = scmClient.getContainerReplicas(containerID, + ClientVersion.CURRENT_VERSION); // Compare and check if dataChecksum is same on all replicas. Set dataChecksums = containerReplicas.stream() .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) @@ -495,11 +499,17 @@ public void testContainerChecksumChunkCorruption() throws Exception { writeContainerDataTreeProto(container.getContainerData(), builder.getContainerMerkleTree()); // 4. Reconcile the container. - cluster.getStorageContainerLocationClient().reconcileContainer(containerID); + StorageContainerLocationProtocolClientSideTranslatorPB scmClient = cluster.getStorageContainerLocationClient(); + long lastHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() + .getLastHeartbeat(dataNodeDetails.get(0)); + scmClient.reconcileContainer(containerID); GenericTestUtils.waitFor(() -> { try { ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); - return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum; + long newHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() + .getLastHeartbeat(dataNodeDetails.get(0)); + return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum && + newHeartbeat > lastHeartbeat; } catch (Exception ex) { return false; } @@ -508,8 +518,8 @@ public void testContainerChecksumChunkCorruption() throws Exception { assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), newContainerChecksumInfo.getContainerMerkleTree()); Assertions.assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum()); - List containerReplicas = cluster.getStorageContainerManager() - .getClientProtocolServer().getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION); + List containerReplicas = scmClient.getContainerReplicas(containerID, + ClientVersion.CURRENT_VERSION); // Compare and check if dataChecksum is same on all replicas. 
Set dataChecksums = containerReplicas.stream() .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) From 130d57aa7cbfd862c1dddb0888b905b1102e7dad Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 11 Apr 2025 01:04:17 +0530 Subject: [PATCH 21/21] Address review comments. --- .../TestContainerCommandReconciliation.java | 76 ++++++++----------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 4af04ac71cb0..f51dbfed43af 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -76,7 +76,6 @@ import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; -import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig; import org.apache.hadoop.hdds.security.symmetric.SecretKeyClient; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; @@ -104,6 +103,7 @@ import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; +import org.apache.hadoop.ozone.container.ozoneimpl.MetadataScanResult; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.security.UserGroupInformation; import org.apache.ozone.test.GenericTestUtils; @@ -380,32 +380,19 @@ public void testContainerChecksumWithBlockMissing() throws Exception { // Checksum should have changed after block delete. Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); + // Since the container is already closed, we have manually updated the container checksum file. + // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. + // Marking a container unhealthy will send an ICR. + kvHandler.markContainerUnhealthy(container, MetadataScanResult.deleted()); + waitForDataChecksumsAtSCM(containerID, 2); + // 3. Reconcile the container. - StorageContainerLocationProtocolClientSideTranslatorPB scmClient = cluster.getStorageContainerLocationClient(); - long lastHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() - .getLastHeartbeat(dataNodeDetails.get(0)); - scmClient.reconcileContainer(containerID); - GenericTestUtils.waitFor(() -> { - try { - ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); - long newHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() - .getLastHeartbeat(dataNodeDetails.get(0)); - return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum && - newHeartbeat > lastHeartbeat; - } catch (Exception ex) { - return false; - } - }, 500, 20000); + cluster.getStorageContainerLocationClient().reconcileContainer(containerID); + // Compare and check if dataChecksum is same on all replicas. 
+ waitForDataChecksumsAtSCM(containerID, 1); ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), newContainerChecksumInfo.getContainerMerkleTree()); - List containerReplicas = scmClient.getContainerReplicas(containerID, - ClientVersion.CURRENT_VERSION); - // Compare and check if dataChecksum is same on all replicas. - Set dataChecksums = containerReplicas.stream() - .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) - .collect(Collectors.toSet()); - assertEquals(1, dataChecksums.size()); TestHelper.validateData(KEY_NAME, data, store, volume, bucket); } @@ -498,34 +485,35 @@ public void testContainerChecksumChunkCorruption() throws Exception { Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); writeContainerDataTreeProto(container.getContainerData(), builder.getContainerMerkleTree()); + // Since the container is already closed, we have manually updated the container checksum file. + // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. + // Marking a container unhealthy will send an ICR. + kvHandler.markContainerUnhealthy(container, MetadataScanResult.deleted()); + waitForDataChecksumsAtSCM(containerID, 2); + // 4. Reconcile the container. - StorageContainerLocationProtocolClientSideTranslatorPB scmClient = cluster.getStorageContainerLocationClient(); - long lastHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() - .getLastHeartbeat(dataNodeDetails.get(0)); - scmClient.reconcileContainer(containerID); + cluster.getStorageContainerLocationClient().reconcileContainer(containerID); + // Compare and check if dataChecksum is same on all replicas. 
+ waitForDataChecksumsAtSCM(containerID, 1); + ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); + assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), + newContainerChecksumInfo.getContainerMerkleTree()); + Assertions.assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum()); + TestHelper.validateData(KEY_NAME, data, store, volume, bucket); + } + + private void waitForDataChecksumsAtSCM(long containerID, int expectedSize) throws Exception { GenericTestUtils.waitFor(() -> { try { - ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); - long newHeartbeat = cluster.getStorageContainerManager().getScmNodeManager() - .getLastHeartbeat(dataNodeDetails.get(0)); - return newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum() == oldDataChecksum && - newHeartbeat > lastHeartbeat; + Set dataChecksums = cluster.getStorageContainerLocationClient().getContainerReplicas(containerID, + ClientVersion.CURRENT_VERSION).stream() + .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) + .collect(Collectors.toSet()); + return dataChecksums.size() == expectedSize; } catch (Exception ex) { return false; } }, 500, 20000); - ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); - assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), - newContainerChecksumInfo.getContainerMerkleTree()); - Assertions.assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum()); - List containerReplicas = scmClient.getContainerReplicas(containerID, - ClientVersion.CURRENT_VERSION); - // Compare and check if dataChecksum is same on all replicas. - Set dataChecksums = containerReplicas.stream() - .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum) - .collect(Collectors.toSet()); - assertEquals(1, dataChecksums.size()); - TestHelper.validateData(KEY_NAME, data, store, volume, bucket); } private Pair getDataAndContainer(boolean close, int dataLen, String volumeName, String bucketName)