From 1a9fdc1cd20b66399d4885bd2876bd43738edd90 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 31 Mar 2025 14:52:44 +0530 Subject: [PATCH 1/9] HDDS-12745. Container checksum should be reported during container close and DN restart --- .../ContainerChecksumTreeManager.java | 17 ++++++++++ .../helpers/KeyValueContainerUtil.java | 13 ++++++++ .../TestContainerCommandReconciliation.java | 33 +++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index c247380fbe3f..7a863a895f79 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -361,6 +361,8 @@ private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo che throw new IOException("Error occurred when writing container merkle tree for containerID " + data.getContainerID(), ex); } + // Set in-memory data checksum. + data.setDataChecksum(checksumInfo.getContainerMerkleTree().getDataChecksum()); } /** @@ -378,6 +380,21 @@ public ByteString getContainerChecksumInfo(KeyValueContainerData data) throws IO } } + public static Optional readChecksumInfo(KeyValueContainerData data) { + File checksumFile = getContainerChecksumFile(data); + if (!checksumFile.exists()) { + LOG.error("Checksum file not found for container {}", data.getContainerID()); + return Optional.empty(); + } + + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + return Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream)); + } catch (Exception e) { + LOG.error("Error while reading the checksum file for container {}", data.getContainerID()); + return Optional.empty(); + } + } + @VisibleForTesting public ContainerMerkleTreeMetrics getMetrics() { return this.metrics; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java index f0d13c14d305..82bd0f395b4d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java @@ -26,12 +26,15 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Optional; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerChecksumInfo; import org.apache.hadoop.hdds.utils.MetadataKeyFilters; import org.apache.hadoop.hdds.utils.db.Table; import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator; @@ -277,6 +280,15 @@ public static void parseKVContainerData(KeyValueContainerData kvContainerData, } } + private static void populateContainerDataChecksum(KeyValueContainerData kvContainerData) { + Optional optionalContainerChecksumInfo = ContainerChecksumTreeManager + .readChecksumInfo(kvContainerData); + if (optionalContainerChecksumInfo.isPresent()) { + ContainerChecksumInfo containerChecksumInfo = optionalContainerChecksumInfo.get(); + kvContainerData.setDataChecksum(containerChecksumInfo.getContainerMerkleTree().getDataChecksum()); + } + } + private static void populateContainerMetadata( KeyValueContainerData kvContainerData, DatanodeStore store, boolean bCheckChunksFilePath) @@ -356,6 +368,7 @@ private static void populateContainerMetadata( // Load finalizeBlockLocalIds for container in memory. populateContainerFinalizeBlock(kvContainerData, store); + populateContainerDataChecksum(kvContainerData); } /** diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 2211aaf2b069..5cb89ce40bbd 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -32,6 +33,7 @@ import java.nio.file.Files; import java.nio.file.StandardOpenOption; import java.util.List; +import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; @@ -40,6 +42,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; @@ -247,6 +250,36 @@ public void testGetChecksumInfoSuccess() throws Exception { } } + @Test + public void testDataChecksumReportedAtSCM() throws Exception { + long containerID = writeDataAndGetContainer(true); + // Check non-zero checksum after container close + Set containerReplicas = cluster.getStorageContainerManager().getContainerManager() + .getContainerReplicas(ContainerID.valueOf(containerID)); + for (ContainerReplica containerReplica: containerReplicas) { + assertNotEquals(0, containerReplica.getDataChecksum()); + } + cluster.getStorageContainerLocationClient().reconcileContainer(containerID); + Thread.sleep(10000); + + // Check non-zero checksum after container reconciliation + containerReplicas = cluster.getStorageContainerManager().getContainerManager() + .getContainerReplicas(ContainerID.valueOf(containerID)); + for (ContainerReplica containerReplica: containerReplicas) { + assertNotEquals(0, containerReplica.getDataChecksum()); + } + + // Check non-zero checksum after datanode restart + cluster.shutdownHddsDatanodes(); + cluster.startHddsDatanodes(); + cluster.waitForClusterToBeReady(); + containerReplicas = cluster.getStorageContainerManager().getContainerManager() + .getContainerReplicas(ContainerID.valueOf(containerID)); + for (ContainerReplica containerReplica: containerReplicas) { + assertNotEquals(0, containerReplica.getDataChecksum()); + } + } + private long writeDataAndGetContainer(boolean close) throws Exception { String volumeName = UUID.randomUUID().toString(); String bucketName = UUID.randomUUID().toString(); From c0820bcdc6d05a6e83609380babbb835c88d597c Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 31 Mar 2025 18:20:28 +0530 Subject: [PATCH 2/9] HDDS-12745. Find findbugs. --- .../container/checksum/ContainerChecksumTreeManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 7a863a895f79..31bf2e8d4b58 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -389,8 +389,8 @@ public static Optional readChecksumInfo(K try (FileInputStream inStream = new FileInputStream(checksumFile)) { return Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream)); - } catch (Exception e) { - LOG.error("Error while reading the checksum file for container {}", data.getContainerID()); + } catch (IOException ex) { + LOG.error("Error while reading the checksum file for container {}", data.getContainerID(), ex); return Optional.empty(); } } From b271dc6506352c638344e4b0504db8b501dc6a78 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 31 Mar 2025 22:55:35 +0530 Subject: [PATCH 3/9] HDDS-12745. Fix tests. --- hadoop-ozone/dist/src/main/smoketest/admincli/container.robot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot index b17973e1f364..627cda2c976f 100644 --- a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot +++ b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot @@ -153,8 +153,8 @@ Reconcile closed container # TODO When the scanner is computing checksums automatically, this test may need to be updated. ${container} = Execute ozone admin container list --state CLOSED | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1 ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 - # 0 is the hex value of an empty checksum. - Should Be Equal As Strings 0 ${data_checksum} + # 0 is the hex value of an empty checksum. After container close the data checksum should not be 0. + Should Not Be Equal As Strings 0 ${data_checksum} # When reconciliation finishes, replica checksums should be shown. Execute ozone admin container reconcile ${container} Wait until keyword succeeds 1min 5sec Reconciliation complete ${container} From 994e2d51a5e83c7b605701ae52ff3329cef0e91f Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Thu, 3 Apr 2025 00:55:44 +0530 Subject: [PATCH 4/9] Fix tests. --- .../keyvalue/helpers/KeyValueContainerUtil.java | 4 ++++ .../hadoop/hdds/scm/container/ContainerReplica.java | 1 + .../apache/hadoop/hdds/scm/TestCloseContainer.java | 13 +++++++++++++ .../TestContainerCommandReconciliation.java | 6 +++--- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java index 82bd0f395b4d..c92eb13519d4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java @@ -281,6 +281,10 @@ public static void parseKVContainerData(KeyValueContainerData kvContainerData, } private static void populateContainerDataChecksum(KeyValueContainerData kvContainerData) { + if (kvContainerData.isOpen()) { + return; + } + Optional optionalContainerChecksumInfo = ContainerChecksumTreeManager .readChecksumInfo(kvContainerData); if (optionalContainerChecksumInfo.isPresent()) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java index 16afcc960850..0e2baeeeb341 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplica.java @@ -188,6 +188,7 @@ public String toString() { ",replicaIndex=" + replicaIndex : "") + ", isEmpty=" + isEmpty + + ", dataChecksum=" + dataChecksum + '}'; } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java index 1c838a85f88e..8c63ec5caf87 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java @@ -158,6 +158,9 @@ public void testReplicasAreReportedForClosedContainerAfterRestart() // Ensure 3 replicas are reported successfully as expected. GenericTestUtils.waitFor(() -> getContainerReplicas(newContainer).size() == 3, 200, 30000); + for (ContainerReplica replica : getContainerReplicas(newContainer)) { + assertNotEquals(0, replica.getDataChecksum()); + } } /** @@ -198,6 +201,10 @@ public void testCloseClosedContainer() assertTrue(containerChecksumFileExists(hddsDatanode, container)); } + for (ContainerReplica replica : getContainerReplicas(container)) { + assertNotEquals(0, replica.getDataChecksum()); + } + assertThrows(IOException.class, () -> cluster.getStorageContainerLocationClient() .closeContainer(container.getContainerID()), @@ -269,6 +276,12 @@ public void testContainerChecksumForClosedContainer() throws Exception { assertNotEquals(prevExpectedChecksumInfo1.getContainerID(), prevExpectedChecksumInfo2.getContainerID()); assertNotEquals(prevExpectedChecksumInfo1.getContainerMerkleTree().getDataChecksum(), prevExpectedChecksumInfo2.getContainerMerkleTree().getDataChecksum()); + for (ContainerReplica replica : getContainerReplicas(containerInfo1)) { + assertNotEquals(0, replica.getDataChecksum()); + } + for (ContainerReplica replica : getContainerReplicas(containerInfo2)) { + assertNotEquals(0, replica.getDataChecksum()); + } } private boolean checkContainerCloseInDatanode(HddsDatanodeService hddsDatanode, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 5cb89ce40bbd..6572524e0e48 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -87,6 +87,7 @@ public static void init() throws Exception { conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); // Disable the container scanner so it does not create merkle tree files that interfere with this test. conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); + conf.setBoolean("hdds.container.scrub.enabled", false); cluster = MiniOzoneCluster.newBuilder(conf) .setNumDatanodes(3) .build(); @@ -270,9 +271,8 @@ public void testDataChecksumReportedAtSCM() throws Exception { } // Check non-zero checksum after datanode restart - cluster.shutdownHddsDatanodes(); - cluster.startHddsDatanodes(); - cluster.waitForClusterToBeReady(); + // Restarting all the nodes take more time in mini ozone cluster, so restarting only one node + cluster.restartHddsDatanode(0, true); containerReplicas = cluster.getStorageContainerManager().getContainerManager() .getContainerReplicas(ContainerID.valueOf(containerID)); for (ContainerReplica containerReplica: containerReplicas) { From 97543d07e994f6eb5e09c06593d076c7eac86083 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Sat, 5 Apr 2025 02:00:01 +0530 Subject: [PATCH 5/9] Address review comments. --- .../dn/checksum/TestContainerCommandReconciliation.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 6572524e0e48..6429ccb8405a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -255,8 +255,10 @@ public void testGetChecksumInfoSuccess() throws Exception { public void testDataChecksumReportedAtSCM() throws Exception { long containerID = writeDataAndGetContainer(true); // Check non-zero checksum after container close + // TODO: Introduce corruption in the container and after reconciliation all checksums should match (HDDS-11763) Set containerReplicas = cluster.getStorageContainerManager().getContainerManager() .getContainerReplicas(ContainerID.valueOf(containerID)); + assertEquals(3, containerReplicas.size()); for (ContainerReplica containerReplica: containerReplicas) { assertNotEquals(0, containerReplica.getDataChecksum()); } @@ -266,6 +268,7 @@ public void testDataChecksumReportedAtSCM() throws Exception { // Check non-zero checksum after container reconciliation containerReplicas = cluster.getStorageContainerManager().getContainerManager() .getContainerReplicas(ContainerID.valueOf(containerID)); + assertEquals(3, containerReplicas.size()); for (ContainerReplica containerReplica: containerReplicas) { assertNotEquals(0, containerReplica.getDataChecksum()); } @@ -273,8 +276,10 @@ public void testDataChecksumReportedAtSCM() throws Exception { // Check non-zero checksum after datanode restart // Restarting all the nodes take more time in mini ozone cluster, so restarting only one node cluster.restartHddsDatanode(0, true); + cluster.restartStorageContainerManager(true); containerReplicas = cluster.getStorageContainerManager().getContainerManager() .getContainerReplicas(ContainerID.valueOf(containerID)); + assertEquals(3, containerReplicas.size()); for (ContainerReplica containerReplica: containerReplicas) { assertNotEquals(0, containerReplica.getDataChecksum()); } From 3928be8b7cd991ea9155975fa0f910c5113458e6 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 11 Apr 2025 17:00:00 +0530 Subject: [PATCH 6/9] Fix test. --- .../TestContainerCommandReconciliation.java | 88 +++++++++++++++---- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 168156964bf3..439d22895120 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -77,7 +77,9 @@ import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.symmetric.SecretKeyClient; import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient; import org.apache.hadoop.hdds.utils.db.BatchOperation; @@ -85,6 +87,7 @@ import org.apache.hadoop.ozone.ClientVersion; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; import org.apache.hadoop.ozone.client.ObjectStore; import org.apache.hadoop.ozone.client.OzoneBucket; import org.apache.hadoop.ozone.client.OzoneClient; @@ -121,7 +124,7 @@ */ public class TestContainerCommandReconciliation { - private static MiniOzoneCluster cluster; + private static MiniOzoneHAClusterImpl cluster; private static OzoneClient rpcClient; private static ObjectStore store; private static OzoneConfiguration conf; @@ -506,38 +509,92 @@ public void testContainerChecksumChunkCorruption() throws Exception { @Test public void testDataChecksumReportedAtSCM() throws Exception { + // 1. Write data to a container. + // Read the key back and check its hash. String volume = UUID.randomUUID().toString(); String bucket = UUID.randomUUID().toString(); - long containerID = writeDataAndGetContainer(true, volume, bucket); + Pair containerAndData = getDataAndContainer(true, 20 * 1024 * 1024, volume, bucket); + long containerID = containerAndData.getLeft(); + byte[] data = containerAndData.getRight(); + // Get the datanodes where the container replicas are stored. + List dataNodeDetails = cluster.getStorageContainerManager().getContainerManager() + .getContainerReplicas(ContainerID.valueOf(containerID)) + .stream().map(ContainerReplica::getDatanodeDetails) + .collect(Collectors.toList()); + Assertions.assertEquals(3, dataNodeDetails.size()); + HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0)); + DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); + Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); + KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); + ContainerProtos.ContainerChecksumInfo oldContainerChecksumInfo = readChecksumFile(container.getContainerData()); + KeyValueHandler kvHandler = (KeyValueHandler) datanodeStateMachine.getContainer().getDispatcher() + .getHandler(ContainerProtos.ContainerType.KeyValueContainer); + + BlockManager blockManager = kvHandler.getBlockManager(); + List blockDataList = blockManager.listBlock(container, -1, 100); + String chunksPath = container.getContainerData().getChunksPath(); + long oldDataChecksum = oldContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); // Check non-zero checksum after container close - // TODO: Introduce corruption in the container and after reconciliation all checksums should match (HDDS-11763) - Set containerReplicas = cluster.getStorageContainerManager().getContainerManager() - .getContainerReplicas(ContainerID.valueOf(containerID)); + StorageContainerLocationProtocolClientSideTranslatorPB scmClient = cluster.getStorageContainerLocationClient(); + List containerReplicas = scmClient.getContainerReplicas(containerID, + ClientVersion.CURRENT_VERSION); assertEquals(3, containerReplicas.size()); - for (ContainerReplica containerReplica: containerReplicas) { + for (HddsProtos.SCMContainerReplicaProto containerReplica: containerReplicas) { assertNotEquals(0, containerReplica.getDataChecksum()); } - cluster.getStorageContainerLocationClient().reconcileContainer(containerID); - Thread.sleep(10000); + // 2. Delete some blocks to simulate missing blocks. + try (DBHandle db = BlockUtils.getDB(containerData, conf); + BatchOperation op = db.getStore().getBatchHandler().initBatchOperation()) { + for (int i = 0; i < blockDataList.size(); i += 2) { + BlockData blockData = blockDataList.get(i); + // Delete the block metadata from the container db + db.getStore().getBlockDataTable().deleteWithBatch(op, containerData.getBlockKey(blockData.getLocalID())); + // Delete the block file. + Files.deleteIfExists(Paths.get(chunksPath + "/" + blockData.getBlockID().getLocalID() + ".block")); + } + db.getStore().getBatchHandler().commitBatchOperation(op); + db.getStore().flushDB(); + } + + // TODO: Use On-demand container scanner to build the new container merkle tree. (HDDS-10374) + Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); + kvHandler.createContainerMerkleTree(container); + ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = + readChecksumFile(container.getContainerData()); + long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum(); + // Checksum should have changed after block delete. + Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); + + // Since the container is already closed, we have manually updated the container checksum file. + // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. + // Marking a container unhealthy will send an ICR. + kvHandler.markContainerUnhealthy(container, MetadataScanResult.deleted()); + waitForDataChecksumsAtSCM(containerID, 2); + scmClient.reconcileContainer(containerID); + + waitForDataChecksumsAtSCM(containerID, 1); // Check non-zero checksum after container reconciliation - containerReplicas = cluster.getStorageContainerManager().getContainerManager() - .getContainerReplicas(ContainerID.valueOf(containerID)); + containerReplicas = scmClient.getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION); assertEquals(3, containerReplicas.size()); - for (ContainerReplica containerReplica: containerReplicas) { + for (HddsProtos.SCMContainerReplicaProto containerReplica: containerReplicas) { assertNotEquals(0, containerReplica.getDataChecksum()); } // Check non-zero checksum after datanode restart // Restarting all the nodes take more time in mini ozone cluster, so restarting only one node cluster.restartHddsDatanode(0, true); - cluster.restartStorageContainerManager(true); - containerReplicas = cluster.getStorageContainerManager().getContainerManager() - .getContainerReplicas(ContainerID.valueOf(containerID)); + for (StorageContainerManager scm : cluster.getStorageContainerManagers()) { + cluster.restartStorageContainerManager(scm, false); + } + cluster.waitForClusterToBeReady(); + waitForDataChecksumsAtSCM(containerID, 1); + containerReplicas = scmClient.getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION); assertEquals(3, containerReplicas.size()); - for (ContainerReplica containerReplica: containerReplicas) { + for (HddsProtos.SCMContainerReplicaProto containerReplica: containerReplicas) { assertNotEquals(0, containerReplica.getDataChecksum()); } + TestHelper.validateData(KEY_NAME, data, store, volume, bucket); } private void waitForDataChecksumsAtSCM(long containerID, int expectedSize) throws Exception { @@ -660,7 +717,6 @@ private static void startCluster() throws Exception { .setSCMServiceId("SecureSCM") .setNumOfStorageContainerManagers(3) .setNumOfOzoneManagers(1) - .setNumDatanodes(3) .build(); cluster.waitForClusterToBeReady(); rpcClient = OzoneClientFactory.getRpcClient(conf); From 9e1db898f1c6dbf23fbfd79731e973781d213b97 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Fri, 11 Apr 2025 23:24:40 +0530 Subject: [PATCH 7/9] Address review comments. --- .../ContainerChecksumTreeManager.java | 37 ++++++++----------- .../helpers/KeyValueContainerUtil.java | 14 ++++--- .../TestContainerCommandReconciliation.java | 18 ++++----- 3 files changed, 33 insertions(+), 36 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 01fd9c61ce10..636c2267f0e7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -318,21 +318,11 @@ private Lock getLock(long containerID) { * swapped into place. */ public Optional read(ContainerData data) throws IOException { - long containerID = data.getContainerID(); - File checksumFile = getContainerChecksumFile(data); try { - if (!checksumFile.exists()) { - LOG.debug("No checksum file currently exists for container {} at the path {}", containerID, checksumFile); - return Optional.empty(); - } - try (FileInputStream inStream = new FileInputStream(checksumFile)) { - return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), - () -> Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream))); - } + return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), () -> readChecksumInfo(data)); } catch (IOException ex) { metrics.incrementMerkleTreeReadFailures(); - throw new IOException("Error occurred when reading container merkle tree for containerID " - + data.getContainerID() + " at path " + checksumFile, ex); + throw new IOException(ex); } } @@ -383,18 +373,21 @@ public ByteString getContainerChecksumInfo(KeyValueContainerData data) throws IO } } - public static Optional readChecksumInfo(KeyValueContainerData data) { + public static Optional readChecksumInfo(ContainerData data) + throws IOException { + long containerID = data.getContainerID(); File checksumFile = getContainerChecksumFile(data); - if (!checksumFile.exists()) { - LOG.error("Checksum file not found for container {}", data.getContainerID()); - return Optional.empty(); - } - - try (FileInputStream inStream = new FileInputStream(checksumFile)) { - return Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream)); + try { + if (!checksumFile.exists()) { + LOG.debug("No checksum file currently exists for container {} at the path {}", containerID, checksumFile); + return Optional.empty(); + } + try (FileInputStream inStream = new FileInputStream(checksumFile)) { + return Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream)); + } } catch (IOException ex) { - LOG.error("Error while reading the checksum file for container {}", data.getContainerID(), ex); - return Optional.empty(); + throw new IOException("Error occurred when reading container merkle tree for containerID " + + data.getContainerID() + " at path " + checksumFile, ex); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java index c92eb13519d4..dbf5cfaa8e99 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/helpers/KeyValueContainerUtil.java @@ -285,11 +285,15 @@ private static void populateContainerDataChecksum(KeyValueContainerData kvContai return; } - Optional optionalContainerChecksumInfo = ContainerChecksumTreeManager - .readChecksumInfo(kvContainerData); - if (optionalContainerChecksumInfo.isPresent()) { - ContainerChecksumInfo containerChecksumInfo = optionalContainerChecksumInfo.get(); - kvContainerData.setDataChecksum(containerChecksumInfo.getContainerMerkleTree().getDataChecksum()); + try { + Optional optionalContainerChecksumInfo = ContainerChecksumTreeManager + .readChecksumInfo(kvContainerData); + if (optionalContainerChecksumInfo.isPresent()) { + ContainerChecksumInfo containerChecksumInfo = optionalContainerChecksumInfo.get(); + kvContainerData.setDataChecksum(containerChecksumInfo.getContainerMerkleTree().getDataChecksum()); + } + } catch (IOException ex) { + LOG.warn("Failed to read checksum info for container {}", kvContainerData.getContainerID(), ex); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 439d22895120..6d748023cfbb 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -114,7 +114,6 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -150,6 +149,7 @@ public static void init() throws Exception { conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 128 * 1024, StorageUnit.BYTES); conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 512 * 1024, StorageUnit.BYTES); // Disable the container scanner so it does not create merkle tree files that interfere with this test. + // TODO: Currently container scrub sets the checksum to 0, Revert this after HDDS-10374 is merged. conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); conf.setBoolean("hdds.container.scrub.enabled", false); @@ -348,7 +348,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception { .getContainerReplicas(ContainerID.valueOf(containerID)) .stream().map(ContainerReplica::getDatanodeDetails) .collect(Collectors.toList()); - Assertions.assertEquals(3, dataNodeDetails.size()); + assertEquals(3, dataNodeDetails.size()); HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0)); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); @@ -383,7 +383,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception { readChecksumFile(container.getContainerData()); long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum(); // Checksum should have changed after block delete. - Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); + assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); // Since the container is already closed, we have manually updated the container checksum file. // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. @@ -414,7 +414,7 @@ public void testContainerChecksumChunkCorruption() throws Exception { .getContainerReplicas(ContainerID.valueOf(containerID)) .stream().map(ContainerReplica::getDatanodeDetails) .collect(Collectors.toList()); - Assertions.assertEquals(3, dataNodeDetails.size()); + assertEquals(3, dataNodeDetails.size()); HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0)); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); @@ -468,11 +468,11 @@ public void testContainerChecksumChunkCorruption() throws Exception { long dataChecksumAfterAfterChunkCorruption = containerChecksumAfterChunkCorruption .getContainerMerkleTree().getDataChecksum(); // Checksum should have changed after chunk corruption. - Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterAfterChunkCorruption); + assertNotEquals(oldDataChecksum, dataChecksumAfterAfterChunkCorruption); // 3. Set Unhealthy for first chunk of all blocks. This should be done by the scanner, Until then this is a // manual step. - // // TODO: Use On-demand container scanner to build the new container merkle tree (HDDS-10374) + // TODO: Use On-demand container scanner to build the new container merkle tree (HDDS-10374) Random random = new Random(); ContainerProtos.ContainerChecksumInfo.Builder builder = containerChecksumAfterChunkCorruption.toBuilder(); List blockMerkleTreeList = builder.getContainerMerkleTree() @@ -503,7 +503,7 @@ public void testContainerChecksumChunkCorruption() throws Exception { ContainerProtos.ContainerChecksumInfo newContainerChecksumInfo = readChecksumFile(container.getContainerData()); assertTreesSortedAndMatch(oldContainerChecksumInfo.getContainerMerkleTree(), newContainerChecksumInfo.getContainerMerkleTree()); - Assertions.assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum()); + assertEquals(oldDataChecksum, newContainerChecksumInfo.getContainerMerkleTree().getDataChecksum()); TestHelper.validateData(KEY_NAME, data, store, volume, bucket); } @@ -521,7 +521,7 @@ public void testDataChecksumReportedAtSCM() throws Exception { .getContainerReplicas(ContainerID.valueOf(containerID)) .stream().map(ContainerReplica::getDatanodeDetails) .collect(Collectors.toList()); - Assertions.assertEquals(3, dataNodeDetails.size()); + assertEquals(3, dataNodeDetails.size()); HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0)); DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine(); Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID); @@ -564,7 +564,7 @@ public void testDataChecksumReportedAtSCM() throws Exception { readChecksumFile(container.getContainerData()); long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum(); // Checksum should have changed after block delete. - Assertions.assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); + assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); // Since the container is already closed, we have manually updated the container checksum file. // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. From d188a7915188899bc5d52f749562fbdeafef6367 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Sat, 12 Apr 2025 00:36:22 +0530 Subject: [PATCH 8/9] Fix test. --- .../container/checksum/TestContainerChecksumTreeManager.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java index 987ff7cf81f2..538fd9c15c6f 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java @@ -201,7 +201,6 @@ public void testDeletedBlocksPreservedOnTreeWrite() throws Exception { assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete)); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTreeWriter tree = buildTestTree(config); checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); @@ -222,7 +221,6 @@ public void testTreePreservedOnDeletedBlocksWrite() throws Exception { assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTreeWriter tree = buildTestTree(config); checksumManager.writeContainerDataTree(container, tree); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); List expectedBlocksToDelete = Arrays.asList(1L, 2L, 3L); checksumManager.markBlocksAsDeleted(container, new ArrayList<>(expectedBlocksToDelete)); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); @@ -242,8 +240,6 @@ public void testReadContainerMerkleTreeMetric() throws Exception { assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); ContainerMerkleTreeWriter tree = buildTestTree(config); checksumManager.writeContainerDataTree(container, tree); - assertEquals(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total(), 0); - checksumManager.writeContainerDataTree(container, tree); assertTrue(metrics.getWriteContainerMerkleTreeLatencyNS().lastStat().total() > 0); assertTrue(metrics.getReadContainerMerkleTreeLatencyNS().lastStat().total() > 0); } From 1813fe116d55e70819bb225e5f9b0b038c192a94 Mon Sep 17 00:00:00 2001 From: Aswin Shakil Balasubramanian Date: Mon, 14 Apr 2025 23:29:51 +0530 Subject: [PATCH 9/9] Address review. --- .../container/checksum/ContainerChecksumTreeManager.java | 5 +++++ .../dn/checksum/TestContainerCommandReconciliation.java | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 636c2267f0e7..261073123b7c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -373,6 +373,11 @@ public ByteString getContainerChecksumInfo(KeyValueContainerData data) throws IO } } + /** + * Reads the container checksum info file (containerID.tree) from the disk. + * Callers are not required to hold a lock while calling this since writes are done to a tmp file and atomically + * swapped into place. + */ public static Optional readChecksumInfo(ContainerData data) throws IOException { long containerID = data.getContainerID(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 6d748023cfbb..4624cc562ff5 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -530,9 +530,6 @@ public void testDataChecksumReportedAtSCM() throws Exception { KeyValueHandler kvHandler = (KeyValueHandler) datanodeStateMachine.getContainer().getDispatcher() .getHandler(ContainerProtos.ContainerType.KeyValueContainer); - BlockManager blockManager = kvHandler.getBlockManager(); - List blockDataList = blockManager.listBlock(container, -1, 100); - String chunksPath = container.getContainerData().getChunksPath(); long oldDataChecksum = oldContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); // Check non-zero checksum after container close StorageContainerLocationProtocolClientSideTranslatorPB scmClient = cluster.getStorageContainerLocationClient(); @@ -544,6 +541,9 @@ public void testDataChecksumReportedAtSCM() throws Exception { } // 2. Delete some blocks to simulate missing blocks. + BlockManager blockManager = kvHandler.getBlockManager(); + List blockDataList = blockManager.listBlock(container, -1, 100); + String chunksPath = container.getContainerData().getChunksPath(); try (DBHandle db = BlockUtils.getDB(containerData, conf); BatchOperation op = db.getStore().getBatchHandler().initBatchOperation()) { for (int i = 0; i < blockDataList.size(); i += 2) {